From b87422a86f4dd3b59ef91c3ce37945865a6cfbef Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 27 Apr 2020 22:07:21 -0400
Subject: [PATCH] VideoCore/GPU: Delegate subchannel engines to the dma pusher.

---
 src/video_core/dma_pusher.cpp | 20 +++++++++++++++++---
 src/video_core/dma_pusher.h   | 11 +++++++++++
 src/video_core/gpu.cpp        | 22 +++++++++++++++++++++-
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 16311f05e..bdc023d54 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -27,6 +27,8 @@ void DmaPusher::DispatchCalls() {
 
     dma_pushbuffer_subindex = 0;
 
+    dma_state.is_last_call = true;
+
     while (system.IsPoweredOn()) {
         if (!Step()) {
             break;
@@ -82,9 +84,11 @@ bool DmaPusher::Step() {
                     index);
                 CallMultiMethod(&command_header.argument, max_write);
                 dma_state.method_count -= max_write;
+                dma_state.is_last_call = true;
                 index += max_write;
                 continue;
             } else {
+                dma_state.is_last_call = dma_state.method_count <= 1;
                 CallMethod(command_header.argument);
             }
 
@@ -144,12 +148,22 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
 }
 
 void DmaPusher::CallMethod(u32 argument) const {
-    gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
+    if (dma_state.method >= non_puller_methods) { // handled by the bound engine
+        auto* const engine = subchannels[dma_state.subchannel]; // nullptr if never bound
+        engine->CallMethod(dma_state.method, argument, dma_state.is_last_call);
+    } else { // below non_puller_methods: the GPU processes the method itself
+        gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
+    }
 }
 
 void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
-    gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
-                        dma_state.method_count);
+    if (dma_state.method >= non_puller_methods) { // handled by the bound engine
+        auto* const engine = subchannels[dma_state.subchannel]; // nullptr if never bound
+        engine->CallMultiMethod(dma_state.method, base_start, num_methods, dma_state.method_count);
+    } else { // below non_puller_methods: the GPU processes the batch itself
+        gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
+                            dma_state.method_count);
+    }
 }
 
 } // namespace Tegra
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 6cef71306..e8b714e94 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -4,11 +4,13 @@
 
 #pragma once
 
+#include <array>
 #include <vector>
 #include <queue>
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
 
 namespace Core {
 class System;
@@ -69,7 +71,13 @@ public:
 
     void DispatchCalls();
 
+    void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) {
+        subchannels[subchannel_id] = engine; // non-owning; index is not range-checked
+    }
+
 private:
+    static constexpr u32 non_puller_methods = 0x40; ///< First method id routed to an engine
+    static constexpr u32 max_subchannels = 8;       ///< Number of subchannel binding slots
     bool Step();
 
     void SetState(const CommandHeader& command_header);
@@ -88,6 +96,7 @@ private:
         u32 method_count;      ///< Current method count
         u32 length_pending;    ///< Large NI command length pending
         bool non_incrementing; ///< Current command's NI flag
+        bool is_last_call;     ///< Last-call hint forwarded to the bound engine's CallMethod
     };
 
     DmaState dma_state{};
@@ -96,6 +105,8 @@ private:
     GPUVAddr dma_mget{};  ///< main pushbuffer last read address
     bool ib_enable{true}; ///< IB mode enabled
 
+    std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{};
+
     GPU& gpu;
     Core::System& system;
 };
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4868437c1..f10d69fd5 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -347,7 +347,27 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
     // Bind the current subchannel to the desired engine id.
     LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
               method_call.argument);
-    bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
+    const auto engine_id = static_cast<EngineID>(method_call.argument);
+    bound_engines[method_call.subchannel] = engine_id;
+    switch (engine_id) { // mirror the binding in the DMA pusher so it can dispatch directly
+    case EngineID::FERMI_TWOD_A:
+        dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
+        break;
+    case EngineID::MAXWELL_B:
+        dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
+        break;
+    default: // NOTE(review): the DMA pusher keeps whatever engine was bound before
+        UNIMPLEMENTED_MSG("Unimplemented engine");
+    }
 }
 
 void GPU::ProcessSemaphoreTriggerMethod() {