Merge pull request #556 from Subv/dma_engine
GPU: Partially implemented the Maxwell DMA engine.
This commit is contained in:
commit
019d7208c8
|
@ -9,6 +9,8 @@ add_library(video_core STATIC
|
||||||
engines/maxwell_3d.h
|
engines/maxwell_3d.h
|
||||||
engines/maxwell_compute.cpp
|
engines/maxwell_compute.cpp
|
||||||
engines/maxwell_compute.h
|
engines/maxwell_compute.h
|
||||||
|
engines/maxwell_dma.cpp
|
||||||
|
engines/maxwell_dma.h
|
||||||
engines/shader_bytecode.h
|
engines/shader_bytecode.h
|
||||||
gpu.cpp
|
gpu.cpp
|
||||||
gpu.h
|
gpu.h
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/maxwell_compute.h"
|
#include "video_core/engines/maxwell_compute.h"
|
||||||
|
#include "video_core/engines/maxwell_dma.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
|
@ -60,8 +61,11 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
|
||||||
case EngineID::MAXWELL_COMPUTE_B:
|
case EngineID::MAXWELL_COMPUTE_B:
|
||||||
maxwell_compute->WriteReg(method, value);
|
maxwell_compute->WriteReg(method, value);
|
||||||
break;
|
break;
|
||||||
|
case EngineID::MAXWELL_DMA_COPY_A:
|
||||||
|
maxwell_dma->WriteReg(method, value);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,7 @@ void Fermi2D::HandleSurfaceCopy() {
|
||||||
|
|
||||||
if (regs.src.linear == regs.dst.linear) {
|
if (regs.src.linear == regs.dst.linear) {
|
||||||
// If the input layout and the output layout are the same, just perform a raw copy.
|
// If the input layout and the output layout are the same, just perform a raw copy.
|
||||||
|
ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
|
||||||
Memory::CopyBlock(dest_cpu, source_cpu,
|
Memory::CopyBlock(dest_cpu, source_cpu,
|
||||||
src_bytes_per_pixel * regs.dst.width * regs.dst.height);
|
src_bytes_per_pixel * regs.dst.width * regs.dst.height);
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -0,0 +1,69 @@
|
||||||
|
// Copyright 2018 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "core/memory.h"
|
||||||
|
#include "video_core/engines/maxwell_dma.h"
|
||||||
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
namespace Engines {
|
||||||
|
|
||||||
|
/// Constructs the DMA engine, keeping a reference to the GPU memory manager that is later used
/// to translate GPU virtual addresses when a copy is executed.
MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager)
    : memory_manager(memory_manager) {
}
|
||||||
|
|
||||||
|
/// Stores `value` into the register selected by `method` and, when that register is `exec`,
/// runs the copy described by the current register state.
void MaxwellDMA::WriteReg(u32 method, u32 value) {
    ASSERT_MSG(method < Regs::NUM_REGS,
               "Invalid MaxwellDMA register, increase the size of the Regs structure");

    // Word index of the `exec` register within the register file.
    constexpr auto exec_index = offsetof(Tegra::Engines::MaxwellDMA::Regs, exec) / sizeof(u32);

    // The register is updated first so that HandleCopy() observes the freshly written value.
    regs.reg_array[method] = value;

    if (method == exec_index) {
        HandleCopy();
    }
}
|
||||||
|
|
||||||
|
void MaxwellDMA::HandleCopy() {
|
||||||
|
NGLOG_WARNING(HW_GPU, "Requested a DMA copy");
|
||||||
|
|
||||||
|
const GPUVAddr source = regs.src_address.Address();
|
||||||
|
const GPUVAddr dest = regs.dst_address.Address();
|
||||||
|
|
||||||
|
const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
|
||||||
|
const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
|
||||||
|
|
||||||
|
// TODO(Subv): Perform more research and implement all features of this engine.
|
||||||
|
ASSERT(regs.exec.enable_swizzle == 0);
|
||||||
|
ASSERT(regs.exec.enable_2d == 1);
|
||||||
|
ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
|
||||||
|
ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
|
||||||
|
ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
|
||||||
|
ASSERT(regs.src_params.pos_x == 0);
|
||||||
|
ASSERT(regs.src_params.pos_y == 0);
|
||||||
|
ASSERT(regs.dst_params.pos_x == 0);
|
||||||
|
ASSERT(regs.dst_params.pos_y == 0);
|
||||||
|
ASSERT(regs.exec.is_dst_linear != regs.exec.is_src_linear);
|
||||||
|
|
||||||
|
u8* src_buffer = Memory::GetPointer(source_cpu);
|
||||||
|
u8* dst_buffer = Memory::GetPointer(dest_cpu);
|
||||||
|
|
||||||
|
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
|
||||||
|
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
|
||||||
|
Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
|
||||||
|
dst_buffer, true, regs.src_params.BlockHeight());
|
||||||
|
} else {
|
||||||
|
// If the input is linear and the output is tiled, swizzle the input and copy it over.
|
||||||
|
Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
|
||||||
|
src_buffer, false, regs.dst_params.BlockHeight());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Engines
|
||||||
|
} // namespace Tegra
|
|
@ -0,0 +1,155 @@
|
||||||
|
// Copyright 2018 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/bit_field.h"
|
||||||
|
#include "common/common_funcs.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
|
namespace Tegra {
|
||||||
|
namespace Engines {
|
||||||
|
|
||||||
|
class MaxwellDMA final {
|
||||||
|
public:
|
||||||
|
explicit MaxwellDMA(MemoryManager& memory_manager);
|
||||||
|
~MaxwellDMA() = default;
|
||||||
|
|
||||||
|
/// Write the value to the register identified by method.
|
||||||
|
void WriteReg(u32 method, u32 value);
|
||||||
|
|
||||||
|
struct Regs {
|
||||||
|
static constexpr size_t NUM_REGS = 0x1D6;
|
||||||
|
|
||||||
|
struct Parameters {
|
||||||
|
union {
|
||||||
|
BitField<0, 4, u32> block_depth;
|
||||||
|
BitField<4, 4, u32> block_height;
|
||||||
|
BitField<8, 4, u32> block_width;
|
||||||
|
};
|
||||||
|
u32 size_x;
|
||||||
|
u32 size_y;
|
||||||
|
u32 size_z;
|
||||||
|
u32 pos_z;
|
||||||
|
union {
|
||||||
|
BitField<0, 16, u32> pos_x;
|
||||||
|
BitField<16, 16, u32> pos_y;
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 BlockHeight() const {
|
||||||
|
return 1 << block_height;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
|
||||||
|
|
||||||
|
enum class CopyMode : u32 {
|
||||||
|
None = 0,
|
||||||
|
Unk1 = 1,
|
||||||
|
Unk2 = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class QueryMode : u32 {
|
||||||
|
None = 0,
|
||||||
|
Short = 1,
|
||||||
|
Long = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class QueryIntr : u32 {
|
||||||
|
None = 0,
|
||||||
|
Block = 1,
|
||||||
|
NonBlock = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
INSERT_PADDING_WORDS(0xC0);
|
||||||
|
|
||||||
|
struct {
|
||||||
|
union {
|
||||||
|
BitField<0, 2, CopyMode> copy_mode;
|
||||||
|
BitField<2, 1, u32> flush;
|
||||||
|
|
||||||
|
BitField<3, 2, QueryMode> query_mode;
|
||||||
|
BitField<5, 2, QueryIntr> query_intr;
|
||||||
|
|
||||||
|
BitField<7, 1, u32> is_src_linear;
|
||||||
|
BitField<8, 1, u32> is_dst_linear;
|
||||||
|
|
||||||
|
BitField<9, 1, u32> enable_2d;
|
||||||
|
BitField<10, 1, u32> enable_swizzle;
|
||||||
|
};
|
||||||
|
} exec;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x3F);
|
||||||
|
|
||||||
|
struct {
|
||||||
|
u32 address_high;
|
||||||
|
u32 address_low;
|
||||||
|
|
||||||
|
GPUVAddr Address() const {
|
||||||
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||||
|
address_low);
|
||||||
|
}
|
||||||
|
} src_address;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
u32 address_high;
|
||||||
|
u32 address_low;
|
||||||
|
|
||||||
|
GPUVAddr Address() const {
|
||||||
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||||
|
address_low);
|
||||||
|
}
|
||||||
|
} dst_address;
|
||||||
|
|
||||||
|
u32 src_pitch;
|
||||||
|
u32 dst_pitch;
|
||||||
|
u32 x_count;
|
||||||
|
u32 y_count;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0xBB);
|
||||||
|
|
||||||
|
Parameters dst_params;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(1);
|
||||||
|
|
||||||
|
Parameters src_params;
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x13);
|
||||||
|
};
|
||||||
|
std::array<u32, NUM_REGS> reg_array;
|
||||||
|
};
|
||||||
|
} regs{};
|
||||||
|
|
||||||
|
MemoryManager& memory_manager;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Performs the copy from the source buffer to the destination buffer as configured in the
|
||||||
|
/// registers.
|
||||||
|
void HandleCopy();
|
||||||
|
};
|
||||||
|
|
||||||
|
#define ASSERT_REG_POSITION(field_name, position) \
|
||||||
|
static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
|
||||||
|
"Field " #field_name " has invalid position")
|
||||||
|
|
||||||
|
ASSERT_REG_POSITION(exec, 0xC0);
|
||||||
|
ASSERT_REG_POSITION(src_address, 0x100);
|
||||||
|
ASSERT_REG_POSITION(dst_address, 0x102);
|
||||||
|
ASSERT_REG_POSITION(src_pitch, 0x104);
|
||||||
|
ASSERT_REG_POSITION(dst_pitch, 0x105);
|
||||||
|
ASSERT_REG_POSITION(x_count, 0x106);
|
||||||
|
ASSERT_REG_POSITION(y_count, 0x107);
|
||||||
|
ASSERT_REG_POSITION(dst_params, 0x1C3);
|
||||||
|
ASSERT_REG_POSITION(src_params, 0x1CA);
|
||||||
|
|
||||||
|
#undef ASSERT_REG_POSITION
|
||||||
|
|
||||||
|
} // namespace Engines
|
||||||
|
} // namespace Tegra
|
|
@ -5,6 +5,7 @@
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/maxwell_compute.h"
|
#include "video_core/engines/maxwell_compute.h"
|
||||||
|
#include "video_core/engines/maxwell_dma.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
@ -14,6 +15,7 @@ GPU::GPU() {
|
||||||
maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
|
maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
|
||||||
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
|
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
|
||||||
maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
|
maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
|
||||||
|
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU::~GPU() = default;
|
GPU::~GPU() = default;
|
||||||
|
|
|
@ -63,6 +63,7 @@ namespace Engines {
|
||||||
class Fermi2D;
|
class Fermi2D;
|
||||||
class Maxwell3D;
|
class Maxwell3D;
|
||||||
class MaxwellCompute;
|
class MaxwellCompute;
|
||||||
|
class MaxwellDMA;
|
||||||
} // namespace Engines
|
} // namespace Engines
|
||||||
|
|
||||||
enum class EngineID {
|
enum class EngineID {
|
||||||
|
@ -103,6 +104,8 @@ private:
|
||||||
std::unique_ptr<Engines::Fermi2D> fermi_2d;
|
std::unique_ptr<Engines::Fermi2D> fermi_2d;
|
||||||
/// Compute engine
|
/// Compute engine
|
||||||
std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
|
std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
|
||||||
|
/// DMA engine
|
||||||
|
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
Reference in New Issue