Skip to content

Commit

Permalink
Add optional driver workaround to RenderingDevice for Adreno 6XX.
Browse files Browse the repository at this point in the history
Co-authored-by: Clay John <claynjohn@gmail.com>
  • Loading branch information
DarioSamo and clayjohn committed May 9, 2024
1 parent c4279fe commit 94aef4b
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 36 deletions.
1 change: 1 addition & 0 deletions drivers/d3d12/rendering_context_driver_d3d12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ Error RenderingContextDriverD3D12::_initialize_devices() {
Device &device = driver_devices[i];
device.name = desc.Description;
device.vendor = Vendor(desc.VendorId);
device.workarounds = Workarounds();

if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
device.type = DEVICE_TYPE_CPU;
Expand Down
27 changes: 27 additions & 0 deletions drivers/vulkan/rendering_context_driver_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,9 @@ Error RenderingContextDriverVulkan::_initialize_devices() {
driver_device.name = String::utf8(props.deviceName);
driver_device.vendor = Vendor(props.vendorID);
driver_device.type = DeviceType(props.deviceType);
driver_device.workarounds = Workarounds();

_check_driver_workarounds(props, driver_device);

uint32_t queue_family_properties_count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &queue_family_properties_count, nullptr);
Expand All @@ -515,6 +518,30 @@ Error RenderingContextDriverVulkan::_initialize_devices() {
return OK;
}

void RenderingContextDriverVulkan::_check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, Device &r_device) {
// Workaround for the Adreno 6XX family of devices.
//
// There's a known issue with the Vulkan driver in this family of devices where it'll crash if a dynamic state for drawing is
// used in a command buffer before a dispatch call is issued. As both dynamic scissor and viewport are basic requirements for
// the engine to not bake this state into the PSO, the only known way to fix this issue is to reset the command buffer entirely.
//
// As the render graph has no built in limitations of whether it'll issue compute work before anything needs to draw on the
// frame, and there's no guarantee that compute work will never be dependent on rasterization in the future, this workaround
// will end recording on the current command buffer any time a compute list is encountered after a draw list was executed.
// A new command buffer will be created afterwards and the appropriate synchronization primitives will be inserted.
//
// Executing this workaround has the added cost of synchronization between all the command buffers that are created as well as
// all the individual submissions. This performance hit is accepted for the sake of being able to support these devices without
// limiting the design of the renderer.
//
// This bug was fixed in driver version 512.503.0, so we only enabled it on devices older than this.
//
r_device.workarounds.avoid_compute_after_draw =
r_device.vendor == VENDOR_QUALCOMM &&
p_device_properties.driverVersion < VK_MAKE_VERSION(512, 503, 0) &&
r_device.name.find("Turnip") < 0;
}

bool RenderingContextDriverVulkan::_use_validation_layers() const {
return Engine::get_singleton()->is_validation_layers_enabled();
}
Expand Down
1 change: 1 addition & 0 deletions drivers/vulkan/rendering_context_driver_vulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class RenderingContextDriverVulkan : public RenderingContextDriver {
Error _initialize_instance_extensions();
Error _initialize_instance();
Error _initialize_devices();
void _check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, Device &r_device);

// Static callbacks.
static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT p_message_severity, VkDebugUtilsMessageTypeFlagsEXT p_message_type, const VkDebugUtilsMessengerCallbackDataEXT *p_callback_data, void *p_user_data);
Expand Down
5 changes: 5 additions & 0 deletions servers/rendering/rendering_context_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,15 @@ class RenderingContextDriver {
DEVICE_TYPE_MAX = 0x5
};

struct Workarounds {
bool avoid_compute_after_draw = false;
};

struct Device {
String name = "Unknown";
Vendor vendor = VENDOR_UNKNOWN;
DeviceType type = DEVICE_TYPE_OTHER;
Workarounds workarounds;
};

virtual ~RenderingContextDriver();
Expand Down
75 changes: 68 additions & 7 deletions servers/rendering/rendering_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4877,25 +4877,78 @@ void RenderingDevice::_end_frame() {
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
}

draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS);
driver->command_buffer_end(frames[frame].setup_command_buffer);
driver->command_buffer_end(frames[frame].draw_command_buffer);

// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer;
draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
driver->command_buffer_end(command_buffer);
driver->end_segment();
}

void RenderingDevice::_execute_frame(bool p_present) {
// Check whether this frame should present the swap chains and in which queue.
const bool frame_can_present = p_present && !frames[frame].swap_chains_to_present.is_empty();
const bool separate_present_queue = main_queue != present_queue;
const VectorView<RDD::SemaphoreID> execute_draw_semaphore = frame_can_present && separate_present_queue ? frames[frame].draw_semaphore : VectorView<RDD::SemaphoreID>();
const VectorView<RDD::SwapChainID> execute_draw_swap_chains = frame_can_present && !separate_present_queue ? frames[frame].swap_chains_to_present : VectorView<RDD::SwapChainID>();
thread_local LocalVector<RDD::SwapChainID> swap_chains;
swap_chains.clear();

// Execute the setup command buffer.
driver->command_queue_execute_and_present(main_queue, {}, frames[frame].setup_command_buffer, frames[frame].setup_semaphore, {}, {});
driver->command_queue_execute_and_present(main_queue, frames[frame].setup_semaphore, frames[frame].draw_command_buffer, execute_draw_semaphore, frames[frame].draw_fence, execute_draw_swap_chains);

// Execute command buffers and use semaphores to wait on the execution of the previous one. Normally there's only one command buffer,
// but driver workarounds can force situations where there'll be more.
uint32_t command_buffer_count = 1;
RDG::CommandBufferPool &buffer_pool = frames[frame].command_buffer_pool;
if (buffer_pool.buffers_used > 0) {
command_buffer_count += buffer_pool.buffers_used;
buffer_pool.buffers_used = 0;
}

RDD::SemaphoreID wait_semaphore = frames[frame].setup_semaphore;
for (uint32_t i = 0; i < command_buffer_count; i++) {
RDD::CommandBufferID command_buffer;
RDD::SemaphoreID signal_semaphore;
RDD::FenceID signal_fence;
if (i > 0) {
command_buffer = buffer_pool.buffers[i - 1];
signal_semaphore = buffer_pool.semaphores[i - 1];
} else {
command_buffer = frames[frame].draw_command_buffer;
signal_semaphore = frames[frame].draw_semaphore;
}

bool signal_semaphore_valid;
if (i == (command_buffer_count - 1)) {
// This is the last command buffer, it should signal the fence.
signal_fence = frames[frame].draw_fence;
signal_semaphore_valid = false;

if (frame_can_present && separate_present_queue) {
// The semaphore is required if the frame can be presented and a separate present queue is used.
signal_semaphore_valid = true;
} else if (frame_can_present) {
// Just present the swap chains as part of the last command execution.
swap_chains = frames[frame].swap_chains_to_present;
}
} else {
// Semaphores always need to be signaled if it's not the last command buffer.
signal_semaphore_valid = true;
}

driver->command_queue_execute_and_present(main_queue, wait_semaphore, command_buffer, signal_semaphore_valid ? signal_semaphore : VectorView<RDD::SemaphoreID>(), signal_fence, swap_chains);

// Make the next command buffer wait on the semaphore signaled by this one.
wait_semaphore = signal_semaphore;
}

// Indicate the fence has been signaled so the next time the frame's contents need to be used, the CPU needs to wait on the work to be completed.
frames[frame].draw_fence_signaled = true;

if (frame_can_present) {
if (separate_present_queue) {
// Issue the presentation separately if the presentation queue is different from the main queue.
driver->command_queue_execute_and_present(present_queue, frames[frame].draw_semaphore, {}, {}, {}, frames[frame].swap_chains_to_present);
driver->command_queue_execute_and_present(present_queue, wait_semaphore, {}, {}, {}, frames[frame].swap_chains_to_present);
}

frames[frame].swap_chains_to_present.clear();
Expand Down Expand Up @@ -5044,6 +5097,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
frames[i].timestamp_cpu_result_values.resize(max_timestamp_query_elements);
frames[i].timestamp_result_values.resize(max_timestamp_query_elements);
frames[i].timestamp_result_count = 0;

// Assign the main queue family and command pool to the command buffer pool.
frames[i].command_buffer_pool.pool = frames[i].command_pool;
}

// Start from frame count, so everything else is immediately old.
Expand All @@ -5055,7 +5111,7 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
driver->command_buffer_begin(frames[0].draw_command_buffer);

// Create draw graph and start it initialized as well.
draw_graph.initialize(driver, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME);
draw_graph.initialize(driver, device, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME);
draw_graph.begin();

for (uint32_t i = 0; i < frames.size(); i++) {
Expand Down Expand Up @@ -5388,6 +5444,11 @@ void RenderingDevice::finalize() {
driver->semaphore_free(frames[i].setup_semaphore);
driver->semaphore_free(frames[i].draw_semaphore);
driver->fence_free(frames[i].draw_fence);

RDG::CommandBufferPool &buffer_pool = frames[i].command_buffer_pool;
for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) {
driver->semaphore_free(buffer_pool.semaphores[j]);
}
}

if (pipeline_cache_enabled) {
Expand Down
3 changes: 3 additions & 0 deletions servers/rendering/rendering_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,9 @@ class RenderingDevice : public RenderingDeviceCommons {
// Swap chains prepared for drawing during the frame that must be presented.
LocalVector<RDD::SwapChainID> swap_chains_to_present;

// Extra command buffer pool used for driver workarounds.
RDG::CommandBufferPool command_buffer_pool;

struct Timestamp {
String description;
uint64_t value = 0;
Expand Down

0 comments on commit 94aef4b

Please sign in to comment.