Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional driver workaround to RenderingDevice for Adreno 6XX. #91514

Merged
merged 1 commit into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions drivers/d3d12/rendering_context_driver_d3d12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ Error RenderingContextDriverD3D12::_initialize_devices() {
Device &device = driver_devices[i];
device.name = desc.Description;
device.vendor = Vendor(desc.VendorId);
device.workarounds = Workarounds();

if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
device.type = DEVICE_TYPE_CPU;
Expand Down
28 changes: 28 additions & 0 deletions drivers/vulkan/rendering_context_driver_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,9 @@ Error RenderingContextDriverVulkan::_initialize_devices() {
driver_device.name = String::utf8(props.deviceName);
driver_device.vendor = Vendor(props.vendorID);
driver_device.type = DeviceType(props.deviceType);
driver_device.workarounds = Workarounds();

_check_driver_workarounds(props, driver_device);

uint32_t queue_family_properties_count = 0;
vkGetPhysicalDeviceQueueFamilyProperties(physical_devices[i], &queue_family_properties_count, nullptr);
Expand All @@ -515,6 +518,31 @@ Error RenderingContextDriverVulkan::_initialize_devices() {
return OK;
}

// Decides which driver workarounds the given Vulkan device needs and stores the result in r_device.workarounds.
//
// p_device_properties: properties reported by Vulkan for the physical device (deviceID and driverVersion are read).
// r_device: device description whose vendor and name are read and whose workaround flags are written.
void RenderingContextDriverVulkan::_check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, Device &r_device) {
	// Adreno 6XX family workaround.
	//
	// The Vulkan driver on this family of devices is known to crash when a dynamic state for drawing is used in a command
	// buffer before a dispatch call is issued. Dynamic scissor and viewport are basic requirements for the engine (so this
	// state is not baked into the PSO), which leaves resetting the command buffer entirely as the only known fix.
	//
	// The render graph places no built-in limitation on issuing compute work before anything needs to draw on the frame,
	// and nothing guarantees that compute work will never depend on rasterization in the future. Because of that, the
	// workaround ends recording on the current command buffer whenever a compute list is encountered after a draw list was
	// executed. A new command buffer is created afterwards and the appropriate synchronization primitives are inserted.
	//
	// The cost is extra synchronization between all the command buffers that get created, plus the individual submissions.
	// That performance hit is accepted so these devices can be supported without limiting the design of the renderer.
	//
	// The bug was fixed in driver version 512.503.0, so the workaround is only enabled on drivers older than that.
	const bool vendor_is_qualcomm = r_device.vendor == VENDOR_QUALCOMM;

	// Adreno 6xx.
	// NOTE(review): only a lower bound is tested here, so larger device IDs match as well - confirm this is intended.
	const bool device_id_matches = p_device_properties.deviceID >= 0x6000000;

	const bool driver_predates_fix = p_device_properties.driverVersion < VK_MAKE_VERSION(512, 503, 0);

	// Devices whose name contains "Turnip" are excluded from the workaround.
	const bool name_mentions_turnip = r_device.name.find("Turnip") >= 0;

	r_device.workarounds.avoid_compute_after_draw = vendor_is_qualcomm && device_id_matches && driver_predates_fix && !name_mentions_turnip;
}

bool RenderingContextDriverVulkan::_use_validation_layers() const {
return Engine::get_singleton()->is_validation_layers_enabled();
}
Expand Down
1 change: 1 addition & 0 deletions drivers/vulkan/rendering_context_driver_vulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class RenderingContextDriverVulkan : public RenderingContextDriver {
Error _initialize_instance_extensions();
Error _initialize_instance();
Error _initialize_devices();
void _check_driver_workarounds(const VkPhysicalDeviceProperties &p_device_properties, Device &r_device);

// Static callbacks.
static VKAPI_ATTR VkBool32 VKAPI_CALL _debug_messenger_callback(VkDebugUtilsMessageSeverityFlagBitsEXT p_message_severity, VkDebugUtilsMessageTypeFlagsEXT p_message_type, const VkDebugUtilsMessengerCallbackDataEXT *p_callback_data, void *p_user_data);
Expand Down
5 changes: 5 additions & 0 deletions servers/rendering/rendering_context_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,15 @@ class RenderingContextDriver {
DEVICE_TYPE_MAX = 0x5
};

// Set of optional driver workarounds attached to a Device. Every flag defaults to disabled.
struct Workarounds {
// When true, compute work should not be recorded after draw work in the same command buffer.
// The Vulkan context driver enables this for affected Qualcomm Adreno 6XX drivers.
bool avoid_compute_after_draw = false;
};

// Description of a device enumerated by a rendering context driver.
struct Device {
// Human-readable device name; stays "Unknown" until a driver assigns one.
String name = "Unknown";
// Hardware vendor of the device.
Vendor vendor = VENDOR_UNKNOWN;
// Device category (e.g. DEVICE_TYPE_CPU for software adapters).
DeviceType type = DEVICE_TYPE_OTHER;
// Driver workarounds required by this device; default-constructed means none are enabled.
Workarounds workarounds;
};

virtual ~RenderingContextDriver();
Expand Down
75 changes: 68 additions & 7 deletions servers/rendering/rendering_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4877,25 +4877,78 @@ void RenderingDevice::_end_frame() {
ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work).");
}

draw_graph.end(frames[frame].draw_command_buffer, RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS);
driver->command_buffer_end(frames[frame].setup_command_buffer);
driver->command_buffer_end(frames[frame].draw_command_buffer);

// The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use.
RDD::CommandBufferID command_buffer = frames[frame].draw_command_buffer;
draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool);
driver->command_buffer_end(command_buffer);
driver->end_segment();
}

void RenderingDevice::_execute_frame(bool p_present) {
// Check whether this frame should present the swap chains and in which queue.
const bool frame_can_present = p_present && !frames[frame].swap_chains_to_present.is_empty();
const bool separate_present_queue = main_queue != present_queue;
const VectorView<RDD::SemaphoreID> execute_draw_semaphore = frame_can_present && separate_present_queue ? frames[frame].draw_semaphore : VectorView<RDD::SemaphoreID>();
const VectorView<RDD::SwapChainID> execute_draw_swap_chains = frame_can_present && !separate_present_queue ? frames[frame].swap_chains_to_present : VectorView<RDD::SwapChainID>();
thread_local LocalVector<RDD::SwapChainID> swap_chains;
swap_chains.clear();

// Execute the setup command buffer.
driver->command_queue_execute_and_present(main_queue, {}, frames[frame].setup_command_buffer, frames[frame].setup_semaphore, {}, {});
driver->command_queue_execute_and_present(main_queue, frames[frame].setup_semaphore, frames[frame].draw_command_buffer, execute_draw_semaphore, frames[frame].draw_fence, execute_draw_swap_chains);

// Execute command buffers and use semaphores to wait on the execution of the previous one. Normally there's only one command buffer,
// but driver workarounds can force situations where there'll be more.
uint32_t command_buffer_count = 1;
RDG::CommandBufferPool &buffer_pool = frames[frame].command_buffer_pool;
if (buffer_pool.buffers_used > 0) {
command_buffer_count += buffer_pool.buffers_used;
buffer_pool.buffers_used = 0;
}

RDD::SemaphoreID wait_semaphore = frames[frame].setup_semaphore;
for (uint32_t i = 0; i < command_buffer_count; i++) {
RDD::CommandBufferID command_buffer;
RDD::SemaphoreID signal_semaphore;
RDD::FenceID signal_fence;
if (i > 0) {
command_buffer = buffer_pool.buffers[i - 1];
signal_semaphore = buffer_pool.semaphores[i - 1];
} else {
command_buffer = frames[frame].draw_command_buffer;
signal_semaphore = frames[frame].draw_semaphore;
}

bool signal_semaphore_valid;
if (i == (command_buffer_count - 1)) {
// This is the last command buffer, it should signal the fence.
signal_fence = frames[frame].draw_fence;
signal_semaphore_valid = false;

if (frame_can_present && separate_present_queue) {
// The semaphore is required if the frame can be presented and a separate present queue is used.
signal_semaphore_valid = true;
} else if (frame_can_present) {
// Just present the swap chains as part of the last command execution.
swap_chains = frames[frame].swap_chains_to_present;
}
} else {
// Semaphores always need to be signaled if it's not the last command buffer.
signal_semaphore_valid = true;
}

driver->command_queue_execute_and_present(main_queue, wait_semaphore, command_buffer, signal_semaphore_valid ? signal_semaphore : VectorView<RDD::SemaphoreID>(), signal_fence, swap_chains);

// Make the next command buffer wait on the semaphore signaled by this one.
wait_semaphore = signal_semaphore;
}

// Indicate the fence has been signaled so the next time the frame's contents need to be used, the CPU needs to wait on the work to be completed.
frames[frame].draw_fence_signaled = true;

if (frame_can_present) {
if (separate_present_queue) {
// Issue the presentation separately if the presentation queue is different from the main queue.
driver->command_queue_execute_and_present(present_queue, frames[frame].draw_semaphore, {}, {}, {}, frames[frame].swap_chains_to_present);
driver->command_queue_execute_and_present(present_queue, wait_semaphore, {}, {}, {}, frames[frame].swap_chains_to_present);
}

frames[frame].swap_chains_to_present.clear();
Expand Down Expand Up @@ -5044,6 +5097,9 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
frames[i].timestamp_cpu_result_values.resize(max_timestamp_query_elements);
frames[i].timestamp_result_values.resize(max_timestamp_query_elements);
frames[i].timestamp_result_count = 0;

// Assign the main queue family and command pool to the command buffer pool.
frames[i].command_buffer_pool.pool = frames[i].command_pool;
}

// Start from frame count, so everything else is immediately old.
Expand All @@ -5055,7 +5111,7 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ
driver->command_buffer_begin(frames[0].draw_command_buffer);

// Create draw graph and start it initialized as well.
draw_graph.initialize(driver, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME);
draw_graph.initialize(driver, device, frames.size(), main_queue_family, SECONDARY_COMMAND_BUFFERS_PER_FRAME);
draw_graph.begin();

for (uint32_t i = 0; i < frames.size(); i++) {
Expand Down Expand Up @@ -5388,6 +5444,11 @@ void RenderingDevice::finalize() {
driver->semaphore_free(frames[i].setup_semaphore);
driver->semaphore_free(frames[i].draw_semaphore);
driver->fence_free(frames[i].draw_fence);

RDG::CommandBufferPool &buffer_pool = frames[i].command_buffer_pool;
for (uint32_t j = 0; j < buffer_pool.buffers.size(); j++) {
driver->semaphore_free(buffer_pool.semaphores[j]);
}
}

if (pipeline_cache_enabled) {
Expand Down
3 changes: 3 additions & 0 deletions servers/rendering/rendering_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,9 @@ class RenderingDevice : public RenderingDeviceCommons {
// Swap chains prepared for drawing during the frame that must be presented.
LocalVector<RDD::SwapChainID> swap_chains_to_present;

// Extra command buffer pool used for driver workarounds.
RDG::CommandBufferPool command_buffer_pool;

struct Timestamp {
String description;
uint64_t value = 0;
Expand Down