After upgrading from v0.34 to v0.35, the process crashes with an access violation (exit code: 0xc0000005, STATUS_ACCESS_VIOLATION) whenever I create the app. Maybe that's related to switching to `CStr`?
/// Handle to the application state.
///
/// Newtype around an [`Arc`] of [`inner::App`]; the derived `Clone` clones the
/// `Arc`, so copies share the same underlying `inner::App`.
///
/// NOTE(review): `Deref` comes from a derive macro not visible here — presumably
/// it dereferences to the wrapped `Arc<inner::App>`; confirm against the macro.
#[derive(Debug, Clone, Deref)]
pub struct App(Arc<inner::App>);
impl App {
    /// Creates a new Vulkan application instance.
    ///
    /// Steps, in order:
    /// 1. Loads the Vulkan library and creates an instance, enabling the validation
    ///    layer when `VALIDATION_ENABLED` is set and every entry of
    ///    `INSTANCE_EXTENSIONS` that the loader reports as supported.
    /// 2. Picks a physical device that supports every entry of `DEVICE_EXTENSIONS`
    ///    and exposes two distinct queue families (compute and transfer),
    ///    preferring discrete GPUs over integrated ones over everything else.
    /// 3. Queries and logs the device properties.
    /// 4. Creates the logical device, both submit queues, a pipeline cache, a
    ///    descriptor pool and a timestamp query pool.
    ///
    /// # Errors
    ///
    /// Returns an error when the Vulkan library cannot be loaded, when validation
    /// is enabled but the validation layer is not installed, when no physical
    /// device satisfies the requirements, or when any Vulkan call fails.
    ///
    /// NOTE(review): handles created before a failing call (instance, device,
    /// pools) are not destroyed on the early-return paths in this function —
    /// confirm whether the wrapper types clean up on drop.
    ///
    /// # Safety
    ///
    /// This function is unsafe because it calls Vulkan API functions, which may
    /// have undefined behavior if used incorrectly. Validation layers, which
    /// would catch many such mistakes, are only enabled in debug builds.
    ///
    /// Callers must ensure:
    /// - Vulkan drivers are properly installed on the system
    /// - The system has a compatible GPU with required Vulkan extensions
    unsafe fn new_unsafe() -> Result<Self, Box<dyn Error>> {
        let loader = LibloadingLoader::new(LIBRARY)?;
        let entry = Entry::new(loader).map_err(|_| "failed to load vulkan")?;
        log::info!("{}", entry.version()?);

        // Only enumerate the installed layers when validation is actually requested;
        // otherwise the query is wasted work and one more call that can fail.
        let layers = if VALIDATION_ENABLED {
            let available: HashSet<_> = entry
                .enumerate_instance_layer_properties()?
                .iter()
                .map(|properties| properties.layer_name)
                .collect();
            if !available.contains(&VALIDATION_LAYER) {
                return Err("validation layer not found".into());
            }
            vec![VALIDATION_LAYER]
        } else {
            vec![]
        };
        // The shadowed `Vec` of names stays alive until the end of the function, so
        // these raw pointers remain valid for `create_instance`.
        let layers = layers.iter().map(|name| name.as_ptr()).collect_vec();

        let extensions = {
            // enumerate and validate instance extensions
            log::info!("enumerating available instance extensions...");
            let properties = entry.enumerate_instance_extension_properties(None)?;
            log::info!("found {} available instance extensions", properties.len());
            properties
                .iter()
                .for_each(|ext| log::info!("\t{}", ext.extension_name));
            let supported: HashSet<_> = properties
                .into_iter()
                .map(|property| property.extension_name)
                .collect();
            // Unsupported instance extensions are reported but not fatal.
            INSTANCE_EXTENSIONS
                .iter()
                .filter(|ext| !supported.contains(&ext.name))
                .for_each(|ext| log::warn!("instance does not support extension {}", ext.name));
            // NOTE(review): the pointers below borrow from `INSTANCE_EXTENSIONS`;
            // this assumes that item has static storage — confirm at its definition.
            INSTANCE_EXTENSIONS
                .iter()
                .filter(|ext| supported.contains(&ext.name))
                .map(|ext| ext.name.as_ptr())
                .collect_vec()
        };

        let info = vk::ApplicationInfo::builder()
            // Vulkan reads the application name as a NUL-terminated C string, so the
            // terminator must be part of the byte slice handed to the builder.
            .application_name(concat!(env!("CARGO_PKG_NAME"), "\0").as_bytes())
            .application_version(parse_version(env!("CARGO_PKG_VERSION")))
            .api_version(vk::make_version(1, 3, 0));
        let info = vk::InstanceCreateInfo::builder()
            .application_info(&info)
            .enabled_layer_names(&layers)
            .enabled_extension_names(&extensions);
        let instance = entry.create_instance(&info, None)?;

        let (device, compute_family_index, transfer_family_index) = instance
            .enumerate_physical_devices()?
            .into_iter()
            .filter_map(|device| {
                // Devices whose extensions cannot be queried are skipped silently.
                let properties = instance
                    .enumerate_device_extension_properties(device, None)
                    .ok()?;
                let supported: HashSet<_> = properties
                    .iter()
                    .map(|properties| properties.extension_name)
                    .collect();
                // Every device extension is mandatory: reject on the first missing one.
                if let Some(ext) = DEVICE_EXTENSIONS
                    .iter()
                    .find(|ext| !supported.contains(&ext.name))
                {
                    let properties = instance.get_physical_device_properties(device);
                    log::warn!(
                        "physical device {} does not support extension {}",
                        properties.device_name,
                        ext.name
                    );
                    return None;
                }
                let queue_families = instance.get_physical_device_queue_family_properties(device);
                // Compute family: first family that supports both timestamps and compute.
                let compute_family_index = queue_families
                    .iter()
                    .enumerate()
                    .filter(|(_, p)| p.timestamp_valid_bits != 0)
                    .find(|(_, p)| p.queue_flags.contains(vk::QueueFlags::COMPUTE))?
                    .0 as u32;
                // Transfer family: a different family with TRANSFER support, preferring
                // the one with the fewest capability bits set.
                let transfer_family_index = queue_families
                    .iter()
                    .enumerate()
                    .filter(|&(index, _)| index as u32 != compute_family_index)
                    .filter(|(_, p)| p.queue_flags.contains(vk::QueueFlags::TRANSFER))
                    .min_by_key(|(_, p)| p.queue_flags.bits().count_ones())?
                    .0 as u32;
                Some((device, compute_family_index, transfer_family_index))
            })
            // Rank candidates: discrete GPU first, then integrated, then anything else.
            .min_by_key(|&(device, _, _)| {
                let properties = instance.get_physical_device_properties(device);
                match properties.device_type {
                    vk::PhysicalDeviceType::DISCRETE_GPU => 0,
                    vk::PhysicalDeviceType::INTEGRATED_GPU => 1,
                    _ => 2,
                }
            })
            .ok_or("cannot find physical device")?;

        // Keep only subgroup-scoped cooperative matrix configurations that convert
        // into the project's own representation.
        let cooperative_matrix = instance
            .get_physical_device_cooperative_matrix_properties_khr(device)?
            .into_iter()
            .filter(|p| p.scope == vk::ScopeKHR::SUBGROUP)
            .filter_map(|p| p.try_into().ok())
            .collect_vec();

        let mut vk13 = vk::PhysicalDeviceVulkan13Properties::builder();
        let mut properties2 = vk::PhysicalDeviceProperties2::builder().push_next(&mut vk13);
        instance.get_physical_device_properties2(device, &mut properties2);
        let properties = properties2.properties;
        let device_name = properties.device_name.as_bytes();
        let device_name = std::ffi::CStr::from_bytes_until_nul(device_name).unwrap_or_default();
        let device_name = device_name.to_str().unwrap_or("invalid device name");
        log::info!("\tdevice name: {device_name}");
        log::info!("\tdevice type: {:?}", properties.device_type);
        log::info!(
            "\tdriver version: {}.{}.{}",
            vk::version_major(properties.driver_version),
            vk::version_minor(properties.driver_version),
            vk::version_patch(properties.driver_version)
        );
        log::info!(
            "\tAPI version: {}.{}.{}",
            vk::version_major(properties.api_version),
            vk::version_minor(properties.api_version),
            vk::version_patch(properties.api_version)
        );
        log::info!("\tvendor ID: 0x{:04X}", properties.vendor_id);
        log::info!("\tdevice ID: 0x{:04X}", properties.device_id);
        let limits = properties.limits;
        let vk13 = vk13.build();

        let properties = instance.get_physical_device_memory_properties(device);
        log::info!("\tmemory heaps: {} heap(s)", properties.memory_heap_count);
        for i in 0..properties.memory_heap_count as usize {
            let heap = &properties.memory_heaps[i];
            let size = heap.size / 1024 / 1024;
            let flags = heap.flags;
            log::info!("\t\theap {i}: {size} MB, flags: {flags:?}");
        }
        log::info!("\tmemory types: {} type(s)", properties.memory_type_count);
        let memory = properties.into();
        let properties = Properties {
            memory,
            cooperative_matrix,
            vk13,
            limits,
        };
        log::info!("{properties}");

        // !!! CRASHES SOMEWHERE AFTER THIS LINE !!!
        // NOTE(review): `extensions` holds raw pointers that the borrow checker does
        // not track; they must stay valid until `create_device` returns. This assumes
        // `DEVICE_EXTENSIONS` has static storage — confirm at its definition.
        let extensions = DEVICE_EXTENSIONS
            .iter()
            .map(|ext| ext.name.as_ptr())
            .collect_vec();
        assert_ne!(compute_family_index, transfer_family_index);
        // One queue per family: compute at priority 1.0, transfer at priority 0.0.
        let infos = &[
            vk::DeviceQueueCreateInfo::builder()
                .queue_family_index(compute_family_index)
                .queue_priorities(&[1.0]),
            vk::DeviceQueueCreateInfo::builder()
                .queue_family_index(transfer_family_index)
                .queue_priorities(&[0.0]),
        ];
        let mut features_11 =
            vk::PhysicalDeviceVulkan11Features::builder().storage_buffer_16bit_access(true);
        let mut features_12 = vk::PhysicalDeviceVulkan12Features::builder()
            .buffer_device_address(true)
            .shader_float16(true)
            .shader_int8(true)
            .shader_subgroup_extended_types(true)
            .vulkan_memory_model(true)
            .vulkan_memory_model_device_scope(true)
            .timeline_semaphore(true);
        let mut features_13 = vk::PhysicalDeviceVulkan13Features::builder()
            .maintenance4(true)
            .synchronization2(true)
            .compute_full_subgroups(true)
            .subgroup_size_control(true);
        let mut features_cooperative_matrix =
            vk::PhysicalDeviceCooperativeMatrixFeaturesKHR::builder().cooperative_matrix(true);
        let info = vk::DeviceCreateInfo::builder()
            .enabled_extension_names(&extensions)
            .queue_create_infos(infos)
            .push_next(&mut features_11)
            .push_next(&mut features_12)
            .push_next(&mut features_13)
            .push_next(&mut features_cooperative_matrix);
        let device = instance.create_device(device, &info, None)?;

        let compute = Submit::new(&device, compute_family_index, 0)?;
        let transfer = Submit::new(&device, transfer_family_index, 0)?;

        let info = vk::PipelineCacheCreateInfo::builder();
        let pipeline_cache = device.create_pipeline_cache(&info, None)?;

        let sizes = [
            vk::DescriptorPoolSize::builder()
                .type_(vk::DescriptorType::UNIFORM_BUFFER)
                .descriptor_count(DESCRIPTOR_COUNT_UNIFORM_BUFFER as u32),
            vk::DescriptorPoolSize::builder()
                .type_(vk::DescriptorType::COMBINED_IMAGE_SAMPLER)
                .descriptor_count(DESCRIPTOR_COUNT_SAMPLER as u32),
            vk::DescriptorPoolSize::builder()
                .type_(vk::DescriptorType::STORAGE_IMAGE)
                .descriptor_count(DESCRIPTOR_COUNT_STORAGE_IMAGE as u32),
        ];
        let info = vk::DescriptorPoolCreateInfo::builder()
            .flags(vk::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET)
            .pool_sizes(&sizes)
            .max_sets(DESCRIPTOR_POOL_MAX_SETS as u32);
        let descriptor_pool = device.create_descriptor_pool(&info, None)?;

        let info = vk::QueryPoolCreateInfo::builder()
            .query_type(vk::QueryType::TIMESTAMP)
            .query_count(QUERY_POOL_SIZE as u32);
        let query_pool = device.create_query_pool(&info, None)?;

        Ok(Self(Arc::new(inner::App {
            instance,
            device,
            properties,
            compute,
            transfer,
            pipeline_cache,
            descriptor_pool,
            query_pool,
        })))
    }
}
running 1 test
19:52:18 [INFO] load suite: 8098 patches
19:52:18 [INFO] model patches: 4
19:52:19 [INFO] 1.4.328
19:52:19 [INFO] enumerating available instance extensions...
19:52:20 [INFO] found 19 available instance extensions
19:52:20 [INFO] VK_KHR_device_group_creation
19:52:20 [INFO] VK_KHR_display
19:52:20 [INFO] VK_KHR_external_fence_capabilities
19:52:20 [INFO] VK_KHR_external_memory_capabilities
19:52:20 [INFO] VK_KHR_external_semaphore_capabilities
19:52:20 [INFO] VK_KHR_get_display_properties2
19:52:20 [INFO] VK_KHR_get_physical_device_properties2
19:52:20 [INFO] VK_KHR_get_surface_capabilities2
19:52:20 [INFO] VK_KHR_surface
19:52:20 [INFO] VK_KHR_surface_protected_capabilities
19:52:20 [INFO] VK_KHR_win32_surface
19:52:20 [INFO] VK_EXT_debug_report
19:52:20 [INFO] VK_EXT_debug_utils
19:52:20 [INFO] VK_EXT_direct_mode_display
19:52:20 [INFO] VK_EXT_surface_maintenance1
19:52:20 [INFO] VK_EXT_swapchain_colorspace
19:52:20 [INFO] VK_NV_external_memory_capabilities
19:52:20 [INFO] VK_KHR_portability_enumeration
19:52:20 [INFO] VK_LUNARG_direct_driver_loading
19:52:20 [WARN] physical device Intel(R) Iris(R) Xe Graphics does not support extension VK_KHR_cooperative_matrix
19:52:20 [INFO] device name: NVIDIA RTX 2000 Ada Generation Laptop GPU
19:52:20 [INFO] device type: DISCRETE_GPU
19:52:20 [INFO] driver version: 573.284.0
19:52:20 [INFO] API version: 1.4.303
19:52:20 [INFO] vendor ID: 0x10DE
19:52:20 [INFO] device ID: 0x28B8
19:52:20 [INFO] memory heaps: 2 heap(s)
19:52:20 [INFO] heap 0: 7957 MB, flags: DEVICE_LOCAL
19:52:20 [INFO] heap 1: 16200 MB, flags: (empty)
19:52:20 [INFO] memory types: 5 type(s)
19:52:20 [INFO] properties:
cooperative matrix:
16 x 16 x 16 F16 x F16 + F16 → F16
16 x 8 x 16 F16 x F16 + F16 → F16
16 x 8 x 8 F16 x F16 + F16 → F16
16 x 16 x 16 F16 x F16 + F32 → F32
16 x 8 x 16 F16 x F16 + F32 → F32
16 x 8 x 8 F16 x F16 + F32 → F32
16 x 16 x 32 U8 x U8 + U32 → U32
16 x 16 x 32 I8 x I8 + I32 → I32
16 x 8 x 32 U8 x U8 + U32 → U32
16 x 8 x 32 I8 x I8 + I32 → I32
error: process didn't exit successfully: `target\debug\mollymawk.exe model --mode fused-feature-mlp` (exit code: 0xc0000005, STATUS_ACCESS_VIOLATION)
After upgrading from v0.34 to v0.35, the process crashes with an access violation (exit code: 0xc0000005, STATUS_ACCESS_VIOLATION) whenever I create the app. Maybe that's related to switching to `CStr`?

Output when calling `App::new()`: