Skip to content

Crashes after upgrading from v0.34 to v0.35 #400

Description

@cryscan

After upgrading from v0.34 to v0.35, the process crashes with an access violation (exit code: 0xc0000005, STATUS_ACCESS_VIOLATION) whenever I create the app. Maybe that's related to the switch to CStr?

/// Shared handle to the Vulkan application state.
///
/// A newtype over `Arc<inner::App>`: cloning the handle clones the `Arc`, not the
/// wrapped state. `Deref` is derived as well — presumably it forwards to the wrapped
/// `Arc<inner::App>`; confirm against the derive macro in use.
#[derive(Debug, Clone, Deref)]
pub struct App(Arc<inner::App>);

impl App {
    /// Creates a new Vulkan application instance.
    ///
    /// Loads the Vulkan library, creates an instance, picks a physical device that
    /// supports every entry of `DEVICE_EXTENSIONS` and exposes separate compute and
    /// transfer queue families, then creates the logical device together with the
    /// pipeline cache, descriptor pool and timestamp query pool.
    ///
    /// # Safety
    ///
    /// This function is unsafe because it:
    /// - Calls Vulkan API functions which may have undefined behavior if used incorrectly
    /// - Only enables the validation layer when `VALIDATION_ENABLED` is set
    ///
    /// Callers must ensure:
    /// - Vulkan drivers are properly installed on the system
    /// - The system has a compatible GPU with required Vulkan extensions
    ///
    /// # Errors
    ///
    /// Returns an error when the Vulkan library cannot be loaded, when validation is
    /// enabled but the validation layer is missing, when no physical device passes
    /// the selection below, or when any of the Vulkan calls fails.
    unsafe fn new_unsafe() -> Result<Self, Box<dyn Error>> {
        let loader = LibloadingLoader::new(LIBRARY)?;
        let entry = Entry::new(loader).map_err(|_| "failed to load vulkan")?;
        log::info!("{}", entry.version()?);

        let layers: HashSet<_> = entry
            .enumerate_instance_layer_properties()?
            .iter()
            .map(|properties| properties.layer_name)
            .collect();
        if VALIDATION_ENABLED && !layers.contains(&VALIDATION_LAYER) {
            return Err("validation layer not found".into());
        }

        // The vector of layer names is only shadowed, not dropped, so the raw
        // pointers collected from it stay valid until the end of this function.
        let layers = match VALIDATION_ENABLED {
            true => vec![VALIDATION_LAYER],
            false => vec![],
        };
        let layers = layers.iter().map(|name| name.as_ptr()).collect_vec();

        let instance_extension_names = {
            // enumerate and validate instance extensions
            log::info!("enumerating available instance extensions...");
            let properties = entry.enumerate_instance_extension_properties(None)?;

            log::info!("found {} available instance extensions", properties.len());
            properties
                .iter()
                .for_each(|ext| log::info!("\t{}", ext.extension_name));

            let supported: HashSet<_> = properties
                .into_iter()
                .map(|property| property.extension_name)
                .collect();
            INSTANCE_EXTENSIONS
                .iter()
                .filter(|ext| !supported.contains(&ext.name))
                .for_each(|ext| log::warn!("instance does not support extension {}", ext.name));
            // Keep owned copies of the enabled names (same approach as `layers`
            // above) so the pointers handed to Vulkan are backed by storage that
            // lives in this function, however `INSTANCE_EXTENSIONS` is declared.
            INSTANCE_EXTENSIONS
                .iter()
                .filter(|ext| supported.contains(&ext.name))
                .map(|ext| ext.name)
                .collect_vec()
        };
        let extensions = instance_extension_names
            .iter()
            .map(|name| name.as_ptr())
            .collect_vec();

        // `pApplicationName` must point at a null-terminated string. The bytes of
        // `env!(..)` alone carry no terminator, so append one at compile time.
        let info = vk::ApplicationInfo::builder()
            .application_name(concat!(env!("CARGO_PKG_NAME"), "\0").as_bytes())
            .application_version(parse_version(env!("CARGO_PKG_VERSION")))
            .api_version(vk::make_version(1, 3, 0));

        let info = vk::InstanceCreateInfo::builder()
            .application_info(&info)
            .enabled_layer_names(&layers)
            .enabled_extension_names(&extensions);
        let instance = entry.create_instance(&info, None)?;

        let (device, compute_family_index, transfer_family_index) = instance
            .enumerate_physical_devices()?
            .into_iter()
            .filter_map(|device| {
                // Reject devices that miss any required device extension.
                let properties = instance
                    .enumerate_device_extension_properties(device, None)
                    .ok()?;
                let supported: HashSet<_> = properties
                    .iter()
                    .map(|properties| properties.extension_name)
                    .collect();
                if let Some(ext) = DEVICE_EXTENSIONS
                    .iter()
                    .find(|ext| !supported.contains(&ext.name))
                {
                    let properties = instance.get_physical_device_properties(device);
                    log::warn!(
                        "physical device {} does not support extension {}",
                        properties.device_name,
                        ext.name
                    );
                    return None;
                }

                // Compute family: the first family that supports timestamps and
                // has the COMPUTE flag.
                let queue_families = instance.get_physical_device_queue_family_properties(device);
                let compute_family_index = queue_families
                    .iter()
                    .enumerate()
                    .filter(|(_, p)| p.timestamp_valid_bits != 0)
                    .find(|(_, p)| p.queue_flags.contains(vk::QueueFlags::COMPUTE))?
                    .0 as u32;
                // Transfer family: a different family with the TRANSFER flag,
                // preferring the one with the fewest capability bits set.
                let transfer_family_index = queue_families
                    .iter()
                    .enumerate()
                    .filter(|&(index, _)| index as u32 != compute_family_index)
                    .filter(|(_, p)| p.queue_flags.contains(vk::QueueFlags::TRANSFER))
                    .min_by_key(|(_, p)| p.queue_flags.bits().count_ones())?
                    .0 as u32;

                Some((device, compute_family_index, transfer_family_index))
            })
            // Prefer discrete GPUs, then integrated ones, then anything else.
            .min_by_key(|&(device, _, _)| {
                let properties = instance.get_physical_device_properties(device);
                match properties.device_type {
                    vk::PhysicalDeviceType::DISCRETE_GPU => 0,
                    vk::PhysicalDeviceType::INTEGRATED_GPU => 1,
                    _ => 2,
                }
            })
            .ok_or("cannot find physical device")?;

        // Keep only subgroup-scope cooperative matrix configurations that convert
        // into the project's own representation.
        let cooperative_matrix = instance
            .get_physical_device_cooperative_matrix_properties_khr(device)?
            .into_iter()
            .filter(|p| p.scope == vk::ScopeKHR::SUBGROUP)
            .filter_map(|p| p.try_into().ok())
            .collect_vec();

        let mut vk13 = vk::PhysicalDeviceVulkan13Properties::builder();
        let mut properties2 = vk::PhysicalDeviceProperties2::builder().push_next(&mut vk13);
        instance.get_physical_device_properties2(device, &mut properties2);

        let properties = properties2.properties;
        let device_name = properties.device_name.as_bytes();
        let device_name = std::ffi::CStr::from_bytes_until_nul(device_name).unwrap_or_default();
        let device_name = device_name.to_str().unwrap_or("invalid device name");
        log::info!("\tdevice name: {device_name}");
        log::info!("\tdevice type: {:?}", properties.device_type);
        log::info!(
            "\tdriver version: {}.{}.{}",
            vk::version_major(properties.driver_version),
            vk::version_minor(properties.driver_version),
            vk::version_patch(properties.driver_version)
        );
        log::info!(
            "\tAPI version: {}.{}.{}",
            vk::version_major(properties.api_version),
            vk::version_minor(properties.api_version),
            vk::version_patch(properties.api_version)
        );
        log::info!("\tvendor ID: 0x{:04X}", properties.vendor_id);
        log::info!("\tdevice ID: 0x{:04X}", properties.device_id);

        let limits = properties.limits;
        let vk13 = vk13.build();

        let properties = instance.get_physical_device_memory_properties(device);

        log::info!("\tmemory heaps: {} heap(s)", properties.memory_heap_count);
        for i in 0..properties.memory_heap_count as usize {
            let heap = &properties.memory_heaps[i];
            let size = heap.size / 1024 / 1024;
            let flags = heap.flags;
            log::info!("\t\theap {i}: {size} MB, flags: {flags:?}");
        }
        log::info!("\tmemory types: {} type(s)", properties.memory_type_count);

        let memory = properties.into();

        let properties = Properties {
            memory,
            cooperative_matrix,
            vk13,
            limits,
        };
        log::info!("{properties}");

        // !!! CRASHES SOMEWHERE AFTER THIS LINE !!!

        // Owned copies of the device extension names; the raw pointers below
        // refer to this vector, which outlives `create_device`.
        let device_extension_names = DEVICE_EXTENSIONS.iter().map(|ext| ext.name).collect_vec();
        let extensions = device_extension_names
            .iter()
            .map(|name| name.as_ptr())
            .collect_vec();
        // Already guaranteed by the transfer family filter during device selection.
        assert_ne!(compute_family_index, transfer_family_index);
        let infos = &[
            vk::DeviceQueueCreateInfo::builder()
                .queue_family_index(compute_family_index)
                .queue_priorities(&[1.0]),
            vk::DeviceQueueCreateInfo::builder()
                .queue_family_index(transfer_family_index)
                .queue_priorities(&[0.0]),
        ];

        let mut features_11 =
            vk::PhysicalDeviceVulkan11Features::builder().storage_buffer_16bit_access(true);
        let mut features_12 = vk::PhysicalDeviceVulkan12Features::builder()
            .buffer_device_address(true)
            .shader_float16(true)
            .shader_int8(true)
            .shader_subgroup_extended_types(true)
            .vulkan_memory_model(true)
            .vulkan_memory_model_device_scope(true)
            .timeline_semaphore(true);
        let mut features_13 = vk::PhysicalDeviceVulkan13Features::builder()
            .maintenance4(true)
            .synchronization2(true)
            .compute_full_subgroups(true)
            .subgroup_size_control(true);
        let mut features_cooperative_matrix =
            vk::PhysicalDeviceCooperativeMatrixFeaturesKHR::builder().cooperative_matrix(true);

        let info = vk::DeviceCreateInfo::builder()
            .enabled_extension_names(&extensions)
            .queue_create_infos(infos)
            .push_next(&mut features_11)
            .push_next(&mut features_12)
            .push_next(&mut features_13)
            .push_next(&mut features_cooperative_matrix);
        let device = instance.create_device(device, &info, None)?;

        let compute = Submit::new(&device, compute_family_index, 0)?;
        let transfer = Submit::new(&device, transfer_family_index, 0)?;

        let info = vk::PipelineCacheCreateInfo::builder();
        let pipeline_cache = device.create_pipeline_cache(&info, None)?;

        let sizes = [
            vk::DescriptorPoolSize::builder()
                .type_(vk::DescriptorType::UNIFORM_BUFFER)
                .descriptor_count(DESCRIPTOR_COUNT_UNIFORM_BUFFER as u32),
            vk::DescriptorPoolSize::builder()
                .type_(vk::DescriptorType::COMBINED_IMAGE_SAMPLER)
                .descriptor_count(DESCRIPTOR_COUNT_SAMPLER as u32),
            vk::DescriptorPoolSize::builder()
                .type_(vk::DescriptorType::STORAGE_IMAGE)
                .descriptor_count(DESCRIPTOR_COUNT_STORAGE_IMAGE as u32),
        ];
        let info = vk::DescriptorPoolCreateInfo::builder()
            .flags(vk::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET)
            .pool_sizes(&sizes)
            .max_sets(DESCRIPTOR_POOL_MAX_SETS as u32);
        let descriptor_pool = device.create_descriptor_pool(&info, None)?;

        let info = vk::QueryPoolCreateInfo::builder()
            .query_type(vk::QueryType::TIMESTAMP)
            .query_count(QUERY_POOL_SIZE as u32);
        let query_pool = device.create_query_pool(&info, None)?;

        Ok(Self(Arc::new(inner::App {
            instance,
            device,
            properties,
            compute,
            transfer,
            pipeline_cache,
            descriptor_pool,
            query_pool,
        })))
    }
}

Output when calling App::new():

running 1 test
19:52:18 [INFO] load suite: 8098 patches
19:52:18 [INFO] model patches: 4
19:52:19 [INFO] 1.4.328
19:52:19 [INFO] enumerating available instance extensions...
19:52:20 [INFO] found 19 available instance extensions
19:52:20 [INFO]         VK_KHR_device_group_creation
19:52:20 [INFO]         VK_KHR_display
19:52:20 [INFO]         VK_KHR_external_fence_capabilities
19:52:20 [INFO]         VK_KHR_external_memory_capabilities
19:52:20 [INFO]         VK_KHR_external_semaphore_capabilities
19:52:20 [INFO]         VK_KHR_get_display_properties2
19:52:20 [INFO]         VK_KHR_get_physical_device_properties2
19:52:20 [INFO]         VK_KHR_get_surface_capabilities2
19:52:20 [INFO]         VK_KHR_surface
19:52:20 [INFO]         VK_KHR_surface_protected_capabilities
19:52:20 [INFO]         VK_KHR_win32_surface
19:52:20 [INFO]         VK_EXT_debug_report
19:52:20 [INFO]         VK_EXT_debug_utils
19:52:20 [INFO]         VK_EXT_direct_mode_display
19:52:20 [INFO]         VK_EXT_surface_maintenance1
19:52:20 [INFO]         VK_EXT_swapchain_colorspace
19:52:20 [INFO]         VK_NV_external_memory_capabilities
19:52:20 [INFO]         VK_KHR_portability_enumeration
19:52:20 [INFO]         VK_LUNARG_direct_driver_loading
19:52:20 [WARN] physical device Intel(R) Iris(R) Xe Graphics does not support extension VK_KHR_cooperative_matrix
19:52:20 [INFO]         device name: NVIDIA RTX 2000 Ada Generation Laptop GPU
19:52:20 [INFO]         device type: DISCRETE_GPU
19:52:20 [INFO]         driver version: 573.284.0
19:52:20 [INFO]         API version: 1.4.303
19:52:20 [INFO]         vendor ID: 0x10DE
19:52:20 [INFO]         device ID: 0x28B8
19:52:20 [INFO]         memory heaps: 2 heap(s)
19:52:20 [INFO]                 heap 0: 7957 MB, flags: DEVICE_LOCAL
19:52:20 [INFO]                 heap 1: 16200 MB, flags: (empty)
19:52:20 [INFO]         memory types: 5 type(s)
19:52:20 [INFO] properties:
cooperative matrix:
        16 x 16 x 16    F16 x F16 + F16 → F16
        16 x  8 x 16    F16 x F16 + F16 → F16
        16 x  8 x  8    F16 x F16 + F16 → F16
        16 x 16 x 16    F16 x F16 + F32 → F32
        16 x  8 x 16    F16 x F16 + F32 → F32
        16 x  8 x  8    F16 x F16 + F32 → F32
        16 x 16 x 32    U8 x U8 + U32 → U32
        16 x 16 x 32    I8 x I8 + I32 → I32
        16 x  8 x 32    U8 x U8 + U32 → U32
        16 x  8 x 32    I8 x I8 + I32 → I32
error: process didn't exit successfully: `target\debug\mollymawk.exe model --mode fused-feature-mlp` (exit code: 0xc0000005, STATUS_ACCESS_VIOLATION)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions