diff --git a/src/nouveau/compiler/nak.h b/src/nouveau/compiler/nak.h index 32bcdc9da44..7ee830c124c 100644 --- a/src/nouveau/compiler/nak.h +++ b/src/nouveau/compiler/nak.h @@ -245,7 +245,6 @@ struct nak_qmd_info { uint64_t addr; uint32_t smem_size; - uint32_t smem_max; uint32_t global_size[3]; diff --git a/src/nouveau/compiler/nak/hw_runner.rs b/src/nouveau/compiler/nak/hw_runner.rs index f1f0249fce5..71650119954 100644 --- a/src/nouveau/compiler/nak/hw_runner.rs +++ b/src/nouveau/compiler/nak/hw_runner.rs @@ -503,7 +503,6 @@ impl<'a> Runner { smem_size: unsafe { shader.info.__bindgen_anon_1.cs } .smem_size .into(), - smem_max: 48 * 1024, global_size: [invocations.div_ceil(local_size.into()), 1, 1], num_cbufs: 1, cbufs: qmd_cbufs, diff --git a/src/nouveau/compiler/nak/qmd.rs b/src/nouveau/compiler/nak/qmd.rs index d3084e54016..f33fa5181b6 100644 --- a/src/nouveau/compiler/nak/qmd.rs +++ b/src/nouveau/compiler/nak/qmd.rs @@ -25,7 +25,7 @@ trait QMD { fn set_register_count(&mut self, register_count: u8); fn set_crs_size(&mut self, crs_size: u32); fn set_slm_size(&mut self, slm_size: u32); - fn set_smem_size(&mut self, smem_size: u32, smem_max: u32); + fn set_smem_size(&mut self, smem_size: u32, smem_sizes: &[u16]); } macro_rules! set_enum { @@ -282,7 +282,7 @@ mod qmd_0_6 { qmd_impl_set_register_count!(cla0c0, QMDV00_06, REGISTER_COUNT); qmd_impl_set_slm_size!(cla0c0, QMDV00_06, NONE); - fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) { + fn set_smem_size(&mut self, smem_size: u32, _smem_sizes: &[u16]) { let mut bv = QMDBitView::new(&mut self.qmd); let smem_size = smem_size.next_multiple_of(0x100); @@ -328,7 +328,7 @@ mod qmd_2_1 { qmd_impl_set_register_count!(clc0c0, QMDV02_01, REGISTER_COUNT); qmd_impl_set_slm_size!(clc0c0, QMDV02_01, NONE); - fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) { + fn set_smem_size(&mut self, smem_size: u32, _smem_sizes: &[u16]) { let mut bv = QMDBitView::new(&mut self.qmd); let smem_size = smem_size.next_multiple_of(0x100); @@ -338,43 +338,41 @@ mod qmd_2_1 { } use qmd_2_1::Qmd2_1; -fn gv100_sm_config_smem_size(size: u32) -> u32 { - let size = if size > 64 * 1024 { - 96 * 1024 - } else if size > 32 * 1024 { - 64 * 1024 - } else if size > 16 * 1024 { - 32 * 1024 - } else if size > 8 * 1024 { - 16 * 1024 - } else { - 8 * 1024 - }; +fn gv100_smem_size_to_hw(size_kb: u16) -> u16 { + assert!(size_kb % 4 == 0); + (size_kb / 4) + 1 +} - size / 4096 + 1 +fn gv100_pick_smem_size_kb(size: u32, smem_sizes_kb: &[u16]) -> u16 { + *smem_sizes_kb + .iter() + .find(|&&val| u32::from(val) * 1024 >= size) + .expect("Requested shared memory not supported by the hw.") } macro_rules! qmd_impl_set_smem_size_bounded { ($c:ident, $s:ident) => { - fn set_smem_size(&mut self, smem_size: u32, smem_max: u32) { + fn set_smem_size(&mut self, smem_size: u32, smem_sizes: &[u16]) { let mut bv = QMDBitView::new(&mut self.qmd); let smem_size = smem_size.next_multiple_of(0x100); set_field!(bv, $c, $s, SHARED_MEMORY_SIZE, smem_size); - let max = gv100_sm_config_smem_size(smem_max); - let min = gv100_sm_config_smem_size(smem_size.into()); - let target = gv100_sm_config_smem_size(smem_size.into()); - set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, min); - set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, max); - set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, target); + let smem_size_kb = gv100_pick_smem_size_kb(smem_size, smem_sizes); + let smem_hw = gv100_smem_size_to_hw(smem_size_kb); + let smem_hw_max = + gv100_smem_size_to_hw(*smem_sizes.last().unwrap()); + + set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, smem_hw); + set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, smem_hw_max); + set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, smem_hw); } }; } macro_rules! qmd_impl_set_smem_size_bounded_gb { ($c:ident, $s:ident) => { - fn set_smem_size(&mut self, smem_size: u32, smem_max: u32) { + fn set_smem_size(&mut self, smem_size: u32, smem_sizes: &[u16]) { let mut bv = QMDBitView::new(&mut self.qmd); let smem_size = smem_size.next_multiple_of(0x100); @@ -389,12 +387,14 @@ macro_rules! qmd_impl_set_smem_size_bounded_gb { smem_size_shifted ); - let max = gv100_sm_config_smem_size(smem_max); - let min = gv100_sm_config_smem_size(smem_size.into()); - let target = gv100_sm_config_smem_size(smem_size.into()); - set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, min); - set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, max); - set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, target); + let smem_size_kb = gv100_pick_smem_size_kb(smem_size, smem_sizes); + let smem_hw = gv100_smem_size_to_hw(smem_size_kb); + let smem_hw_max = + gv100_smem_size_to_hw(*smem_sizes.last().unwrap()); + + set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, smem_hw); + set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, smem_hw_max); + set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, smem_hw); } }; } @@ -545,7 +545,11 @@ mod qmd_5_0 { } use qmd_5_0::Qmd5_0; -fn fill_qmd(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q { +fn fill_qmd( + dev: &nv_device_info, + info: &nak_shader_info, + qmd_info: &nak_qmd_info, +) -> Q { let cs_info = unsafe { assert!(info.stage == MESA_SHADER_COMPUTE); &info.__bindgen_anon_1.cs @@ -569,9 +573,10 @@ fn fill_qmd(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q { qmd.set_crs_size(info.crs_size); qmd.set_slm_size(info.slm_size); - assert!(qmd_info.smem_size >= u32::from(cs_info.smem_size)); - assert!(qmd_info.smem_size <= qmd_info.smem_max); - qmd.set_smem_size(qmd_info.smem_size.into(), qmd_info.smem_max.into()); + assert!(qmd_info.smem_size <= u32::from(dev.max_smem_per_wg_kB) * 1024); + + let smem_sizes = &dev.sm_smem_sizes_kB[0..dev.sm_smem_size_count.into()]; + qmd.set_smem_size(qmd_info.smem_size.into(), smem_sizes); for i in 0..qmd_info.num_cbufs { let cb = &qmd_info.cbufs[usize::try_from(i).unwrap()]; @@ -625,27 +630,27 @@ pub extern "C" fn nak_fill_qmd( if dev.cls_compute >= clcdc0::BLACKWELL_COMPUTE_A { let qmd_out = qmd_out as *mut Qmd5_0; assert!(qmd_size == size_of_val(&*qmd_out)); - qmd_out.write(fill_qmd(info, qmd_info)); + qmd_out.write(fill_qmd(dev, info, qmd_info)); } else if dev.cls_compute >= clcbc0::HOPPER_COMPUTE_A { let qmd_out = qmd_out as *mut Qmd4_0; assert!(qmd_size == size_of_val(&*qmd_out)); - qmd_out.write(fill_qmd(info, qmd_info)); + qmd_out.write(fill_qmd(dev, info, qmd_info)); } else if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A { let qmd_out = qmd_out as *mut Qmd3_0; assert!(qmd_size == size_of_val(&*qmd_out)); - qmd_out.write(fill_qmd(info, qmd_info)); + qmd_out.write(fill_qmd(dev, info, qmd_info)); } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A { let qmd_out = qmd_out as *mut Qmd2_2; assert!(qmd_size == size_of_val(&*qmd_out)); - qmd_out.write(fill_qmd(info, qmd_info)); + qmd_out.write(fill_qmd(dev, info, qmd_info)); } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A { let qmd_out = qmd_out as *mut Qmd2_1; assert!(qmd_size == size_of_val(&*qmd_out)); - qmd_out.write(fill_qmd(info, qmd_info)); + qmd_out.write(fill_qmd(dev, info, qmd_info)); } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A { let qmd_out = qmd_out as *mut Qmd0_6; assert!(qmd_size == size_of_val(&*qmd_out)); - qmd_out.write(fill_qmd(info, qmd_info)); + qmd_out.write(fill_qmd(dev, info, qmd_info)); } else { panic!("Unknown shader model"); } diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c index 8abbc9112c9..2d8c9ced06f 100644 --- a/src/nouveau/vulkan/nvk_cmd_dispatch.c +++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c @@ -187,7 +187,6 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd, struct nak_qmd_info qmd_info = { .addr = shader->hdr_addr, .smem_size = shader->info.cs.smem_size, - .smem_max = NVK_MAX_SHARED_SIZE, .global_size = { global_size[0], global_size[1], diff --git a/src/nouveau/vulkan/nvk_indirect_execution_set.c b/src/nouveau/vulkan/nvk_indirect_execution_set.c index 1e74e15e620..5d9a45bf57a 100644 --- a/src/nouveau/vulkan/nvk_indirect_execution_set.c +++ b/src/nouveau/vulkan/nvk_indirect_execution_set.c @@ -31,7 +31,6 @@ nvk_ies_cs_qmd_init(const struct nvk_physical_device *pdev, struct nak_qmd_info qmd_info = { .addr = shader->hdr_addr, .smem_size = shader->info.cs.smem_size, - .smem_max = NVK_MAX_SHARED_SIZE, }; assert(shader->cbuf_map.cbuf_count <= ARRAY_SIZE(qmd_info.cbufs));