diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index bde25876b9f..39e6b261242 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -71,6 +71,10 @@ void genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer, void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer); +void genX(emit_l3_config)(struct anv_batch *batch, + const struct anv_device *device, + const struct intel_l3_config *cfg); + void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, const struct intel_l3_config *cfg); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index b7afb2d8231..c082c23bd79 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1892,8 +1892,6 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, intel_dump_l3_config(cfg, stderr); } - UNUSED const bool has_slm = cfg->n[INTEL_L3P_SLM]; - /* According to the hardware docs, the L3 partitioning can only be changed * while the pipeline is completely drained and the caches are flushed, * which involves a first PIPE_CONTROL flush which stalls the pipeline... @@ -1935,112 +1933,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, pc.CommandStreamerStallEnable = true; } -#if GEN_GEN >= 8 - - assert(!cfg->n[INTEL_L3P_IS] && !cfg->n[INTEL_L3P_C] && !cfg->n[INTEL_L3P_T]); - -#if GEN_GEN >= 12 -#define L3_ALLOCATION_REG GENX(L3ALLOC) -#define L3_ALLOCATION_REG_num GENX(L3ALLOC_num) -#else -#define L3_ALLOCATION_REG GENX(L3CNTLREG) -#define L3_ALLOCATION_REG_num GENX(L3CNTLREG_num) -#endif - - anv_batch_write_reg(&cmd_buffer->batch, L3_ALLOCATION_REG, l3cr) { -#if GEN_GEN < 11 - l3cr.SLMEnable = has_slm; -#endif -#if GEN_GEN == 11 - /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set - * in L3CNTLREG register. 
The default setting of the bit is not the - * desirable behavior. - */ - l3cr.ErrorDetectionBehaviorControl = true; - l3cr.UseFullWays = true; -#endif - l3cr.URBAllocation = cfg->n[INTEL_L3P_URB]; - l3cr.ROAllocation = cfg->n[INTEL_L3P_RO]; - l3cr.DCAllocation = cfg->n[INTEL_L3P_DC]; - l3cr.AllAllocation = cfg->n[INTEL_L3P_ALL]; - } - -#else - - const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL]; - const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] || - cfg->n[INTEL_L3P_ALL]; - - assert(!cfg->n[INTEL_L3P_ALL]); - - /* When enabled SLM only uses a portion of the L3 on half of the banks, - * the matching space on the remaining banks has to be allocated to a - * client (URB for all validated configurations) set to the - * lower-bandwidth 2-bank address hashing mode. - */ - const struct gen_device_info *devinfo = &cmd_buffer->device->info; - const bool urb_low_bw = has_slm && !devinfo->is_baytrail; - assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]); - - /* Minimum number of ways that can be allocated to the URB. */ - const unsigned n0_urb = devinfo->is_baytrail ? 32 : 0; - assert(cfg->n[INTEL_L3P_URB] >= n0_urb); - - anv_batch_write_reg(&cmd_buffer->batch, GENX(L3SQCREG1), l3sqc) { - l3sqc.ConvertDC_UC = !has_dc; - l3sqc.ConvertIS_UC = !has_is; - l3sqc.ConvertC_UC = !has_c; - l3sqc.ConvertT_UC = !has_t; -#if GEN_IS_HASWELL - l3sqc.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT; -#else - l3sqc.L3SQGeneralPriorityCreditInitialization = - devinfo->is_baytrail ? 
BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT; -#endif - l3sqc.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT; - } - - anv_batch_write_reg(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2) { - l3cr2.SLMEnable = has_slm; - l3cr2.URBLowBandwidth = urb_low_bw; - l3cr2.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb; -#if !GEN_IS_HASWELL - l3cr2.ALLAllocation = cfg->n[INTEL_L3P_ALL]; -#endif - l3cr2.ROAllocation = cfg->n[INTEL_L3P_RO]; - l3cr2.DCAllocation = cfg->n[INTEL_L3P_DC]; - } - - anv_batch_write_reg(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3) { - l3cr3.ISAllocation = cfg->n[INTEL_L3P_IS]; - l3cr3.ISLowBandwidth = 0; - l3cr3.CAllocation = cfg->n[INTEL_L3P_C]; - l3cr3.CLowBandwidth = 0; - l3cr3.TAllocation = cfg->n[INTEL_L3P_T]; - l3cr3.TLowBandwidth = 0; - } - -#if GEN_IS_HASWELL - if (cmd_buffer->device->physical->cmd_parser_version >= 4) { - /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep - * them disabled to avoid crashing the system hard. - */ - anv_batch_write_reg(&cmd_buffer->batch, GENX(SCRATCH1), s1) { - s1.L3AtomicDisable = !has_dc; - } - anv_batch_write_reg(&cmd_buffer->batch, GENX(CHICKEN3), c3) { - c3.L3AtomicDisableMask = true; - c3.L3AtomicDisable = !has_dc; - } - } -#endif - -#endif - + genX(emit_l3_config)(&cmd_buffer->batch, cmd_buffer->device, cfg); cmd_buffer->state.current_l3_config = cfg; } diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index beaec4cf5c3..2351228bfd6 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -291,14 +291,13 @@ init_render_queue_state(struct anv_queue *queue) #endif } -#if GEN_GEN >= 12 +#if GEN_GEN >= 11 + /* Starting with GFX version 11, SLM is no longer part of the L3$ config + * so it never changes throughout the lifetime of the VkDevice. + */ const struct intel_l3_config *cfg = intel_get_default_l3_config(&device->info); - if (!cfg) { - /* Platforms with no configs just setup full-way allocation. 
*/ - anv_batch_write_reg(&batch, GENX(L3ALLOC), l3a) { - l3a.L3FullWayAllocationEnable = true; - } - } + genX(emit_l3_config)(&batch, device, cfg); + device->l3_config = cfg; #endif anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe); @@ -336,6 +335,136 @@ genX(init_device_state)(struct anv_device *device) return res; } +/* Program the L3 cache partitioning described by cfg by emitting the + * corresponding register writes into batch. + * + * cfg may be NULL only on GEN_GEN >= 12 (full-way allocation is enabled + * instead); GEN_GEN 8-11 treats NULL as unreachable and the GEN_GEN < 8 + * path dereferences cfg unconditionally. + */ +void +genX(emit_l3_config)(struct anv_batch *batch, + const struct anv_device *device, + const struct intel_l3_config *cfg) +{ + UNUSED const struct gen_device_info *devinfo = &device->info; + UNUSED const bool has_slm = cfg != NULL && cfg->n[INTEL_L3P_SLM]; + +#if GEN_GEN >= 8 + +#if GEN_GEN >= 12 +#define L3_ALLOCATION_REG GENX(L3ALLOC) +#define L3_ALLOCATION_REG_num GENX(L3ALLOC_num) +#else +#define L3_ALLOCATION_REG GENX(L3CNTLREG) +#define L3_ALLOCATION_REG_num GENX(L3CNTLREG_num) +#endif + + anv_batch_write_reg(batch, L3_ALLOCATION_REG, l3cr) { + if (cfg == NULL) { +#if GEN_GEN >= 12 + l3cr.L3FullWayAllocationEnable = true; +#else + unreachable("Invalid L3$ config"); +#endif + } else { +#if GEN_GEN < 11 + l3cr.SLMEnable = has_slm; +#endif +#if GEN_GEN == 11 + /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be + * set in L3CNTLREG register. The default setting of the bit is not + * the desirable behavior. 
+ */ + l3cr.ErrorDetectionBehaviorControl = true; + l3cr.UseFullWays = true; +#endif /* GEN_GEN == 11 */ + assert(cfg->n[INTEL_L3P_IS] == 0); + assert(cfg->n[INTEL_L3P_C] == 0); + assert(cfg->n[INTEL_L3P_T] == 0); + l3cr.URBAllocation = cfg->n[INTEL_L3P_URB]; + l3cr.ROAllocation = cfg->n[INTEL_L3P_RO]; + l3cr.DCAllocation = cfg->n[INTEL_L3P_DC]; + l3cr.AllAllocation = cfg->n[INTEL_L3P_ALL]; + } + } + +#else /* GEN_GEN < 8 */ + + const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL]; + const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] || + cfg->n[INTEL_L3P_ALL]; + const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] || + cfg->n[INTEL_L3P_ALL]; + const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] || + cfg->n[INTEL_L3P_ALL]; + + assert(!cfg->n[INTEL_L3P_ALL]); + + /* When enabled SLM only uses a portion of the L3 on half of the banks, + * the matching space on the remaining banks has to be allocated to a + * client (URB for all validated configurations) set to the + * lower-bandwidth 2-bank address hashing mode. + */ + const bool urb_low_bw = has_slm && !devinfo->is_baytrail; + assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]); + + /* Minimum number of ways that can be allocated to the URB. */ + const unsigned n0_urb = devinfo->is_baytrail ? 32 : 0; + assert(cfg->n[INTEL_L3P_URB] >= n0_urb); + + anv_batch_write_reg(batch, GENX(L3SQCREG1), l3sqc) { + l3sqc.ConvertDC_UC = !has_dc; + l3sqc.ConvertIS_UC = !has_is; + l3sqc.ConvertC_UC = !has_c; + l3sqc.ConvertT_UC = !has_t; +#if GEN_IS_HASWELL + l3sqc.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT; +#else + l3sqc.L3SQGeneralPriorityCreditInitialization = + devinfo->is_baytrail ? 
BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT; +#endif + l3sqc.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT; + } + + anv_batch_write_reg(batch, GENX(L3CNTLREG2), l3cr2) { + l3cr2.SLMEnable = has_slm; + l3cr2.URBLowBandwidth = urb_low_bw; + l3cr2.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb; +#if !GEN_IS_HASWELL + l3cr2.ALLAllocation = cfg->n[INTEL_L3P_ALL]; +#endif + l3cr2.ROAllocation = cfg->n[INTEL_L3P_RO]; + l3cr2.DCAllocation = cfg->n[INTEL_L3P_DC]; + } + + anv_batch_write_reg(batch, GENX(L3CNTLREG3), l3cr3) { + l3cr3.ISAllocation = cfg->n[INTEL_L3P_IS]; + l3cr3.ISLowBandwidth = 0; + l3cr3.CAllocation = cfg->n[INTEL_L3P_C]; + l3cr3.CLowBandwidth = 0; + l3cr3.TAllocation = cfg->n[INTEL_L3P_T]; + l3cr3.TLowBandwidth = 0; + } + +#if GEN_IS_HASWELL + if (device->physical->cmd_parser_version >= 4) { + /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep + * them disabled to avoid crashing the system hard. + */ + anv_batch_write_reg(batch, GENX(SCRATCH1), s1) { + s1.L3AtomicDisable = !has_dc; + } + anv_batch_write_reg(batch, GENX(CHICKEN3), c3) { + c3.L3AtomicDisableMask = true; + c3.L3AtomicDisable = !has_dc; + } + } +#endif /* GEN_IS_HASWELL */ + +#endif /* GEN_GEN < 8 */ +} + void genX(emit_multisample)(struct anv_batch *batch, uint32_t samples, const VkSampleLocationEXT *locations)