diff --git a/src/intel/compiler/brw_compile_gs.cpp b/src/intel/compiler/brw_compile_gs.cpp index 65beda418a2..db20319edb0 100644 --- a/src/intel/compiler/brw_compile_gs.cpp +++ b/src/intel/compiler/brw_compile_gs.cpp @@ -38,7 +38,7 @@ brw_emit_gs_thread_end(fs_visitor &s) struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(s.prog_data); - if (s.gs_compile->control_data_header_size_bits > 0) { + if (s.gs.control_data_header_size_bits > 0) { s.emit_gs_control_data_bits(s.final_gs_vertex_count); } @@ -96,7 +96,7 @@ run_gs(fs_visitor &s) s.final_gs_vertex_count = bld.vgrf(BRW_TYPE_UD); - if (s.gs_compile->control_data_header_size_bits > 0) { + if (s.gs.control_data_header_size_bits > 0) { /* Create a VGRF to store accumulated control data bits. */ s.control_data_bits = bld.vgrf(BRW_TYPE_UD); @@ -104,7 +104,7 @@ run_gs(fs_visitor &s) * will set control_data_bits to 0 after emitting the first vertex. * Otherwise, we need to initialize it to 0 here. */ - if (s.gs_compile->control_data_header_size_bits <= 32) { + if (s.gs.control_data_header_size_bits <= 32) { const brw_builder abld = bld.annotate("initialize control data bits"); abld.MOV(s.control_data_bits, brw_imm_ud(0u)); } @@ -144,9 +144,10 @@ brw_compile_gs(const struct brw_compiler *compiler, struct brw_gs_prog_data *prog_data = params->prog_data; const unsigned dispatch_width = brw_geometry_stage_dispatch_width(compiler->devinfo); - struct brw_gs_compile c; - memset(&c, 0, sizeof(c)); - c.key = *key; + struct intel_vue_map input_vue_map = {0}; + + unsigned control_data_bits_per_vertex = 0; + unsigned control_data_header_size_bits = 0; const bool debug_enabled = brw_should_print_shader(nir, DEBUG_GS); @@ -164,11 +165,11 @@ brw_compile_gs(const struct brw_compiler *compiler, */ GLbitfield64 inputs_read = nir->info.inputs_read; brw_compute_vue_map(compiler->devinfo, - &c.input_vue_map, inputs_read, + &input_vue_map, inputs_read, nir->info.separate_shader, 1); brw_nir_apply_key(nir, compiler, &key->base, 
dispatch_width); - brw_nir_lower_vue_inputs(nir, &c.input_vue_map); + brw_nir_lower_vue_inputs(nir, &input_vue_map); brw_nir_lower_vue_outputs(nir); brw_postprocess_nir(nir, compiler, debug_enabled, key->base.robust_flags); @@ -196,9 +197,9 @@ brw_compile_gs(const struct brw_compiler *compiler, /* We only have to emit control bits if we are using non-zero streams */ if (nir->info.gs.active_stream_mask != (1 << 0)) - c.control_data_bits_per_vertex = 2; + control_data_bits_per_vertex = 2; else - c.control_data_bits_per_vertex = 0; + control_data_bits_per_vertex = 0; } else { /* When the output type is triangle_strip or line_strip, EndPrimitive() * may be used to terminate the current strip and start a new one @@ -211,16 +212,16 @@ brw_compile_gs(const struct brw_compiler *compiler, /* We only need to output control data if the shader actually calls * EndPrimitive(). */ - c.control_data_bits_per_vertex = + control_data_bits_per_vertex = nir->info.gs.uses_end_primitive ? 1 : 0; } - c.control_data_header_size_bits = - nir->info.gs.vertices_out * c.control_data_bits_per_vertex; + control_data_header_size_bits = + nir->info.gs.vertices_out * control_data_bits_per_vertex; /* 1 HWORD = 32 bytes = 256 bits */ prog_data->control_data_header_size_hwords = - ALIGN(c.control_data_header_size_bits, 256) / 256; + ALIGN(control_data_header_size_bits, 256) / 256; /* Compute the output vertex size. * @@ -338,20 +339,23 @@ brw_compile_gs(const struct brw_compiler *compiler, /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we * need to program a URB read length of ceiling(num_slots / 2). */ - prog_data->base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2; + prog_data->base.urb_read_length = (input_vue_map.num_slots + 1) / 2; /* Now that prog_data setup is done, we are ready to actually compile the * program. 
*/ if (unlikely(debug_enabled)) { fprintf(stderr, "GS Input "); - brw_print_vue_map(stderr, &c.input_vue_map, MESA_SHADER_GEOMETRY); + brw_print_vue_map(stderr, &input_vue_map, MESA_SHADER_GEOMETRY); fprintf(stderr, "GS Output "); brw_print_vue_map(stderr, &prog_data->base.vue_map, MESA_SHADER_GEOMETRY); } - fs_visitor v(compiler, &params->base, &c, prog_data, nir, + fs_visitor v(compiler, &params->base, &key->base, &prog_data->base.base, + nir, dispatch_width, params->base.stats != NULL, debug_enabled); + v.gs.control_data_bits_per_vertex = control_data_bits_per_vertex; + v.gs.control_data_header_size_bits = control_data_header_size_bits; if (run_gs(v)) { prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 55dafe5d8f5..12b0f9c1625 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -126,18 +126,6 @@ namespace brw { #define UBO_START ((1 << 16) - 4) -/** - * Scratch data used when compiling a GLSL geometry shader. 
- */ -struct brw_gs_compile -{ - struct brw_gs_prog_key key; - struct intel_vue_map input_vue_map; - - unsigned control_data_bits_per_vertex; - unsigned control_data_header_size_bits; -}; - class brw_builder; struct brw_shader_stats { @@ -283,13 +271,6 @@ public: unsigned num_polygons, bool needs_register_pressure, bool debug_enabled); - fs_visitor(const struct brw_compiler *compiler, - const struct brw_compile_params *params, - struct brw_gs_compile *gs_compile, - struct brw_gs_prog_data *prog_data, - const nir_shader *shader, - bool needs_register_pressure, - bool debug_enabled); void init(); ~fs_visitor(); @@ -334,8 +315,6 @@ public: const brw_base_prog_key *const key; - struct brw_gs_compile *gs_compile; - struct brw_stage_prog_data *prog_data; brw_analysis live_analysis; @@ -425,6 +404,11 @@ public: brw_reg control_data_bits; brw_reg invocation_id; + struct { + unsigned control_data_bits_per_vertex; + unsigned control_data_header_size_bits; + } gs; + unsigned grf_used; bool spilled_any_registers; bool needs_register_pressure; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index fe787e22587..d907332fdad 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2354,7 +2354,7 @@ emit_gs_end_primitive(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(s.prog_data); - if (s.gs_compile->control_data_header_size_bits == 0) + if (s.gs.control_data_header_size_bits == 0) return; /* We can only do EndPrimitive() functionality when the control data @@ -2367,7 +2367,7 @@ emit_gs_end_primitive(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src } /* Cut bits use one bit per vertex. 
*/ - assert(s.gs_compile->control_data_bits_per_vertex == 1); + assert(s.gs.control_data_bits_per_vertex == 1); brw_reg vertex_count = get_nir_src(ntb, vertex_count_nir_src); vertex_count.type = BRW_TYPE_UD; @@ -2449,7 +2449,7 @@ fs_visitor::gs_urb_per_slot_dword_index(const brw_reg &vertex_count) */ brw_reg prev_count = abld.ADD(vertex_count, brw_imm_ud(0xffffffffu)); unsigned log2_bits_per_vertex = - util_last_bit(gs_compile->control_data_bits_per_vertex); + util_last_bit(gs.control_data_bits_per_vertex); return abld.SHR(prev_count, brw_imm_ud(6u - log2_bits_per_vertex)); } @@ -2477,7 +2477,7 @@ fs_visitor::gs_urb_channel_mask(const brw_reg &dword_index) * Similarly, if the control data header is <= 32 bits, there is only one * DWord, so we can skip channel masks. */ - if (gs_compile->control_data_header_size_bits <= 32) + if (gs.control_data_header_size_bits <= 32) return channel_mask; const brw_builder bld = brw_builder(this).at_end(); @@ -2495,7 +2495,7 @@ void fs_visitor::emit_gs_control_data_bits(const brw_reg &vertex_count) { assert(stage == MESA_SHADER_GEOMETRY); - assert(gs_compile->control_data_bits_per_vertex != 0); + assert(gs.control_data_bits_per_vertex != 0); const struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); @@ -2509,7 +2509,7 @@ fs_visitor::emit_gs_control_data_bits(const brw_reg &vertex_count) const unsigned max_control_data_header_size_bits = devinfo->ver >= 20 ? 32 : 128; - if (gs_compile->control_data_header_size_bits > max_control_data_header_size_bits) { + if (gs.control_data_header_size_bits > max_control_data_header_size_bits) { /* Convert dword_index to bytes on Xe2+ since LSC can do operate on byte * offset granularity. 
*/ @@ -2564,7 +2564,7 @@ set_gs_stream_control_data_bits(nir_to_brw_state &ntb, const brw_reg &vertex_cou */ /* Stream mode uses 2 bits per vertex */ - assert(s.gs_compile->control_data_bits_per_vertex == 2); + assert(s.gs.control_data_bits_per_vertex == 2); /* Must be a valid stream */ assert(stream_id < 4); /* MAX_VERTEX_STREAMS */ @@ -2625,7 +2625,7 @@ emit_gs_vertex(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src, * control data bits associated with the (vertex_count - 1)th vertex are * correct. */ - if (s.gs_compile->control_data_header_size_bits > 32) { + if (s.gs.control_data_header_size_bits > 32) { const brw_builder abld = ntb.bld.annotate("emit vertex: emit control data bits"); @@ -2652,7 +2652,7 @@ emit_gs_vertex(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src, */ brw_inst *inst = abld.AND(ntb.bld.null_reg_d(), vertex_count, - brw_imm_ud(32u / s.gs_compile->control_data_bits_per_vertex - 1u)); + brw_imm_ud(32u / s.gs.control_data_bits_per_vertex - 1u)); inst->conditional_mod = BRW_CONDITIONAL_Z; abld.IF(BRW_PREDICATE_NORMAL); @@ -2682,7 +2682,7 @@ emit_gs_vertex(nir_to_brw_state &ntb, const nir_src &vertex_count_nir_src, * unless we have disabled control data bits completely (which we do * do for MESA_PRIM_POINTS outputs that don't use streams). 
*/ - if (s.gs_compile->control_data_header_size_bits > 0 && + if (s.gs.control_data_header_size_bits > 0 && gs_prog_data->control_data_format == GFX7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { set_gs_stream_control_data_bits(ntb, vertex_count, stream_id); diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index c0bed77a28f..7b5ccebc041 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -383,7 +383,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, mem_ctx(params->mem_ctx), cfg(NULL), stage(shader->info.stage), debug_enabled(debug_enabled), - key(key), gs_compile(NULL), prog_data(prog_data), + key(key), prog_data(prog_data), live_analysis(this), regpressure_analysis(this), performance_analysis(this), idom_analysis(this), def_analysis(this), needs_register_pressure(needs_register_pressure), @@ -407,7 +407,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, mem_ctx(params->mem_ctx), cfg(NULL), stage(shader->info.stage), debug_enabled(debug_enabled), - key(&key->base), gs_compile(NULL), prog_data(&prog_data->base), + key(&key->base), prog_data(&prog_data->base), live_analysis(this), regpressure_analysis(this), performance_analysis(this), idom_analysis(this), def_analysis(this), needs_register_pressure(needs_register_pressure), @@ -422,34 +422,6 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, api_subgroup_size == 32); } -fs_visitor::fs_visitor(const struct brw_compiler *compiler, - const struct brw_compile_params *params, - struct brw_gs_compile *c, - struct brw_gs_prog_data *prog_data, - const nir_shader *shader, - bool needs_register_pressure, - bool debug_enabled) - : compiler(compiler), log_data(params->log_data), - devinfo(compiler->devinfo), nir(shader), - mem_ctx(params->mem_ctx), - cfg(NULL), stage(shader->info.stage), - debug_enabled(debug_enabled), - key(&c->key.base), gs_compile(c), - prog_data(&prog_data->base.base), - 
live_analysis(this), regpressure_analysis(this), - performance_analysis(this), idom_analysis(this), def_analysis(this), - needs_register_pressure(needs_register_pressure), - dispatch_width(compiler->devinfo->ver >= 20 ? 16 : 8), - max_polygons(0), - api_subgroup_size(brw_nir_api_subgroup_size(shader, dispatch_width)) -{ - init(); - assert(api_subgroup_size == 0 || - api_subgroup_size == 8 || - api_subgroup_size == 16 || - api_subgroup_size == 32); -} - void fs_visitor::init() { @@ -473,6 +445,9 @@ fs_visitor::init() this->phase = BRW_SHADER_PHASE_INITIAL; this->next_address_register_nr = 1; + + this->gs.control_data_bits_per_vertex = 0; + this->gs.control_data_header_size_bits = 0; } fs_visitor::~fs_visitor()