From 865ef36609eff540861437750d5290e95e6ac750 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Wed, 28 Feb 2024 13:59:35 -0800 Subject: [PATCH] intel/brw: Remove brw_shader.h Find a better home for its existing content. Some functions are now just static functions at the usage sites. Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_compiler.c | 2 +- src/intel/compiler/brw_disasm.c | 2 +- src/intel/compiler/brw_eu.c | 2 +- src/intel/compiler/brw_fs.cpp | 354 +++++++++++++++ src/intel/compiler/brw_fs.h | 25 +- src/intel/compiler/brw_fs_builder.h | 1 - src/intel/compiler/brw_fs_generator.cpp | 30 ++ src/intel/compiler/brw_fs_nir.cpp | 32 ++ src/intel/compiler/brw_fs_reg_allocate.cpp | 2 +- src/intel/compiler/brw_nir.c | 1 - src/intel/compiler/brw_nir.h | 25 ++ src/intel/compiler/brw_private.h | 19 + src/intel/compiler/brw_reg.h | 4 + .../compiler/brw_schedule_instructions.cpp | 1 - src/intel/compiler/brw_shader.cpp | 408 ------------------ src/intel/compiler/brw_shader.h | 116 ----- src/intel/compiler/meson.build | 1 - 17 files changed, 491 insertions(+), 534 deletions(-) delete mode 100644 src/intel/compiler/brw_shader.h diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 84f0b8546a1..3ef23e95948 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -22,9 +22,9 @@ */ #include "brw_compiler.h" -#include "brw_shader.h" #include "brw_eu.h" #include "brw_nir.h" +#include "brw_private.h" #include "dev/intel_debug.h" #include "compiler/nir/nir.h" #include "util/u_debug.h" diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index a46cc7628b1..aa71e6fd836 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -21,6 +21,7 @@ * OF THIS SOFTWARE. */ +#include #include #include #include @@ -33,7 +34,6 @@ #include "brw_inst.h" #include "brw_isa_info.h" #include "brw_reg.h" -#include "brw_shader.h" #include "util/half_float.h" bool diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c index d9f1fc61cc2..b16b855d2b8 100644 --- a/src/intel/compiler/brw_eu.c +++ b/src/intel/compiler/brw_eu.c @@ -35,7 +35,7 @@ #include "brw_disasm.h" #include "brw_eu_defines.h" #include "brw_eu.h" -#include "brw_shader.h" +#include "brw_private.h" #include "intel_gfx_ver_enum.h" #include "dev/intel_debug.h" diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 218ba2d1ef0..c4d2090aecb 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1039,6 +1039,57 @@ fs_inst::has_sampler_residency() const } } +static enum brw_reg_type +brw_type_for_base_type(const struct glsl_type *type) +{ + switch (type->base_type) { + case GLSL_TYPE_FLOAT16: + return BRW_REGISTER_TYPE_HF; + case GLSL_TYPE_FLOAT: + return BRW_REGISTER_TYPE_F; + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_SUBROUTINE: + return BRW_REGISTER_TYPE_D; + case GLSL_TYPE_INT16: + return BRW_REGISTER_TYPE_W; + case GLSL_TYPE_INT8: + return BRW_REGISTER_TYPE_B; + case GLSL_TYPE_UINT: + return BRW_REGISTER_TYPE_UD; + case GLSL_TYPE_UINT16: + return BRW_REGISTER_TYPE_UW; + case GLSL_TYPE_UINT8: + return BRW_REGISTER_TYPE_UB; + case GLSL_TYPE_ARRAY: + return brw_type_for_base_type(type->fields.array); + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_TEXTURE: + case GLSL_TYPE_ATOMIC_UINT: + /* These should be overridden with the type of the member when + * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely + * way to trip up if we don't. + */ + return BRW_REGISTER_TYPE_UD; + case GLSL_TYPE_IMAGE: + return BRW_REGISTER_TYPE_UD; + case GLSL_TYPE_DOUBLE: + return BRW_REGISTER_TYPE_DF; + case GLSL_TYPE_UINT64: + return BRW_REGISTER_TYPE_UQ; + case GLSL_TYPE_INT64: + return BRW_REGISTER_TYPE_Q; + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_COOPERATIVE_MATRIX: + unreachable("not reached"); + } + + return BRW_REGISTER_TYPE_F; +} + fs_reg fs_visitor::vgrf(const glsl_type *const type) { @@ -2185,6 +2236,302 @@ fs_visitor::dump_instructions(const char *name) const } } +static const char * +brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) +{ + const struct intel_device_info *devinfo = isa->devinfo; + + switch (op) { + case 0 ... NUM_BRW_OPCODES - 1: + /* The DO instruction doesn't exist on Gfx9+, but we use it to mark the + * start of a loop in the IR. + */ + if (op == BRW_OPCODE_DO) + return "do"; + + /* DPAS instructions may transiently exist on platforms that do not + * support DPAS. They will eventually be lowered, but in the meantime it + * must be possible to query the instruction name. + */ + if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS) + return "dpas"; + + assert(brw_opcode_desc(isa, op)->name); + return brw_opcode_desc(isa, op)->name; + case FS_OPCODE_FB_WRITE_LOGICAL: + return "fb_write_logical"; + case FS_OPCODE_FB_READ: + return "fb_read"; + case FS_OPCODE_FB_READ_LOGICAL: + return "fb_read_logical"; + + case SHADER_OPCODE_RCP: + return "rcp"; + case SHADER_OPCODE_RSQ: + return "rsq"; + case SHADER_OPCODE_SQRT: + return "sqrt"; + case SHADER_OPCODE_EXP2: + return "exp2"; + case SHADER_OPCODE_LOG2: + return "log2"; + case SHADER_OPCODE_POW: + return "pow"; + case SHADER_OPCODE_INT_QUOTIENT: + return "int_quot"; + case SHADER_OPCODE_INT_REMAINDER: + return "int_rem"; + case SHADER_OPCODE_SIN: + return "sin"; + case SHADER_OPCODE_COS: + return "cos"; + + case SHADER_OPCODE_SEND: + return "send"; + + case SHADER_OPCODE_UNDEF: + return "undef"; + + case SHADER_OPCODE_TEX: + return "tex"; + case SHADER_OPCODE_TEX_LOGICAL: + return "tex_logical"; + case SHADER_OPCODE_TXD: + return "txd"; + case SHADER_OPCODE_TXD_LOGICAL: + return "txd_logical"; + case SHADER_OPCODE_TXF: + return "txf"; + case SHADER_OPCODE_TXF_LOGICAL: + return "txf_logical"; + case SHADER_OPCODE_TXF_LZ: + return "txf_lz"; + case SHADER_OPCODE_TXL: + return "txl"; + case SHADER_OPCODE_TXL_LOGICAL: + return "txl_logical"; + case SHADER_OPCODE_TXL_LZ: + return "txl_lz"; + case SHADER_OPCODE_TXS: + return "txs"; + case SHADER_OPCODE_TXS_LOGICAL: + return "txs_logical"; + case FS_OPCODE_TXB: + return "txb"; + case FS_OPCODE_TXB_LOGICAL: + return "txb_logical"; + case SHADER_OPCODE_TXF_CMS: + return "txf_cms"; + case SHADER_OPCODE_TXF_CMS_LOGICAL: + return "txf_cms_logical"; + case SHADER_OPCODE_TXF_CMS_W: + return "txf_cms_w"; + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + return "txf_cms_w_logical"; + case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: + return "txf_cms_w_gfx12_logical"; + case SHADER_OPCODE_TXF_UMS: + return "txf_ums"; + case SHADER_OPCODE_TXF_UMS_LOGICAL: + return "txf_ums_logical"; + case SHADER_OPCODE_TXF_MCS: + return "txf_mcs"; + case SHADER_OPCODE_TXF_MCS_LOGICAL: + return "txf_mcs_logical"; + case SHADER_OPCODE_LOD: + return "lod"; + case SHADER_OPCODE_LOD_LOGICAL: + return "lod_logical"; + case SHADER_OPCODE_TG4: + return "tg4"; + case SHADER_OPCODE_TG4_LOGICAL: + return "tg4_logical"; + case SHADER_OPCODE_TG4_OFFSET: + return "tg4_offset"; + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + return "tg4_offset_logical"; + case SHADER_OPCODE_TG4_OFFSET_LOD: + return "tg4_offset_lod"; + case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: + return "tg4_offset_lod_logical"; + case SHADER_OPCODE_TG4_OFFSET_BIAS: + return "tg4_offset_bias"; + case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: + return "tg4_offset_bias_logical"; + case SHADER_OPCODE_TG4_BIAS: + return "tg4_b"; + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + return "tg4_b_logical"; + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + return "tg4_l"; + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + return "tg4_l_logical"; + case SHADER_OPCODE_TG4_IMPLICIT_LOD: + return "tg4_i"; + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: + return "tg4_i_logical"; + case SHADER_OPCODE_SAMPLEINFO: + return "sampleinfo"; + case SHADER_OPCODE_SAMPLEINFO_LOGICAL: + return "sampleinfo_logical"; + + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: + return "image_size_logical"; + + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + return "untyped_atomic_logical"; + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + return "untyped_surface_read_logical"; + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + return "untyped_surface_write_logical"; + case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: + return "unaligned_oword_block_read_logical"; + case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: + return "oword_block_write_logical"; + case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: + return "a64_untyped_read_logical"; + case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: + return "a64_oword_block_read_logical"; + case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: + return "a64_unaligned_oword_block_read_logical"; + case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: + return "a64_oword_block_write_logical"; + case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: + return "a64_untyped_write_logical"; + case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: + return "a64_byte_scattered_read_logical"; + case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: + return "a64_byte_scattered_write_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + return "a64_untyped_atomic_logical"; + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + return "typed_atomic_logical"; + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + return "typed_surface_read_logical"; + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + return "typed_surface_write_logical"; + case SHADER_OPCODE_MEMORY_FENCE: + return "memory_fence"; + case FS_OPCODE_SCHEDULING_FENCE: + return "scheduling_fence"; + case SHADER_OPCODE_INTERLOCK: + /* For an interlock we actually issue a memory fence via sendc. */ + return "interlock"; + + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + return "byte_scattered_read_logical"; + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + return "byte_scattered_write_logical"; + case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: + return "dword_scattered_read_logical"; + case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: + return "dword_scattered_write_logical"; + + case SHADER_OPCODE_LOAD_PAYLOAD: + return "load_payload"; + case FS_OPCODE_PACK: + return "pack"; + + case SHADER_OPCODE_SCRATCH_HEADER: + return "scratch_header"; + + case SHADER_OPCODE_URB_WRITE_LOGICAL: + return "urb_write_logical"; + case SHADER_OPCODE_URB_READ_LOGICAL: + return "urb_read_logical"; + + case SHADER_OPCODE_FIND_LIVE_CHANNEL: + return "find_live_channel"; + case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: + return "find_last_live_channel"; + case SHADER_OPCODE_LOAD_LIVE_CHANNELS: + return "load_live_channels"; + case FS_OPCODE_LOAD_LIVE_CHANNELS: + return "fs_load_live_channels"; + + case SHADER_OPCODE_BROADCAST: + return "broadcast"; + case SHADER_OPCODE_SHUFFLE: + return "shuffle"; + case SHADER_OPCODE_SEL_EXEC: + return "sel_exec"; + case SHADER_OPCODE_QUAD_SWIZZLE: + return "quad_swizzle"; + case SHADER_OPCODE_CLUSTER_BROADCAST: + return "cluster_broadcast"; + + case SHADER_OPCODE_GET_BUFFER_SIZE: + return "get_buffer_size"; + + case FS_OPCODE_DDX_COARSE: + return "ddx_coarse"; + case FS_OPCODE_DDX_FINE: + return "ddx_fine"; + case FS_OPCODE_DDY_COARSE: + return "ddy_coarse"; + case FS_OPCODE_DDY_FINE: + return "ddy_fine"; + + case FS_OPCODE_LINTERP: + return "linterp"; + + case FS_OPCODE_PIXEL_X: + return "pixel_x"; + case FS_OPCODE_PIXEL_Y: + return "pixel_y"; + + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + return "uniform_pull_const"; + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: + return "varying_pull_const_logical"; + + case FS_OPCODE_PACK_HALF_2x16_SPLIT: + return "pack_half_2x16_split"; + + case SHADER_OPCODE_HALT_TARGET: + return "halt_target"; + + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + return "interp_sample"; + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + return "interp_shared_offset"; + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + return "interp_per_slot_offset"; + + case CS_OPCODE_CS_TERMINATE: + return "cs_terminate"; + case SHADER_OPCODE_BARRIER: + return "barrier"; + case SHADER_OPCODE_MULH: + return "mulh"; + case SHADER_OPCODE_ISUB_SAT: + return "isub_sat"; + case SHADER_OPCODE_USUB_SAT: + return "usub_sat"; + case SHADER_OPCODE_MOV_INDIRECT: + return "mov_indirect"; + case SHADER_OPCODE_MOV_RELOC_IMM: + return "mov_reloc_imm"; + + case RT_OPCODE_TRACE_RAY_LOGICAL: + return "rt_trace_ray_logical"; + + case SHADER_OPCODE_RND_MODE: + return "rnd_mode"; + case SHADER_OPCODE_FLOAT_CONTROL_MODE: + return "float_control_mode"; + case SHADER_OPCODE_BTD_SPAWN_LOGICAL: + return "btd_spawn_logical"; + case SHADER_OPCODE_BTD_RETIRE_LOGICAL: + return "btd_retire_logical"; + case SHADER_OPCODE_READ_SR_REG: + return "read_sr_reg"; + } + + unreachable("not reached"); +} + + void fs_visitor::dump_instruction_to_file(const fs_inst *inst, FILE *file) const { @@ -2504,6 +2851,13 @@ restore_instruction_order(struct cfg_t *cfg, fs_inst **inst_arr) assert(ip == num_insts); } +/* Per-thread scratch space is a power-of-two multiple of 1KB. */ +static inline unsigned +brw_get_scratch_size(int size) +{ + return MAX2(1024, util_next_power_of_two(size)); +} + void fs_visitor::allocate_registers(bool allow_spilling) { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 8fb8ad84630..e9a87cb2772 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -28,7 +28,8 @@ #ifndef BRW_FS_H #define BRW_FS_H -#include "brw_shader.h" +#include "brw_cfg.h" +#include "brw_compiler.h" #include "brw_ir_allocator.h" #include "brw_ir_fs.h" #include "brw_fs_live_variables.h" @@ -70,7 +71,19 @@ namespace brw { }; } -struct brw_gs_compile; +#define UBO_START ((1 << 16) - 4) + +/** + * Scratch data used when compiling a GLSL geometry shader. + */ +struct brw_gs_compile +{ + struct brw_gs_prog_key key; + struct intel_vue_map input_vue_map; + + unsigned control_data_bits_per_vertex; + unsigned control_data_header_size_bits; +}; namespace brw { class fs_builder; @@ -175,6 +188,14 @@ struct bs_thread_payload : public thread_payload { void load_shader_type(const brw::fs_builder &bld, fs_reg &dest) const; }; +enum instruction_scheduler_mode { + SCHEDULE_PRE, + SCHEDULE_PRE_NON_LIFO, + SCHEDULE_PRE_LIFO, + SCHEDULE_POST, + SCHEDULE_NONE, +}; + class instruction_scheduler; /** diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 334a72b2fad..54c403e6b1d 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -26,7 +26,6 @@ #define BRW_FS_BUILDER_H #include "brw_ir_fs.h" -#include "brw_shader.h" #include "brw_eu.h" #include "brw_fs.h" diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 861b24da7b5..b41fb9c46e7 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -35,6 +35,36 @@ #include "util/mesa-sha1.h" #include "util/half_float.h" +static uint32_t +brw_math_function(enum opcode op) +{ + switch (op) { + case SHADER_OPCODE_RCP: + return BRW_MATH_FUNCTION_INV; + case SHADER_OPCODE_RSQ: + return BRW_MATH_FUNCTION_RSQ; + case SHADER_OPCODE_SQRT: + return BRW_MATH_FUNCTION_SQRT; + case SHADER_OPCODE_EXP2: + return BRW_MATH_FUNCTION_EXP; + case SHADER_OPCODE_LOG2: + return BRW_MATH_FUNCTION_LOG; + case SHADER_OPCODE_POW: + return BRW_MATH_FUNCTION_POW; + case SHADER_OPCODE_SIN: + return BRW_MATH_FUNCTION_SIN; + case SHADER_OPCODE_COS: + return BRW_MATH_FUNCTION_COS; + case SHADER_OPCODE_INT_QUOTIENT: + return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; + case SHADER_OPCODE_INT_REMAINDER: + return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; + default: + unreachable("not reached: unknown math function"); + } +} + + static enum brw_reg_file brw_file_from_reg(fs_reg *reg) { diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index ef5b2760793..240f77c80ca 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -87,6 +87,38 @@ static void fs_nir_emit_global_atomic(nir_to_brw_state &ntb, const fs_builder &bld, nir_intrinsic_instr *instr); +static bool +brw_texture_offset(const nir_tex_instr *tex, unsigned src, + uint32_t *offset_bits_out) +{ + if (!nir_src_is_const(tex->src[src].src)) + return false; + + const unsigned num_components = nir_tex_instr_src_size(tex, src); + + /* Combine all three offsets into a single unsigned dword: + * + * bits 11:8 - U Offset (X component) + * bits 7:4 - V Offset (Y component) + * bits 3:0 - R Offset (Z component) + */ + uint32_t offset_bits = 0; + for (unsigned i = 0; i < num_components; i++) { + int offset = nir_src_comp_as_int(tex->src[src].src, i); + + /* offset out of bounds; caller will handle it. */ + if (offset > 7 || offset < -8) + return false; + + const unsigned shift = 4 * (2 - i); + offset_bits |= (offset << shift) & (0xF << shift); + } + + *offset_bits_out = offset_bits; + + return true; +} + static fs_reg setup_imm_b(const fs_builder &bld, int8_t v) { diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index ae03b23d6a0..990ada7293b 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -78,7 +78,7 @@ fs_visitor::assign_regs_trivial() } -void +extern "C" void brw_fs_alloc_reg_sets(struct brw_compiler *compiler) { const struct intel_device_info *devinfo = compiler->devinfo; diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 54db8fc96ee..dbadd56d52f 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -23,7 +23,6 @@ #include "intel_nir.h" #include "brw_nir.h" -#include "brw_shader.h" #include "compiler/glsl_types.h" #include "compiler/nir/nir_builder.h" diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 54781da84c2..744ba46a360 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -242,6 +242,31 @@ const struct glsl_type *brw_nir_get_var_type(const struct nir_shader *nir, void brw_nir_adjust_payload(nir_shader *shader); +static inline nir_variable_mode +brw_nir_no_indirect_mask(const struct brw_compiler *compiler, + gl_shader_stage stage) +{ + nir_variable_mode indirect_mask = (nir_variable_mode) 0; + + switch (stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_FRAGMENT: + indirect_mask |= nir_var_shader_in; + break; + + default: + /* Everything else can handle indirect inputs */ + break; + } + + if (stage != MESA_SHADER_TESS_CTRL && + stage != MESA_SHADER_TASK && + stage != MESA_SHADER_MESH) + indirect_mask |= nir_var_shader_out; + + return indirect_mask; +} + #ifdef __cplusplus } #endif diff --git a/src/intel/compiler/brw_private.h b/src/intel/compiler/brw_private.h index 922ec8abc31..e054e420f0e 100644 --- a/src/intel/compiler/brw_private.h +++ b/src/intel/compiler/brw_private.h @@ -27,6 +27,23 @@ #include "brw_compiler.h" +#ifdef __cplusplus +extern "C" { +#endif + +/* brw_fs_reg_allocate.cpp */ +void brw_fs_alloc_reg_sets(struct brw_compiler *compiler); + +/* brw_disasm.c */ +extern const char *const conditional_modifier[16]; +extern const char *const pred_ctrl_align16[16]; + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus + #include unsigned brw_required_dispatch_width(const struct shader_info *info); @@ -73,4 +90,6 @@ int brw_simd_select_for_workgroup_size(const struct intel_device_info *devinfo, bool brw_should_print_shader(const nir_shader *shader, uint64_t debug_flag); +#endif // __cplusplus + #endif // BRW_PRIVATE_H diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index 8e610a671c6..17747089981 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -1280,6 +1280,10 @@ element_sz(struct brw_reg reg) int brw_float_to_vf(float f); float brw_vf_to_float(unsigned char vf); +bool brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg); +bool brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg); +bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg); + #ifdef __cplusplus } #endif diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index e7edc72b766..f8d1c985d19 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -29,7 +29,6 @@ #include "brw_fs.h" #include "brw_fs_live_variables.h" #include "brw_cfg.h" -#include "brw_shader.h" #include using namespace brw; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index e2d40df722b..bc631450bdd 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -28,414 +28,6 @@ #include "brw_private.h" #include "dev/intel_debug.h" #include "util/macros.h" -#include "util/u_debug.h" - -enum brw_reg_type -brw_type_for_base_type(const struct glsl_type *type) -{ - switch (type->base_type) { - case GLSL_TYPE_FLOAT16: - return BRW_REGISTER_TYPE_HF; - case GLSL_TYPE_FLOAT: - return BRW_REGISTER_TYPE_F; - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_SUBROUTINE: - return BRW_REGISTER_TYPE_D; - case GLSL_TYPE_INT16: - return BRW_REGISTER_TYPE_W; - case GLSL_TYPE_INT8: - return BRW_REGISTER_TYPE_B; - case GLSL_TYPE_UINT: - return BRW_REGISTER_TYPE_UD; - case GLSL_TYPE_UINT16: - return BRW_REGISTER_TYPE_UW; - case GLSL_TYPE_UINT8: - return BRW_REGISTER_TYPE_UB; - case GLSL_TYPE_ARRAY: - return brw_type_for_base_type(type->fields.array); - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_TEXTURE: - case GLSL_TYPE_ATOMIC_UINT: - /* These should be overridden with the type of the member when - * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely - * way to trip up if we don't. - */ - return BRW_REGISTER_TYPE_UD; - case GLSL_TYPE_IMAGE: - return BRW_REGISTER_TYPE_UD; - case GLSL_TYPE_DOUBLE: - return BRW_REGISTER_TYPE_DF; - case GLSL_TYPE_UINT64: - return BRW_REGISTER_TYPE_UQ; - case GLSL_TYPE_INT64: - return BRW_REGISTER_TYPE_Q; - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_COOPERATIVE_MATRIX: - unreachable("not reached"); - } - - return BRW_REGISTER_TYPE_F; -} - -uint32_t -brw_math_function(enum opcode op) -{ - switch (op) { - case SHADER_OPCODE_RCP: - return BRW_MATH_FUNCTION_INV; - case SHADER_OPCODE_RSQ: - return BRW_MATH_FUNCTION_RSQ; - case SHADER_OPCODE_SQRT: - return BRW_MATH_FUNCTION_SQRT; - case SHADER_OPCODE_EXP2: - return BRW_MATH_FUNCTION_EXP; - case SHADER_OPCODE_LOG2: - return BRW_MATH_FUNCTION_LOG; - case SHADER_OPCODE_POW: - return BRW_MATH_FUNCTION_POW; - case SHADER_OPCODE_SIN: - return BRW_MATH_FUNCTION_SIN; - case SHADER_OPCODE_COS: - return BRW_MATH_FUNCTION_COS; - case SHADER_OPCODE_INT_QUOTIENT: - return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; - case SHADER_OPCODE_INT_REMAINDER: - return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; - default: - unreachable("not reached: unknown math function"); - } -} - -bool -brw_texture_offset(const nir_tex_instr *tex, unsigned src, - uint32_t *offset_bits_out) -{ - if (!nir_src_is_const(tex->src[src].src)) - return false; - - const unsigned num_components = nir_tex_instr_src_size(tex, src); - - /* Combine all three offsets into a single unsigned dword: - * - * bits 11:8 - U Offset (X component) - * bits 7:4 - V Offset (Y component) - * bits 3:0 - R Offset (Z component) - */ - uint32_t offset_bits = 0; - for (unsigned i = 0; i < num_components; i++) { - int offset = nir_src_comp_as_int(tex->src[src].src, i); - - /* offset out of bounds; caller will handle it. */ - if (offset > 7 || offset < -8) - return false; - - const unsigned shift = 4 * (2 - i); - offset_bits |= (offset << shift) & (0xF << shift); - } - - *offset_bits_out = offset_bits; - - return true; -} - -const char * -brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) -{ - const struct intel_device_info *devinfo = isa->devinfo; - - switch (op) { - case 0 ... NUM_BRW_OPCODES - 1: - /* The DO instruction doesn't exist on Gfx9+, but we use it to mark the - * start of a loop in the IR. - */ - if (op == BRW_OPCODE_DO) - return "do"; - - /* DPAS instructions may transiently exist on platforms that do not - * support DPAS. They will eventually be lowered, but in the meantime it - * must be possible to query the instruction name. - */ - if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS) - return "dpas"; - - assert(brw_opcode_desc(isa, op)->name); - return brw_opcode_desc(isa, op)->name; - case FS_OPCODE_FB_WRITE_LOGICAL: - return "fb_write_logical"; - case FS_OPCODE_FB_READ: - return "fb_read"; - case FS_OPCODE_FB_READ_LOGICAL: - return "fb_read_logical"; - - case SHADER_OPCODE_RCP: - return "rcp"; - case SHADER_OPCODE_RSQ: - return "rsq"; - case SHADER_OPCODE_SQRT: - return "sqrt"; - case SHADER_OPCODE_EXP2: - return "exp2"; - case SHADER_OPCODE_LOG2: - return "log2"; - case SHADER_OPCODE_POW: - return "pow"; - case SHADER_OPCODE_INT_QUOTIENT: - return "int_quot"; - case SHADER_OPCODE_INT_REMAINDER: - return "int_rem"; - case SHADER_OPCODE_SIN: - return "sin"; - case SHADER_OPCODE_COS: - return "cos"; - - case SHADER_OPCODE_SEND: - return "send"; - - case SHADER_OPCODE_UNDEF: - return "undef"; - - case SHADER_OPCODE_TEX: - return "tex"; - case SHADER_OPCODE_TEX_LOGICAL: - return "tex_logical"; - case SHADER_OPCODE_TXD: - return "txd"; - case SHADER_OPCODE_TXD_LOGICAL: - return "txd_logical"; - case SHADER_OPCODE_TXF: - return "txf"; - case SHADER_OPCODE_TXF_LOGICAL: - return "txf_logical"; - case SHADER_OPCODE_TXF_LZ: - return "txf_lz"; - case SHADER_OPCODE_TXL: - return "txl"; - case SHADER_OPCODE_TXL_LOGICAL: - return "txl_logical"; - case SHADER_OPCODE_TXL_LZ: - return "txl_lz"; - case SHADER_OPCODE_TXS: - return "txs"; - case SHADER_OPCODE_TXS_LOGICAL: - return "txs_logical"; - case FS_OPCODE_TXB: - return "txb"; - case FS_OPCODE_TXB_LOGICAL: - return "txb_logical"; - case SHADER_OPCODE_TXF_CMS: - return "txf_cms"; - case SHADER_OPCODE_TXF_CMS_LOGICAL: - return "txf_cms_logical"; - case SHADER_OPCODE_TXF_CMS_W: - return "txf_cms_w"; - case SHADER_OPCODE_TXF_CMS_W_LOGICAL: - return "txf_cms_w_logical"; - case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: - return "txf_cms_w_gfx12_logical"; - case SHADER_OPCODE_TXF_UMS: - return "txf_ums"; - case SHADER_OPCODE_TXF_UMS_LOGICAL: - return "txf_ums_logical"; - case SHADER_OPCODE_TXF_MCS: - return "txf_mcs"; - case SHADER_OPCODE_TXF_MCS_LOGICAL: - return "txf_mcs_logical"; - case SHADER_OPCODE_LOD: - return "lod"; - case SHADER_OPCODE_LOD_LOGICAL: - return "lod_logical"; - case SHADER_OPCODE_TG4: - return "tg4"; - case SHADER_OPCODE_TG4_LOGICAL: - return "tg4_logical"; - case SHADER_OPCODE_TG4_OFFSET: - return "tg4_offset"; - case SHADER_OPCODE_TG4_OFFSET_LOGICAL: - return "tg4_offset_logical"; - case SHADER_OPCODE_TG4_OFFSET_LOD: - return "tg4_offset_lod"; - case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: - return "tg4_offset_lod_logical"; - case SHADER_OPCODE_TG4_OFFSET_BIAS: - return "tg4_offset_bias"; - case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: - return "tg4_offset_bias_logical"; - case SHADER_OPCODE_TG4_BIAS: - return "tg4_b"; - case SHADER_OPCODE_TG4_BIAS_LOGICAL: - return "tg4_b_logical"; - case SHADER_OPCODE_TG4_EXPLICIT_LOD: - return "tg4_l"; - case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: - return "tg4_l_logical"; - case SHADER_OPCODE_TG4_IMPLICIT_LOD: - return "tg4_i"; - case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: - return "tg4_i_logical"; - case SHADER_OPCODE_SAMPLEINFO: - return "sampleinfo"; - case SHADER_OPCODE_SAMPLEINFO_LOGICAL: - return "sampleinfo_logical"; - - case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: - return "image_size_logical"; - - case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: - return "untyped_atomic_logical"; - case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: - return "untyped_surface_read_logical"; - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: - return "untyped_surface_write_logical"; - case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: - return "unaligned_oword_block_read_logical"; - case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: - return "oword_block_write_logical"; - case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: - return "a64_untyped_read_logical"; - case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: - return "a64_oword_block_read_logical"; - case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: - return "a64_unaligned_oword_block_read_logical"; - case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: - return "a64_oword_block_write_logical"; - case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: - return "a64_untyped_write_logical"; - case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: - return "a64_byte_scattered_read_logical"; - case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: - return "a64_byte_scattered_write_logical"; - case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: - return "a64_untyped_atomic_logical"; - case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: - return "typed_atomic_logical"; - case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: - return "typed_surface_read_logical"; - case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: - return "typed_surface_write_logical"; - case SHADER_OPCODE_MEMORY_FENCE: - return "memory_fence"; - case FS_OPCODE_SCHEDULING_FENCE: - return "scheduling_fence"; - case SHADER_OPCODE_INTERLOCK: - /* For an interlock we actually issue a memory fence via sendc. */ - return "interlock"; - - case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: - return "byte_scattered_read_logical"; - case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: - return "byte_scattered_write_logical"; - case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: - return "dword_scattered_read_logical"; - case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: - return "dword_scattered_write_logical"; - - case SHADER_OPCODE_LOAD_PAYLOAD: - return "load_payload"; - case FS_OPCODE_PACK: - return "pack"; - - case SHADER_OPCODE_SCRATCH_HEADER: - return "scratch_header"; - - case SHADER_OPCODE_URB_WRITE_LOGICAL: - return "urb_write_logical"; - case SHADER_OPCODE_URB_READ_LOGICAL: - return "urb_read_logical"; - - case SHADER_OPCODE_FIND_LIVE_CHANNEL: - return "find_live_channel"; - case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: - return "find_last_live_channel"; - case SHADER_OPCODE_LOAD_LIVE_CHANNELS: - return "load_live_channels"; - case FS_OPCODE_LOAD_LIVE_CHANNELS: - return "fs_load_live_channels"; - - case SHADER_OPCODE_BROADCAST: - return "broadcast"; - case SHADER_OPCODE_SHUFFLE: - return "shuffle"; - case SHADER_OPCODE_SEL_EXEC: - return "sel_exec"; - case SHADER_OPCODE_QUAD_SWIZZLE: - return "quad_swizzle"; - case SHADER_OPCODE_CLUSTER_BROADCAST: - return "cluster_broadcast"; - - case SHADER_OPCODE_GET_BUFFER_SIZE: - return "get_buffer_size"; - - case FS_OPCODE_DDX_COARSE: - return "ddx_coarse"; - case FS_OPCODE_DDX_FINE: - return "ddx_fine"; - case FS_OPCODE_DDY_COARSE: - return "ddy_coarse"; - case FS_OPCODE_DDY_FINE: - return "ddy_fine"; - - case FS_OPCODE_LINTERP: - return "linterp"; - - case FS_OPCODE_PIXEL_X: - return "pixel_x"; - case FS_OPCODE_PIXEL_Y: - return "pixel_y"; - - case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - return "uniform_pull_const"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: - return "varying_pull_const_logical"; - - case FS_OPCODE_PACK_HALF_2x16_SPLIT: - return "pack_half_2x16_split"; - - case SHADER_OPCODE_HALT_TARGET: - return "halt_target"; - - case FS_OPCODE_INTERPOLATE_AT_SAMPLE: - return "interp_sample"; - case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: - return "interp_shared_offset"; - case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: - return "interp_per_slot_offset"; - - case CS_OPCODE_CS_TERMINATE: - return "cs_terminate"; - case SHADER_OPCODE_BARRIER: - return "barrier"; - case SHADER_OPCODE_MULH: - return "mulh"; - case SHADER_OPCODE_ISUB_SAT: - return "isub_sat"; - case SHADER_OPCODE_USUB_SAT: - return "usub_sat"; - case SHADER_OPCODE_MOV_INDIRECT: - return "mov_indirect"; - case SHADER_OPCODE_MOV_RELOC_IMM: - return "mov_reloc_imm"; - - case RT_OPCODE_TRACE_RAY_LOGICAL: - return "rt_trace_ray_logical"; - - case SHADER_OPCODE_RND_MODE: - return "rnd_mode"; - case SHADER_OPCODE_FLOAT_CONTROL_MODE: - return "float_control_mode"; - case SHADER_OPCODE_BTD_SPAWN_LOGICAL: - return "btd_spawn_logical"; - case SHADER_OPCODE_BTD_RETIRE_LOGICAL: - return "btd_retire_logical"; - case SHADER_OPCODE_READ_SR_REG: - return "read_sr_reg"; - } - - unreachable("not reached"); -} bool brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h deleted file mode 100644 index 00f5a73d92d..00000000000 --- a/src/intel/compiler/brw_shader.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef BRW_SHADER_H -#define BRW_SHADER_H - -#include -#include "brw_cfg.h" -#include "brw_compiler.h" -#include "compiler/nir/nir.h" - -#ifdef __cplusplus -enum instruction_scheduler_mode { - SCHEDULE_PRE, - SCHEDULE_PRE_NON_LIFO, - SCHEDULE_PRE_LIFO, - SCHEDULE_POST, - SCHEDULE_NONE, -}; - -#define UBO_START ((1 << 16) - 4) -#else -#endif /* __cplusplus */ - -enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type); -uint32_t brw_math_function(enum opcode op); -const char *brw_instruction_name(const struct brw_isa_info *isa, - enum opcode op); -bool brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg); -bool brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg); -bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg); - -#ifdef __cplusplus -extern "C" { -#endif - -/* brw_fs_reg_allocate.cpp */ -void brw_fs_alloc_reg_sets(struct brw_compiler *compiler); - -/* brw_disasm.c */ -extern const char *const conditional_modifier[16]; -extern const char *const pred_ctrl_align16[16]; - -/* Per-thread scratch space is a power-of-two multiple of 1KB. */ -static inline unsigned -brw_get_scratch_size(int size) -{ - return MAX2(1024, util_next_power_of_two(size)); -} - - -static inline nir_variable_mode -brw_nir_no_indirect_mask(const struct brw_compiler *compiler, - gl_shader_stage stage) -{ - nir_variable_mode indirect_mask = (nir_variable_mode) 0; - - switch (stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_FRAGMENT: - indirect_mask |= nir_var_shader_in; - break; - - default: - /* Everything else can handle indirect inputs */ - break; - } - - if (stage != MESA_SHADER_TESS_CTRL && - stage != MESA_SHADER_TASK && - stage != MESA_SHADER_MESH) - indirect_mask |= nir_var_shader_out; - - return indirect_mask; -} - -bool brw_texture_offset(const nir_tex_instr *tex, unsigned src, - uint32_t *offset_bits); - -/** - * Scratch data used when compiling a GLSL geometry shader. - */ -struct brw_gs_compile -{ - struct brw_gs_prog_key key; - struct intel_vue_map input_vue_map; - - unsigned control_data_bits_per_vertex; - unsigned control_data_header_size_bits; -}; - -#ifdef __cplusplus -} -#endif - -#endif /* BRW_SHADER_H */ diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index e9970673966..c23dbfc5dfa 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -120,7 +120,6 @@ libintel_compiler_brw_files = files( 'brw_rt.h', 'brw_schedule_instructions.cpp', 'brw_shader.cpp', - 'brw_shader.h', 'brw_simd_selection.cpp', 'brw_vue_map.c', )