/*
 * Copyright © 2010 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "brw_cfg.h"
#include "brw_fs.h"
#include "brw_private.h"
#include "dev/intel_debug.h"
#include "util/half_float.h"

using namespace brw;

/**
 * Write a textual listing of all instructions of \p s to \p file.
 *
 * Three listing modes exist:
 *  - CFG present and s.grf_used == 0: instructions are walked through the
 *    CFG, indented by control-flow nesting, printed with def information
 *    and, when DEBUG_REG_PRESSURE is enabled, prefixed with the number of
 *    live registers at each instruction (plus a final maximum).
 *  - CFG present and the flat instruction list is empty: plain walk of the
 *    CFG.
 *  - Otherwise: plain walk of the flat instruction list.
 */
void
brw_print_instructions_to_file(const fs_visitor &s, FILE *file)
{
   const bool annotated_listing = s.cfg && s.grf_used == 0;

   /* Handle the two plain listings first so the annotated path below is
    * not nested.
    */
   if (!annotated_listing) {
      if (s.cfg && exec_list_is_empty(&s.instructions)) {
         foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
            brw_print_instruction(s, inst, file);
         }
      } else {
         foreach_in_list(fs_inst, inst, &s.instructions) {
            brw_print_instruction(s, inst, file);
         }
      }
      return;
   }

   const brw::def_analysis &defs = s.def_analysis.require();
   const register_pressure *pressure =
      INTEL_DEBUG(DEBUG_REG_PRESSURE) ? &s.regpressure_analysis.require()
                                      : NULL;

   unsigned ip = 0;
   unsigned peak_pressure = 0;
   unsigned nesting = 0;

   foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
      /* A closing control-flow instruction is printed at the indentation
       * of its opener, so un-indent before printing.
       */
      if (inst->is_control_flow_end())
         nesting -= 1;

      if (pressure) {
         peak_pressure = MAX2(peak_pressure, pressure->regs_live_at_ip[ip]);
         fprintf(file, "{%3d} ", pressure->regs_live_at_ip[ip]);
      }

      for (unsigned level = 0; level < nesting; level++)
         fprintf(file, " ");

      brw_print_instruction(s, inst, file, &defs);
      ip++;

      /* An opening control-flow instruction indents what follows it. */
      if (inst->is_control_flow_begin())
         nesting += 1;
   }

   if (pressure) {
      fprintf(file, "Maximum %3d registers live at once.\n", peak_pressure);
   }
}

/**
 * Print all instructions of \p s either to the file called \p name or to
 * stderr.
 *
 * The named file is only used when \p name is non-NULL, __normal_user()
 * returns true and the file can be opened for writing; in every other case
 * the listing goes to stderr.
 */
void
brw_print_instructions(const fs_visitor &s, const char *name)
{
   FILE *opened = NULL;

   if (name && __normal_user())
      opened = fopen(name, "w");

   /* Fall back to stderr when no file was requested or opening failed. */
   FILE *out = opened ? opened : stderr;

   brw_print_instructions_to_file(s, out);

   /* Only close a stream this function opened itself. */
   if (opened)
      fclose(opened);
}

/**
 * Return the mnemonic used when printing opcode \p op.
 *
 * Opcodes in the range [0, NUM_BRW_OPCODES) take their name from
 * brw_opcode_desc(), with two special cases handled first; every other
 * opcode listed here has a hard-coded name.  An opcode that matches no
 * case hits unreachable().
 */
static const char *
brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   switch (op) {
   case 0 ... NUM_BRW_OPCODES - 1: {
      /* The DO instruction doesn't exist on Gfx9+, but we use it to mark
       * the start of a loop in the IR.
       */
      if (op == BRW_OPCODE_DO)
         return "do";

      /* DPAS instructions may transiently exist on platforms that do not
       * support DPAS.  They will eventually be lowered, but in the meantime
       * it must be possible to query the instruction name.
       */
      if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS)
         return "dpas";

      const char *desc_name = brw_opcode_desc(isa, op)->name;
      assert(desc_name);
      return desc_name;
   }

   case FS_OPCODE_FB_WRITE_LOGICAL:
      return "fb_write_logical";
   case FS_OPCODE_FB_READ_LOGICAL:
      return "fb_read_logical";

   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   case SHADER_OPCODE_SEND:
      return "send";

   case SHADER_OPCODE_UNDEF:
      return "undef";

   case SHADER_OPCODE_TEX_LOGICAL:
      return "tex_logical";
   case SHADER_OPCODE_TXD_LOGICAL:
      return "txd_logical";
   case SHADER_OPCODE_TXF_LOGICAL:
      return "txf_logical";
   case SHADER_OPCODE_TXL_LOGICAL:
      return "txl_logical";
   case SHADER_OPCODE_TXS_LOGICAL:
      return "txs_logical";
   case FS_OPCODE_TXB_LOGICAL:
      return "txb_logical";
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
      return "txf_cms_w_logical";
   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
      return "txf_cms_w_gfx12_logical";
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
      return "txf_mcs_logical";
   case SHADER_OPCODE_LOD_LOGICAL:
      return "lod_logical";
   case SHADER_OPCODE_TG4_LOGICAL:
      return "tg4_logical";
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
      return "tg4_offset_logical";
   case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
      return "tg4_offset_lod_logical";
   case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
      return "tg4_offset_bias_logical";
   case SHADER_OPCODE_TG4_BIAS_LOGICAL:
      return "tg4_b_logical";
   case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
      return "tg4_l_logical";
   case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
      return "tg4_i_logical";
   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
      return "sampleinfo_logical";

   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
      return "image_size_logical";

   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
      return "untyped_atomic_logical";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
      return "untyped_surface_read_logical";
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
      return "untyped_surface_write_logical";
   case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
      return "unaligned_oword_block_read_logical";
   case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
      return "oword_block_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
      return "a64_untyped_read_logical";
   case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
      return "a64_oword_block_read_logical";
   case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
      return "a64_unaligned_oword_block_read_logical";
   case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
      return "a64_oword_block_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
      return "a64_untyped_write_logical";
   case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
      return "a64_byte_scattered_read_logical";
   case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
      return "a64_byte_scattered_write_logical";
   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
      return "a64_untyped_atomic_logical";
   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
      return "typed_atomic_logical";
   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
      return "typed_surface_read_logical";
   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
      return "typed_surface_write_logical";
   case SHADER_OPCODE_MEMORY_FENCE:
      return "memory_fence";
   case FS_OPCODE_SCHEDULING_FENCE:
      return "scheduling_fence";
   case SHADER_OPCODE_INTERLOCK:
      /* For an interlock we actually issue a memory fence via sendc. */
      return "interlock";

   case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
      return "byte_scattered_read_logical";
   case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
      return "byte_scattered_write_logical";
   case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
      return "dword_scattered_read_logical";
   case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
      return "dword_scattered_write_logical";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";
   case FS_OPCODE_PACK:
      return "pack";

   case SHADER_OPCODE_SCRATCH_HEADER:
      return "scratch_header";

   case SHADER_OPCODE_URB_WRITE_LOGICAL:
      return "urb_write_logical";
   case SHADER_OPCODE_URB_READ_LOGICAL:
      return "urb_read_logical";

   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
      return "find_live_channel";
   case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
      return "find_last_live_channel";
   case SHADER_OPCODE_LOAD_LIVE_CHANNELS:
      return "load_live_channels";
   case FS_OPCODE_LOAD_LIVE_CHANNELS:
      return "fs_load_live_channels";

   case SHADER_OPCODE_BROADCAST:
      return "broadcast";
   case SHADER_OPCODE_SHUFFLE:
      return "shuffle";
   case SHADER_OPCODE_SEL_EXEC:
      return "sel_exec";
   case SHADER_OPCODE_QUAD_SWIZZLE:
      return "quad_swizzle";
   case SHADER_OPCODE_CLUSTER_BROADCAST:
      return "cluster_broadcast";

   case SHADER_OPCODE_GET_BUFFER_SIZE:
      return "get_buffer_size";

   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
      return "varying_pull_const_logical";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";

   case SHADER_OPCODE_HALT_TARGET:
      return "halt_target";

   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   case SHADER_OPCODE_BARRIER:
      return "barrier";
   case SHADER_OPCODE_MULH:
      return "mulh";
   case SHADER_OPCODE_ISUB_SAT:
      return "isub_sat";
   case SHADER_OPCODE_USUB_SAT:
      return "usub_sat";
   case SHADER_OPCODE_MOV_INDIRECT:
      return "mov_indirect";
   case SHADER_OPCODE_MOV_RELOC_IMM:
      return "mov_reloc_imm";

   case RT_OPCODE_TRACE_RAY_LOGICAL:
      return "rt_trace_ray_logical";

   case SHADER_OPCODE_RND_MODE:
      return "rnd_mode";
   case SHADER_OPCODE_FLOAT_CONTROL_MODE:
      return "float_control_mode";
   case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
      return "btd_spawn_logical";
   case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
      return "btd_retire_logical";
   case SHADER_OPCODE_READ_ARCH_REG:
      return "read_arch_reg";
   case SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION:
      return "load_subgroup_invocation";
   }

   unreachable("not reached");
}

/**
 * Print the name of an ARF-file register.
 *
 * The high nibble of reg.nr selects the register kind (null, address,
 * accumulator, flag, or anything else as a generic "arf"); the low nibble
 * and reg.subnr supply the numbers that are printed.  Shared by the
 * destination and source printing paths.
 */
template <typename reg_type>
static void
print_arf_reg(FILE *file, reg_type &reg)
{
   switch (reg.nr & 0xF0) {
   case BRW_ARF_NULL:
      fprintf(file, "null");
      break;
   case BRW_ARF_ADDRESS:
      fprintf(file, "a0.%d", reg.subnr);
      break;
   case BRW_ARF_ACCUMULATOR:
      if (reg.subnr == 0)
         fprintf(file, "acc%d", reg.nr & 0x0F);
      else
         fprintf(file, "acc%d.%d", reg.nr & 0x0F, reg.subnr);
      break;
   case BRW_ARF_FLAG:
      fprintf(file, "f%d.%d", reg.nr & 0xf, reg.subnr);
      break;
   default:
      fprintf(file, "arf%d.%d", reg.nr & 0xf, reg.subnr);
      break;
   }
}

/**
 * Print an immediate operand: its value followed by a type suffix chosen
 * from imm.type.  Types without a dedicated case are printed as "???".
 */
template <typename reg_type>
static void
print_immediate(FILE *file, reg_type &imm)
{
   switch (imm.type) {
   case BRW_TYPE_HF:
      fprintf(file, "%-ghf", _mesa_half_to_float(imm.ud & 0xffff));
      break;
   case BRW_TYPE_F:
      fprintf(file, "%-gf", imm.f);
      break;
   case BRW_TYPE_DF:
      fprintf(file, "%fdf", imm.df);
      break;
   case BRW_TYPE_W:
      fprintf(file, "%dw", (int)(int16_t)imm.d);
      break;
   case BRW_TYPE_D:
      fprintf(file, "%dd", imm.d);
      break;
   case BRW_TYPE_UW:
      fprintf(file, "%duw", imm.ud & 0xffff);
      break;
   case BRW_TYPE_UD:
      fprintf(file, "%uu", imm.ud);
      break;
   case BRW_TYPE_Q:
      fprintf(file, "%" PRId64 "q", imm.d64);
      break;
   case BRW_TYPE_UQ:
      fprintf(file, "%" PRIu64 "uq", imm.u64);
      break;
   case BRW_TYPE_VF:
      /* Four packed 8-bit values, lowest byte first. */
      fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
              brw_vf_to_float((imm.ud >> 0) & 0xff),
              brw_vf_to_float((imm.ud >> 8) & 0xff),
              brw_vf_to_float((imm.ud >> 16) & 0xff),
              brw_vf_to_float((imm.ud >> 24) & 0xff));
      break;
   case BRW_TYPE_V:
   case BRW_TYPE_UV:
      fprintf(file, "%08x%s", imm.ud,
              imm.type == BRW_TYPE_V ? "V" : "UV");
      break;
   default:
      fprintf(file, "???");
      break;
   }
}

/**
 * Print a single instruction on one line of \p file.
 *
 * The line is built in this order: optional predicate, mnemonic with
 * saturate / conditional-modifier suffixes, execution size, message
 * lengths and EOT marker, the destination, each source, and finally the
 * NoMask / group / NoMaskParams / software-scoreboard annotations.
 *
 * \param s     shader the instruction belongs to
 * \param inst  instruction to print
 * \param file  output stream
 * \param defs  optional def analysis; VGRFs for which defs->get() returns
 *              non-NULL are printed as "%N" instead of "vN"
 */
void
brw_print_instruction_to_file(const fs_visitor &s, const fs_inst *inst, FILE *file, const brw::def_analysis *defs)
{
   /* --- predicate and opcode ------------------------------------------ */
   if (inst->predicate) {
      fprintf(file, "(%cf%d.%d) ",
              inst->predicate_inverse ? '-' : '+',
              inst->flag_subreg / 2,
              inst->flag_subreg % 2);
   }

   fprintf(file, "%s", brw_instruction_name(&s.compiler->isa, inst->opcode));

   if (inst->saturate)
      fprintf(file, ".sat");

   if (inst->conditional_mod) {
      fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);

      /* For these opcodes the flag register is not printed after the
       * conditional modifier.
       */
      const bool skip_flag_reg = inst->opcode == BRW_OPCODE_SEL ||
                                 inst->opcode == BRW_OPCODE_CSEL ||
                                 inst->opcode == BRW_OPCODE_IF ||
                                 inst->opcode == BRW_OPCODE_WHILE;

      if (!inst->predicate && !skip_flag_reg) {
         fprintf(file, ".f%d.%d",
                 inst->flag_subreg / 2,
                 inst->flag_subreg % 2);
      }
   }

   fprintf(file, "(%d) ", inst->exec_size);

   if (inst->mlen)
      fprintf(file, "(mlen: %d) ", inst->mlen);

   if (inst->ex_mlen)
      fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen);

   if (inst->eot)
      fprintf(file, "(EOT) ");

   /* --- destination ---------------------------------------------------- */
   switch (inst->dst.file) {
   case VGRF:
      if (defs && defs->get(inst->dst))
         fprintf(file, "%%%d", inst->dst.nr);
      else
         fprintf(file, "v%d", inst->dst.nr);
      break;
   case FIXED_GRF:
      fprintf(file, "g%d", inst->dst.nr);
      /* Sub-register is printed in units of the destination type. */
      if (inst->dst.subnr != 0) {
         fprintf(file, ".%d",
                 inst->dst.subnr / brw_type_size_bytes(inst->dst.type));
      }
      break;
   case BAD_FILE:
      fprintf(file, "(null)");
      break;
   case UNIFORM:
      fprintf(file, "***u%d***", inst->dst.nr);
      break;
   case ATTR:
      fprintf(file, "***attr%d***", inst->dst.nr);
      break;
   case ARF:
      print_arf_reg(file, inst->dst);
      break;
   case IMM:
      unreachable("not reached");
   }

   /* Show the offset when it is non-zero, or when (with grf_used still
    * zero) a VGRF write does not cover the whole allocation.
    */
   if (inst->dst.offset ||
       (!s.grf_used && inst->dst.file == VGRF &&
        s.alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) {
      const unsigned reg_size = (inst->dst.file == UNIFORM ? 4 : REG_SIZE);
      fprintf(file, "+%d.%d",
              inst->dst.offset / reg_size,
              inst->dst.offset % reg_size);
   }

   if (inst->dst.stride != 1)
      fprintf(file, "<%u>", inst->dst.stride);

   fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type));

   /* --- sources --------------------------------------------------------- */
   for (int i = 0; i < inst->sources; i++) {
      fprintf(file, ", ");

      if (inst->src[i].negate)
         fprintf(file, "-");
      if (inst->src[i].abs)
         fprintf(file, "|");

      switch (inst->src[i].file) {
      case VGRF:
         if (defs && defs->get(inst->src[i]))
            fprintf(file, "%%%d", inst->src[i].nr);
         else
            fprintf(file, "v%d", inst->src[i].nr);
         break;
      case FIXED_GRF:
         fprintf(file, "g%d", inst->src[i].nr);
         break;
      case ATTR:
         fprintf(file, "attr%d", inst->src[i].nr);
         break;
      case UNIFORM:
         fprintf(file, "u%d", inst->src[i].nr);
         break;
      case BAD_FILE:
         fprintf(file, "(null)");
         break;
      case IMM:
         print_immediate(file, inst->src[i]);
         break;
      case ARF:
         print_arf_reg(file, inst->src[i]);
         break;
      }

      if (inst->src[i].file == FIXED_GRF && inst->src[i].subnr != 0) {
         /* A fixed GRF with a sub-register must not also carry an offset. */
         assert(inst->src[i].offset == 0);

         fprintf(file, ".%d",
                 inst->src[i].subnr / brw_type_size_bytes(inst->src[i].type));
      } else if (inst->src[i].offset ||
                 (!s.grf_used && inst->src[i].file == VGRF &&
                  s.alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
         const unsigned reg_size =
            (inst->src[i].file == UNIFORM ? 4 : REG_SIZE);
         fprintf(file, "+%d.%d",
                 inst->src[i].offset / reg_size,
                 inst->src[i].offset % reg_size);
      }

      /* Closing bar of the |abs| notation. */
      if (inst->src[i].abs)
         fprintf(file, "|");

      /* Immediates already carry their type suffix; everything else gets
       * an optional stride and a type.
       */
      if (inst->src[i].file != IMM) {
         unsigned stride;

         if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) {
            /* hstride is an encoded value: 0 stays 0, otherwise it is
             * decoded as 1 << (hstride - 1).
             */
            unsigned hstride = inst->src[i].hstride;
            if (hstride == 0)
               stride = 0;
            else
               stride = (1 << (hstride - 1));
         } else {
            stride = inst->src[i].stride;
         }

         if (stride != 1)
            fprintf(file, "<%u>", stride);

         fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type));
      }
   }

   /* --- trailing annotations -------------------------------------------- */
   fprintf(file, " ");

   if (inst->force_writemask_all)
      fprintf(file, "NoMask ");

   if (inst->exec_size != s.dispatch_width)
      fprintf(file, "group%d ", inst->group);

   if (inst->has_no_mask_send_params)
      fprintf(file, "NoMaskParams ");

   if (inst->sched.pipe != TGL_PIPE_NONE) {
      fprintf(file, "{ ");
      brw_print_swsb(file, s.devinfo, inst->sched);
      fprintf(file, " } ");
   }

   fprintf(file, "\n");
}

/**
 * Print a software-scoreboard annotation to \p f.
 *
 * Output is "<pipe>@<regdist>" when swsb.regdist is non-zero, followed
 * (space separated) by "$<sbid>[.dst|.src]" when swsb.mode is non-zero.
 * The pipe letter is left empty when \p devinfo is non-NULL and reports
 * verx10 < 125.  Nothing is printed when swsb.pipe is TGL_PIPE_NONE.
 *
 * NOTE(review): if an annotation can have mode set while pipe is
 * TGL_PIPE_NONE, the early return suppresses its "$sbid" output; confirm
 * that this is intended.
 */
void
brw_print_swsb(FILE *f, const struct intel_device_info *devinfo, const tgl_swsb swsb)
{
   if (swsb.pipe == TGL_PIPE_NONE)
      return;

   if (swsb.regdist) {
      const char *pipe_letter = "";

      if (!(devinfo && devinfo->verx10 < 125)) {
         if (swsb.pipe == TGL_PIPE_FLOAT)
            pipe_letter = "F";
         else if (swsb.pipe == TGL_PIPE_INT)
            pipe_letter = "I";
         else if (swsb.pipe == TGL_PIPE_LONG)
            pipe_letter = "L";
         else if (swsb.pipe == TGL_PIPE_ALL)
            pipe_letter = "A";
         else if (swsb.pipe == TGL_PIPE_MATH)
            pipe_letter = "M";
      }

      fprintf(f, "%s@%d", pipe_letter, swsb.regdist);
   }

   if (swsb.mode) {
      /* Separate the two parts when both are present. */
      if (swsb.regdist)
         fprintf(f, " ");

      const char *sbid_suffix;
      if (swsb.mode & TGL_SBID_SET)
         sbid_suffix = "";
      else if (swsb.mode & TGL_SBID_DST)
         sbid_suffix = ".dst";
      else
         sbid_suffix = ".src";

      fprintf(f, "$%d%s", swsb.sbid, sbid_suffix);
   }
}