From c5b3d488f9bed0616f42193073fa0014cb68284d Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Wed, 15 Feb 2023 14:47:53 +0100 Subject: [PATCH] mesa/main: make ffvertex output nir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's little point in going from fixed-function to ARB progs just to convert it to NIR in the end. So let's emit NIR code directly here instead. This all made sense back when we had DRI drivers that didn't use NIR at all in the tree, but these days we unconditionally call prog_to_nir. Since we're no longer generating something that resembles ARM asm shaders, we also no longer pass is_arb_asm as true to NewProgram. But we still require legacy math rules, so we set use_legacy_math_rules in the shader_info instead. This should do the same thing, but communicates exactly what we actually need rather than having a half- truth about the source of the shader. Acked-by: Marek Olšák Reviewed-by: Emma Anholt Part-of: --- src/mesa/main/ffvertex_prog.c | 1526 ++++++++++++++------------------- 1 file changed, 620 insertions(+), 906 deletions(-) diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index f524342e7cc..d2d8e30cf69 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -42,13 +42,14 @@ #include "main/ffvertex_prog.h" #include "program/program.h" #include "program/prog_cache.h" -#include "program/prog_instruction.h" -#include "program/prog_parameter.h" #include "program/prog_statevars.h" #include "util/bitscan.h" #include "state_tracker/st_program.h" +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_builtin_builder.h" + /** Max of number of lights and texture coord units */ #define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) @@ -276,544 +277,323 @@ static void make_state_key( struct gl_context *ctx, struct state_key *key ) } } - - -/* Use uregs to represent registers internally, translate to Mesa's - * expected formats on emit. - * - * NOTE: These are passed by value extensively in this file rather - * than as usual by pointer reference. If this disturbs you, try - * remembering they are just 32bits in size. - * - * GCC is smart enough to deal with these dword-sized structures in - * much the same way as if I had defined them as dwords and was using - * macros to access and set the fields. This is much nicer and easier - * to evolve. - */ -struct ureg { - GLuint file:4; - GLint idx:9; /* relative addressing may be negative */ - /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ - GLuint negate:1; - GLuint swz:12; - GLuint pad:6; -}; - - struct tnl_program { const struct state_key *state; - struct gl_program *program; struct gl_program_parameter_list *state_params; - GLuint max_inst; /** number of instructions allocated for program */ GLboolean mvp_with_dp4; - GLuint temp_in_use; - GLuint temp_reserved; + nir_builder *b; - struct ureg eye_position; - struct ureg eye_position_z; - struct ureg eye_position_normalized; - struct ureg transformed_normal; - struct ureg identity; + nir_ssa_def *eye_position; + nir_ssa_def *eye_position_z; + nir_ssa_def *eye_position_normalized; + nir_ssa_def *transformed_normal; GLuint materials; GLuint color_materials; }; - -static const struct ureg undef = { - PROGRAM_UNDEFINED, - 0, - 0, - 0, - 0 -}; - -/* Local shorthand: - */ -#define X SWIZZLE_X -#define Y SWIZZLE_Y -#define Z SWIZZLE_Z -#define W SWIZZLE_W - - -/* Construct a ureg: - */ -static struct ureg make_ureg(GLuint file, GLint idx) +static nir_variable * +find_state_var(nir_shader *s, + gl_state_index16 tokens[STATE_LENGTH]) { - struct ureg reg; - reg.file = file; - reg.idx = idx; - reg.negate = 0; - reg.swz = SWIZZLE_NOOP; - reg.pad = 0; - return reg; -} - - -static struct ureg negate( struct ureg reg ) -{ - reg.negate ^= 1; - return reg; -} - - -static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) -{ - reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), - GET_SWZ(reg.swz, y), - GET_SWZ(reg.swz, z), - GET_SWZ(reg.swz, w)); - return reg; -} - - -static struct ureg swizzle1( struct ureg reg, int x ) -{ - return swizzle(reg, x, x, x, x); -} - - -static struct ureg get_temp( struct tnl_program *p ) -{ - int bit = ffs( ~p->temp_in_use ); - if (!bit) { - _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); - exit(1); + nir_foreach_variable_with_modes(var, s, nir_var_uniform) { + if (var->num_state_slots == 1 && + !memcmp(var->state_slots[0].tokens, tokens, + sizeof(var->state_slots[0].tokens))) + return var; } - - if ((GLuint) bit > p->program->arb.NumTemporaries) - p->program->arb.NumTemporaries = bit; - - p->temp_in_use |= 1<<(bit-1); - return make_ureg(PROGRAM_TEMPORARY, bit-1); + return NULL; } - -static struct ureg reserve_temp( struct tnl_program *p ) -{ - struct ureg temp = get_temp( p ); - p->temp_reserved |= 1<temp_in_use &= ~(1<temp_in_use |= p->temp_reserved; /* can't release reserved temps */ - } -} - -static void release_temps( struct tnl_program *p ) -{ - p->temp_in_use = p->temp_reserved; -} - - -static struct ureg register_param4(struct tnl_program *p, - GLint s0, - GLint s1, - GLint s2, - GLint s3) +static nir_variable * +register_state_var(struct tnl_program *p, + gl_state_index s0, + gl_state_index s1, + gl_state_index s2, + gl_state_index s3, + const struct glsl_type *type) { gl_state_index16 tokens[STATE_LENGTH]; - GLint idx; tokens[0] = s0; tokens[1] = s1; tokens[2] = s2; tokens[3] = s3; - idx = _mesa_add_state_reference(p->state_params, tokens); - return make_ureg(PROGRAM_STATE_VAR, idx); + nir_variable *var = find_state_var(p->b->shader, tokens); + if (var) + return var; + + int loc = _mesa_add_state_reference(p->state_params, tokens); + + char *name = _mesa_program_state_string(tokens); + var = nir_variable_create(p->b->shader, nir_var_uniform, type, + name); + free(name); + + var->num_state_slots = 1; + var->state_slots = ralloc_array(var, nir_state_slot, 1); + var->data.driver_location = loc; + memcpy(var->state_slots[0].tokens, tokens, + sizeof(var->state_slots[0].tokens)); + + p->b->shader->num_uniforms++; + return var; } - -#define register_param1(p,s0) register_param4(p,s0,0,0,0) -#define register_param2(p,s0,s1) register_param4(p,s0,s1,0,0) -#define register_param3(p,s0,s1,s2) register_param4(p,s0,s1,s2,0) - - - -/** - * \param input one of VERT_ATTRIB_x tokens. - */ -static struct ureg register_input( struct tnl_program *p, GLuint input ) +static nir_ssa_def * +load_state_var(struct tnl_program *p, + gl_state_index s0, + gl_state_index s1, + gl_state_index s2, + gl_state_index s3, + const struct glsl_type *type) { - assert(input < VERT_ATTRIB_MAX); - - if (p->state->varying_vp_inputs & VERT_BIT(input)) { - p->program->info.inputs_read |= (uint64_t)VERT_BIT(input); - return make_ureg(PROGRAM_INPUT, input); - } - else { - return register_param2(p, STATE_CURRENT_ATTRIB, input); - } + nir_variable *var = register_state_var(p, s0, s1, s2, s3, type); + return nir_load_var(p->b, var); } - -/** - * \param input one of VARYING_SLOT_x tokens. - */ -static struct ureg register_output( struct tnl_program *p, GLuint output ) +static nir_ssa_def * +load_state_vec4(struct tnl_program *p, + gl_state_index s0, + gl_state_index s1, + gl_state_index s2, + gl_state_index s3) { - p->program->info.outputs_written |= BITFIELD64_BIT(output); - return make_ureg(PROGRAM_OUTPUT, output); + return load_state_var(p, s0, s1, s2, s3, glsl_vec4_type()); } - -static struct ureg register_const4f( struct tnl_program *p, - GLfloat s0, - GLfloat s1, - GLfloat s2, - GLfloat s3) +static void +load_state_mat4(struct tnl_program *p, nir_ssa_def *out[4], + gl_state_index state_index, unsigned tex_index) { - gl_constant_value values[4]; - GLint idx; - GLuint swizzle; - values[0].f = s0; - values[1].f = s1; - values[2].f = s2; - values[3].f = s3; - idx = _mesa_add_unnamed_constant(p->program->Parameters, values, 4, - &swizzle ); - assert(swizzle == SWIZZLE_NOOP); - return make_ureg(PROGRAM_CONSTANT, idx); + for (int i = 0; i < 4; ++i) + out[i] = load_state_vec4(p, state_index, tex_index, i, i); } -#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) -#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) -#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) -#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) - -static GLboolean is_undef( struct ureg reg ) +static nir_ssa_def * +load_input(struct tnl_program *p, gl_vert_attrib attr, + const struct glsl_type *type) { - return reg.file == PROGRAM_UNDEFINED; -} + if (p->state->varying_vp_inputs & VERT_BIT(attr)) { + nir_variable *var = + nir_find_variable_with_location(p->b->shader, + nir_var_shader_in, + attr); + if (!var) { + var = nir_variable_create(p->b->shader, + nir_var_shader_in, + type, + gl_vert_attrib_name(attr)); + var->data.location = attr; + var->data.driver_location = p->b->shader->num_inputs++; -static struct ureg get_identity_param( struct tnl_program *p ) -{ - if (is_undef(p->identity)) - p->identity = register_const4f(p, 0,0,0,1); - - return p->identity; -} - -static void register_matrix_param5( struct tnl_program *p, - GLint s0, /* modelview, projection, etc */ - GLint s1, /* texture matrix number */ - GLint s2, /* first row */ - GLint s3, /* last row */ - struct ureg *matrix ) -{ - GLint i; - - /* This is a bit sad as the support is there to pull the whole - * matrix out in one go: - */ - for (i = 0; i <= s3 - s2; i++) - matrix[i] = register_param4(p, s0, s1, i, i); -} - - -static void emit_arg( struct prog_src_register *src, - struct ureg reg ) -{ - src->File = reg.file; - src->Index = reg.idx; - src->Swizzle = reg.swz; - src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; - src->RelAddr = 0; - /* Check that bitfield sizes aren't exceeded */ - assert(src->Index == reg.idx); -} - - -static void emit_dst( struct prog_dst_register *dst, - struct ureg reg, GLuint mask ) -{ - dst->File = reg.file; - dst->Index = reg.idx; - /* allow zero as a shorthand for xyzw */ - dst->WriteMask = mask ? mask : WRITEMASK_XYZW; - /* Check that bitfield sizes aren't exceeded */ - assert(dst->Index == reg.idx); -} - - -static void emit_op3fn(struct tnl_program *p, - enum prog_opcode op, - struct ureg dest, - GLuint mask, - struct ureg src0, - struct ureg src1, - struct ureg src2, - const char *fn, - GLuint line) -{ - GLuint nr; - struct prog_instruction *inst; - - assert(p->program->arb.NumInstructions <= p->max_inst); - - if (p->program->arb.NumInstructions == p->max_inst) { - /* need to extend the program's instruction array */ - struct prog_instruction *newInst; - - /* double the size */ - p->max_inst *= 2; - - newInst = - rzalloc_array(p->program, struct prog_instruction, p->max_inst); - if (!newInst) { - _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); - return; + p->b->shader->info.inputs_read |= (uint64_t)VERT_BIT(attr); } + return nir_load_var(p->b, var); + } else + return load_state_var(p, STATE_CURRENT_ATTRIB, attr, 0, 0, type); +} - _mesa_copy_instructions(newInst, p->program->arb.Instructions, - p->program->arb.NumInstructions); +static nir_ssa_def * +load_input_vec4(struct tnl_program *p, gl_vert_attrib attr) +{ + return load_input(p, attr, glsl_vec4_type()); +} - ralloc_free(p->program->arb.Instructions); +static nir_variable * +register_output(struct tnl_program *p, gl_varying_slot slot, + const struct glsl_type *type) +{ + nir_variable *var = + nir_find_variable_with_location(p->b->shader, + nir_var_shader_out, + slot); + if (var) + return var; - p->program->arb.Instructions = newInst; - } + const char *name = + gl_varying_slot_name_for_stage(slot, MESA_SHADER_VERTEX); + var = nir_variable_create(p->b->shader, nir_var_shader_out, type, name); - nr = p->program->arb.NumInstructions++; + var->data.location = slot; + var->data.driver_location = p->b->shader->num_outputs++; - inst = &p->program->arb.Instructions[nr]; - inst->Opcode = (enum prog_opcode) op; + p->b->shader->info.outputs_written |= BITFIELD64_BIT(slot); + return var; +} - emit_arg( &inst->SrcReg[0], src0 ); - emit_arg( &inst->SrcReg[1], src1 ); - emit_arg( &inst->SrcReg[2], src2 ); +static void +store_output_vec4_masked(struct tnl_program *p, gl_varying_slot slot, + nir_ssa_def *value, unsigned mask) +{ + assert(mask <= 0xf); + nir_variable *var = register_output(p, slot, glsl_vec4_type()); + nir_store_var(p->b, var, value, mask); +} - emit_dst( &inst->DstReg, dest, mask ); +static void +store_output_vec4(struct tnl_program *p, gl_varying_slot slot, + nir_ssa_def *value) +{ + store_output_vec4_masked(p, slot, value, 0xf); +} + +static void +store_output_float(struct tnl_program *p, gl_varying_slot slot, + nir_ssa_def *value) +{ + nir_variable *var = register_output(p, slot, glsl_float_type()); + nir_store_var(p->b, var, value, 0x1); } -#define emit_op3(p, op, dst, mask, src0, src1, src2) \ - emit_op3fn(p, op, dst, mask, src0, src1, src2, __func__, __LINE__) - -#define emit_op2(p, op, dst, mask, src0, src1) \ - emit_op3fn(p, op, dst, mask, src0, src1, undef, __func__, __LINE__) - -#define emit_op1(p, op, dst, mask, src0) \ - emit_op3fn(p, op, dst, mask, src0, undef, undef, __func__, __LINE__) - - -static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) +static nir_ssa_def * +emit_matrix_transform_vec4(nir_builder *b, + nir_ssa_def *mat[4], + nir_ssa_def *src) { - if (reg.file == PROGRAM_TEMPORARY && - !(p->temp_reserved & (1<eye_position)) { - struct ureg pos = register_input( p, VERT_ATTRIB_POS ); - struct ureg modelview[4]; - - p->eye_position = reserve_temp(p); - + if (!p->eye_position) { + nir_ssa_def *pos = + load_input_vec4(p, VERT_ATTRIB_POS); if (p->mvp_with_dp4) { - register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, - modelview ); - - emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); - } - else { - register_matrix_param5( p, STATE_MODELVIEW_MATRIX_TRANSPOSE, 0, 0, 3, - modelview ); - - emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); + nir_ssa_def *modelview[4]; + load_state_mat4(p, modelview, STATE_MODELVIEW_MATRIX, 0); + p->eye_position = + emit_matrix_transform_vec4(p->b, modelview, pos); + } else { + nir_ssa_def *modelview[4]; + load_state_mat4(p, modelview, + STATE_MODELVIEW_MATRIX_TRANSPOSE, 0); + p->eye_position = + emit_transpose_matrix_transform_vec4(p->b, modelview, pos); } } return p->eye_position; } - -static struct ureg get_eye_position_z( struct tnl_program *p ) +static nir_ssa_def * +get_eye_position_z(struct tnl_program *p) { - if (!is_undef(p->eye_position)) - return swizzle1(p->eye_position, Z); - - if (is_undef(p->eye_position_z)) { - struct ureg pos = register_input( p, VERT_ATTRIB_POS ); - struct ureg modelview[4]; - - p->eye_position_z = reserve_temp(p); - - register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, - modelview ); - - emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); - } - - return p->eye_position_z; + return nir_channel(p->b, get_eye_position(p), 2); } - -static struct ureg get_eye_position_normalized( struct tnl_program *p ) +static nir_ssa_def * +get_eye_position_normalized(struct tnl_program *p) { - if (is_undef(p->eye_position_normalized)) { - struct ureg eye = get_eye_position(p); - p->eye_position_normalized = reserve_temp(p); - emit_normalize_vec3(p, p->eye_position_normalized, eye); + if (!p->eye_position_normalized) { + nir_ssa_def *eye = get_eye_position(p); + p->eye_position_normalized = emit_normalize_vec3(p->b, eye); } return p->eye_position_normalized; } - -static struct ureg get_transformed_normal( struct tnl_program *p ) +static nir_ssa_def * +get_transformed_normal(struct tnl_program *p) { - if (is_undef(p->transformed_normal) && + if (!p->transformed_normal && !p->state->need_eye_coords && !p->state->normalize && - !(p->state->need_eye_coords == p->state->rescale_normals)) - { - p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); - } - else if (is_undef(p->transformed_normal)) - { - struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); - struct ureg mvinv[3]; - struct ureg transformed_normal = reserve_temp(p); + !(p->state->need_eye_coords == p->state->rescale_normals)) { + p->transformed_normal = + load_input(p, VERT_ATTRIB_NORMAL, + glsl_vector_type(GLSL_TYPE_FLOAT, 3)); + } else if (!p->transformed_normal) { + nir_ssa_def *normal = + load_input(p, VERT_ATTRIB_NORMAL, + glsl_vector_type(GLSL_TYPE_FLOAT, 3)); if (p->state->need_eye_coords) { - register_matrix_param5( p, STATE_MODELVIEW_MATRIX_INVTRANS, 0, 0, 2, - mvinv ); - - /* Transform to eye space: - */ - emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); - normal = transformed_normal; + nir_ssa_def *mvinv[4]; + load_state_mat4(p, mvinv, STATE_MODELVIEW_MATRIX_INVTRANS, 0); + normal = emit_matrix_transform_vec3(p->b, mvinv, normal); } /* Normalize/Rescale: */ - if (p->state->normalize) { - emit_normalize_vec3( p, transformed_normal, normal ); - normal = transformed_normal; - } + if (p->state->normalize) + normal = emit_normalize_vec3(p->b, normal); else if (p->state->need_eye_coords == p->state->rescale_normals) { - /* This is already adjusted for eye/non-eye rendering: - */ - struct ureg rescale = register_param1(p, STATE_NORMAL_SCALE); - - emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); - normal = transformed_normal; + nir_ssa_def *scale = + load_state_var(p, STATE_NORMAL_SCALE, 0, 0, 0, + glsl_float_type()); + normal = nir_fmul(p->b, normal, scale); } - assert(normal.file == PROGRAM_TEMPORARY); p->transformed_normal = normal; } return p->transformed_normal; } - -static void build_hpos( struct tnl_program *p ) +static void +build_hpos(struct tnl_program *p) { - struct ureg pos = register_input( p, VERT_ATTRIB_POS ); - struct ureg hpos = register_output( p, VARYING_SLOT_POS ); - struct ureg mvp[4]; - + nir_ssa_def *pos = + load_input_vec4(p, VERT_ATTRIB_POS); if (p->mvp_with_dp4) { - register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, - mvp ); - emit_matrix_transform_vec4( p, hpos, mvp, pos ); + nir_ssa_def *mvp[4]; + load_state_mat4(p, mvp, STATE_MODELVIEW_MATRIX, 0); + pos = emit_matrix_transform_vec4(p->b, mvp, pos); + } else { + nir_ssa_def *mvp[4]; + load_state_mat4(p, mvp, STATE_MVP_MATRIX_TRANSPOSE, 0); + pos = emit_transpose_matrix_transform_vec4(p->b, mvp, pos); } - else { - register_matrix_param5( p, STATE_MVP_MATRIX_TRANSPOSE, 0, 0, 3, - mvp ); - emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); - } -} + store_output_vec4(p, VARYING_SLOT_POS, pos); +} static GLuint material_attrib( GLuint side, GLuint property ) { @@ -852,21 +632,22 @@ static void set_material_flags( struct tnl_program *p ) } -static struct ureg get_material( struct tnl_program *p, GLuint side, - GLuint property ) +static nir_ssa_def * +get_material(struct tnl_program *p, GLuint side, + GLuint property) { GLuint attrib = material_attrib(side, property); if (p->color_materials & (1<materials & (1<materials & SCENE_COLOR_BITS(side)) { - struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); - struct ureg material_emission = get_material(p, side, STATE_EMISSION); - struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); - struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); - struct ureg tmp = make_temp(p, material_diffuse); - emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, - material_ambient, material_emission); - return tmp; + nir_ssa_def *lm_ambient = + load_state_vec4(p, STATE_LIGHTMODEL_AMBIENT, 0, 0, 0); + nir_ssa_def *material_emission = + get_material(p, side, STATE_EMISSION); + nir_ssa_def *material_ambient = + get_material(p, side, STATE_AMBIENT); + nir_ssa_def *material_diffuse = + get_material(p, side, STATE_DIFFUSE); + + // rgb: material_emission + material_ambient * lm_ambient + // alpha: material_diffuse.a + return nir_vector_insert_imm(p->b, nir_fmad(p->b, + lm_ambient, + material_ambient, + material_emission), + nir_channel(p->b, + material_diffuse, + 3), + 3); } else - return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); + return load_state_vec4(p, STATE_LIGHTMODEL_SCENECOLOR, side, 0, 0); } - -static struct ureg get_lightprod( struct tnl_program *p, GLuint light, - GLuint side, GLuint property, bool *is_state_light ) +static nir_ssa_def * +get_lightprod(struct tnl_program *p, GLuint light, + GLuint side, GLuint property, bool *is_state_light) { GLuint attrib = material_attrib(side, property); if (p->materials & (1<state->unit[i].light_spotcutoff_is_180) { - struct ureg spot_dir_norm = register_param2(p, STATE_LIGHT_SPOT_DIR_NORMALIZED, i); - struct ureg spot = get_temp(p); - struct ureg slt = get_temp(p); + nir_ssa_def *spot_dir_norm = + load_state_vec4(p, STATE_LIGHT_SPOT_DIR_NORMALIZED, i, 0, 0); + attenuation = + load_state_vec4(p, STATE_LIGHT, i, STATE_ATTENUATION, 0); - attenuation = register_param3(p, STATE_LIGHT, i, STATE_ATTENUATION); - att = get_temp(p); - - emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); - emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); - emit_op1(p, OPCODE_ABS, spot, 0, spot); - emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); - emit_op2(p, OPCODE_MUL, att, 0, slt, spot); - - release_temp(p, spot); - release_temp(p, slt); + nir_ssa_def *spot = nir_fdot3(p->b, nir_fneg(p->b, VPpli), + spot_dir_norm); + nir_ssa_def *slt = nir_slt(p->b, nir_channel(p->b, spot_dir_norm, 3), + spot); + spot = nir_fabs(p->b, spot); + spot = nir_fpow(p->b, spot, nir_channel(p->b, attenuation, 3)); + att = nir_fmul(p->b, slt, spot); } /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62): * * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero) */ - if (p->state->unit[i].light_attenuated && !is_undef(dist)) { - if (is_undef(att)) - att = get_temp(p); - - if (is_undef(attenuation)) - attenuation = register_param3(p, STATE_LIGHT, i, STATE_ATTENUATION); - - /* 1/d,d,d,1/d */ - emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); - /* 1,d,d*d,1/d */ - emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); - /* 1/dist-atten */ - emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); - - if (!p->state->unit[i].light_spotcutoff_is_180) { - /* dist-atten */ - emit_op1(p, OPCODE_RCP, dist, 0, dist); - /* spot-atten * dist-atten */ - emit_op2(p, OPCODE_MUL, att, 0, dist, att); - } - else { - /* dist-atten */ - emit_op1(p, OPCODE_RCP, att, 0, dist); + if (p->state->unit[i].light_attenuated && dist) { + if (!attenuation) { + attenuation = load_state_vec4(p, STATE_LIGHT, i, + STATE_ATTENUATION, 0); } + + /* 1, d, d*d */ + nir_ssa_def *tmp = nir_vec3(p->b, + nir_imm_float(p->b, 1.0f), + dist, + nir_fmul(p->b, dist, dist) + ); + tmp = nir_frcp(p->b, nir_fdot3(p->b, tmp, attenuation)); + + if (!p->state->unit[i].light_spotcutoff_is_180) + return nir_fmul(p->b, tmp, att); + return tmp; } return att; } +static nir_ssa_def * +emit_lit(nir_builder *b, + nir_ssa_def *src) +{ + nir_ssa_def *zero = nir_imm_zero(b, 1, 32); + nir_ssa_def *one = nir_imm_float(b, 1.0f); + nir_ssa_def *src_x = nir_channel(b, src, 0); + nir_ssa_def *src_y = nir_channel(b, src, 1); + nir_ssa_def *src_w = nir_channel(b, src, 3); + + nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, src_w, + nir_imm_float(b, 128.0f)), + nir_imm_float(b, -128.0f)); + nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src_y, zero), wclamp); + + return nir_vec4(b, + one, + nir_fmax(b, src_x, zero), + nir_bcsel(b, + nir_fge(b, zero, src_x), + zero, + pow), + one); +} /** * Compute: * lit.y = MAX(0, dots.x) * lit.z = SLT(0, dots.x) */ -static void emit_degenerate_lit( struct tnl_program *p, - struct ureg lit, - struct ureg dots ) +static nir_ssa_def * +emit_degenerate_lit(nir_builder *b, + nir_ssa_def *dots) { - struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ + nir_ssa_def *id = nir_imm_vec4(b, 0.0f, 0.0f, 0.0f, 1.0f); /* Note that lit.x & lit.w will not be examined. Note also that * dots.xyzw == dots.xxxx. */ - /* MAX lit, id, dots; - */ - emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); - - /* result[2] = (in > 0 ? 1 : 0) - * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 - */ - emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); + nir_ssa_def *zero = nir_imm_zero(b, 1, 32); + nir_ssa_def *dots_x = nir_channel(b, dots, 0); + nir_ssa_def *tmp = nir_fmax(b, id, dots); + return nir_vector_insert_imm(b, tmp, nir_slt(b, zero, dots_x), 2); } @@ -1016,11 +817,11 @@ static void build_lighting( struct tnl_program *p ) const GLboolean twoside = p->state->light_twoside; const GLboolean separate = p->state->separate_specular; GLuint nr_lights = 0, count = 0; - struct ureg normal = get_transformed_normal(p); - struct ureg lit = get_temp(p); - struct ureg dots = get_temp(p); - struct ureg _col0 = undef, _col1 = undef; - struct ureg _bfc0 = undef, _bfc1 = undef; + nir_ssa_def *lit = NULL; + nir_ssa_def *dots = nir_imm_zero(p->b, 4, 32); + nir_ssa_def *normal = get_transformed_normal(p); + nir_ssa_def *_col0 = NULL, *_col1 = NULL; + nir_ssa_def *_bfc0 = NULL, *_bfc1 = NULL; GLuint i; /* @@ -1039,16 +840,14 @@ static void build_lighting( struct tnl_program *p ) { if (!p->state->material_shininess_is_zero) { - struct ureg shininess = get_material(p, 0, STATE_SHININESS); - emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); - release_temp(p, shininess); + nir_ssa_def *shininess = get_material(p, 0, STATE_SHININESS); + nir_ssa_def *tmp = nir_channel(p->b, shininess, 0); + dots = nir_vector_insert_imm(p->b, dots, tmp, 3); } - _col0 = make_temp(p, get_scenecolor(p, 0)); + _col0 = get_scenecolor(p, 0); if (separate) - _col1 = make_temp(p, get_identity_param(p)); - else - _col1 = _col0; + _col1 = nir_imm_vec4(p->b, 0.0f, 0.0f, 0.0f, 1.0f); } if (twoside) { @@ -1056,51 +855,38 @@ static void build_lighting( struct tnl_program *p ) /* Note that we negate the back-face specular exponent here. * The negation will be un-done later in the back-face code below. */ - struct ureg shininess = get_material(p, 1, STATE_SHININESS); - emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, - negate(swizzle1(shininess,X))); - release_temp(p, shininess); + nir_ssa_def *shininess = get_material(p, 1, STATE_SHININESS); + nir_ssa_def *tmp = nir_channel(p->b, shininess, 0); + tmp = nir_fneg(p->b, tmp); + dots = nir_vector_insert_imm(p->b, dots, tmp, 2); } - _bfc0 = make_temp(p, get_scenecolor(p, 1)); + _bfc0 = get_scenecolor(p, 1); if (separate) - _bfc1 = make_temp(p, get_identity_param(p)); - else - _bfc1 = _bfc0; + _bfc1 = nir_imm_vec4(p->b, 0.0f, 0.0f, 0.0f, 1.0f); } /* If no lights, still need to emit the scenecolor. */ - { - struct ureg res0 = register_output( p, VARYING_SLOT_COL0 ); - emit_op1(p, OPCODE_MOV, res0, 0, _col0); - } + store_output_vec4(p, VARYING_SLOT_COL0, _col0); - if (separate) { - struct ureg res1 = register_output( p, VARYING_SLOT_COL1 ); - emit_op1(p, OPCODE_MOV, res1, 0, _col1); - } + if (separate) + store_output_vec4(p, VARYING_SLOT_COL1, _col1); - if (twoside) { - struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 ); - emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); - } + if (twoside) + store_output_vec4(p, VARYING_SLOT_BFC0, _bfc0); - if (twoside && separate) { - struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 ); - emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); - } + if (twoside && separate) + store_output_vec4(p, VARYING_SLOT_BFC1, _bfc1); - if (nr_lights == 0) { - release_temps(p); + if (nr_lights == 0) return; - } /* Declare light products first to place them sequentially next to each * other for optimal constant uploads. */ - struct ureg lightprod_front[MAX_LIGHTS][3]; - struct ureg lightprod_back[MAX_LIGHTS][3]; + nir_ssa_def *lightprod_front[MAX_LIGHTS][3]; + nir_ssa_def *lightprod_back[MAX_LIGHTS][3]; bool lightprod_front_is_state_light[MAX_LIGHTS][3]; bool lightprod_back_is_state_light[MAX_LIGHTS][3]; @@ -1132,9 +918,12 @@ static void build_lighting( struct tnl_program *p ) for (i = 0; i < MAX_LIGHTS; i++) { if (p->state->unit[i].light_enabled) { if (p->state->unit[i].light_eyepos3_is_zero) - register_param2(p, STATE_LIGHT_POSITION_NORMALIZED, i); + register_state_var(p, STATE_LIGHT_POSITION_NORMALIZED, + i, 0, 0, + glsl_vector_type(GLSL_TYPE_FLOAT, 3)); else - register_param2(p, STATE_LIGHT_POSITION, i); + register_state_var(p, STATE_LIGHT_POSITION, i, 0, 0, + glsl_vec4_type()); } } for (i = 0; i < MAX_LIGHTS; i++) { @@ -1142,68 +931,69 @@ static void build_lighting( struct tnl_program *p ) (!p->state->unit[i].light_spotcutoff_is_180 || (p->state->unit[i].light_attenuated && !p->state->unit[i].light_eyepos3_is_zero))) - register_param3(p, STATE_LIGHT, i, STATE_ATTENUATION); + register_state_var(p, STATE_LIGHT, i, STATE_ATTENUATION, 0, + glsl_vec4_type()); } for (i = 0; i < MAX_LIGHTS; i++) { if (p->state->unit[i].light_enabled) { - struct ureg half = undef; - struct ureg att = undef, VPpli = undef; - struct ureg dist = undef; + nir_ssa_def *half = NULL; + nir_ssa_def *att = NULL, *VPpli = NULL; + nir_ssa_def *dist = NULL; count++; if (p->state->unit[i].light_eyepos3_is_zero) { - VPpli = register_param2(p, STATE_LIGHT_POSITION_NORMALIZED, i); + VPpli = load_state_var(p, STATE_LIGHT_POSITION_NORMALIZED, + i, 0, 0, + glsl_vector_type(GLSL_TYPE_FLOAT, 3)); } else { - struct ureg Ppli = register_param2(p, STATE_LIGHT_POSITION, i); - struct ureg V = get_eye_position(p); + nir_ssa_def *Ppli = + load_state_vec4(p, STATE_LIGHT_POSITION, i, 0, 0); - VPpli = get_temp(p); - dist = get_temp(p); - - /* Calculate VPpli vector - */ - emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); + nir_ssa_def *V = get_eye_position(p); + VPpli = nir_fsub(p->b, Ppli, V); /* Normalize VPpli. The dist value also used in * attenuation below. */ - emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); - emit_op1(p, OPCODE_RSQ, dist, 0, dist); - emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); + dist = nir_frsq(p->b, nir_fdot3(p->b, VPpli, VPpli)); + VPpli = nir_fmul(p->b, VPpli, dist); } /* Calculate attenuation: */ att = calculate_light_attenuation(p, i, VPpli, dist); - release_temp(p, dist); /* Calculate viewer direction, or use infinite viewer: */ if (!p->state->material_shininess_is_zero) { if (p->state->light_local_viewer) { - struct ureg eye_hat = get_eye_position_normalized(p); - half = get_temp(p); - emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); - emit_normalize_vec3(p, half, half); + nir_ssa_def *eye_hat = get_eye_position_normalized(p); + half = emit_normalize_vec3(p->b, + nir_fsub(p->b, VPpli, eye_hat)); } else if (p->state->unit[i].light_eyepos3_is_zero) { - half = register_param2(p, STATE_LIGHT_HALF_VECTOR, i); + half = + load_state_var(p, STATE_LIGHT_HALF_VECTOR, + i, 0, 0, + glsl_vector_type(GLSL_TYPE_FLOAT, 3)); } else { - struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); - half = get_temp(p); - emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); - emit_normalize_vec3(p, half, half); + nir_ssa_def *tmp = + nir_fadd(p->b, + VPpli, + nir_imm_vec3(p->b, 0.0f, 0.0f, 1.0f)); + half = emit_normalize_vec3(p->b, tmp); } } /* Calculate dot products: */ + nir_ssa_def *dot = nir_fdot3(p->b, normal, VPpli); if (p->state->material_shininess_is_zero) { - emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); - } - else { - emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); - emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); + dots = nir_vec4(p->b, dot, dot, dot, dot); + } else { + dots = nir_vector_insert_imm(p->b, dots, dot, 0); + dot = nir_fdot3(p->b, normal, half); + dots = nir_vector_insert_imm(p->b, dots, dot, 1); } /* Front face lighting: @@ -1214,215 +1004,156 @@ static void build_lighting( struct tnl_program *p ) */ for (int j = 0; j < 3; j++) { if (lightprod_front_is_state_light[i][j]) { - struct ureg material_value = get_material(p, 0, STATE_AMBIENT + j); - struct ureg tmp = get_temp(p); - emit_op2(p, OPCODE_MUL, tmp, 0, lightprod_front[i][j], material_value); - lightprod_front[i][j] = tmp; + nir_ssa_def *material = + get_material(p, 0, STATE_AMBIENT + j); + lightprod_front[i][j] = + nir_fmul(p->b, lightprod_front[i][j], material); } } - struct ureg ambient = lightprod_front[i][0]; - struct ureg diffuse = lightprod_front[i][1]; - struct ureg specular = lightprod_front[i][2]; - struct ureg res0, res1; - GLuint mask0, mask1; + nir_ssa_def *ambient = lightprod_front[i][0]; + nir_ssa_def *diffuse = lightprod_front[i][1]; + nir_ssa_def *specular = lightprod_front[i][2]; - if (count == nr_lights) { - if (separate) { - mask0 = WRITEMASK_XYZ; - mask1 = WRITEMASK_XYZ; - res0 = register_output( p, VARYING_SLOT_COL0 ); - res1 = register_output( p, VARYING_SLOT_COL1 ); - } - else { - mask0 = 0; - mask1 = WRITEMASK_XYZ; - res0 = _col0; - res1 = register_output( p, VARYING_SLOT_COL0 ); - } - } - else { - mask0 = 0; - mask1 = 0; - res0 = _col0; - res1 = _col1; - } - - if (!is_undef(att)) { + if (att) { /* light is attenuated by distance */ - emit_op1(p, OPCODE_LIT, lit, 0, dots); - emit_op2(p, OPCODE_MUL, lit, 0, lit, att); - emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); - } - else if (!p->state->material_shininess_is_zero) { + lit = emit_lit(p->b, dots); + lit = nir_fmul(p->b, lit, att); + _col0 = nir_fmad(p->b, nir_channel(p->b, lit, 0), ambient, _col0); + } else if (!p->state->material_shininess_is_zero) { /* there's a non-zero specular term */ - emit_op1(p, OPCODE_LIT, lit, 0, dots); - emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); - } - else { + lit = emit_lit(p->b, dots); + _col0 = nir_fadd(p->b, ambient, _col0); + } else { /* no attenutation, no specular */ - emit_degenerate_lit(p, lit, dots); - emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); + lit = emit_degenerate_lit(p->b, dots); + _col0 = nir_fadd(p->b, ambient, _col0); } - emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); - emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); - - release_temp(p, ambient); - release_temp(p, diffuse); - release_temp(p, specular); + _col0 = nir_fmad(p->b, nir_channel(p->b, lit, 1), + diffuse, _col0); + if (separate) + _col1 = nir_fmad(p->b, nir_channel(p->b, lit, 2), + specular, _col1); + else + _col0 = nir_fmad(p->b, nir_channel(p->b, lit, 2), + specular, _col0); } - /* Back face lighting: */ + nir_ssa_def *old_dots = dots; if (twoside) { /* Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in * get_lightprod to avoid using too many temps. */ for (int j = 0; j < 3; j++) { if (lightprod_back_is_state_light[i][j]) { - struct ureg material_value = get_material(p, 1, STATE_AMBIENT + j); - struct ureg tmp = get_temp(p); - emit_op2(p, OPCODE_MUL, tmp, 1, lightprod_back[i][j], material_value); - lightprod_back[i][j] = tmp; + nir_ssa_def *material = + get_material(p, 1, STATE_AMBIENT + j); + lightprod_back[i][j] = + nir_fmul(p->b, lightprod_back[i][j], material); } } - struct ureg ambient = lightprod_back[i][0]; - struct ureg diffuse = lightprod_back[i][1]; - struct ureg specular = lightprod_back[i][2]; - struct ureg res0, res1; - GLuint mask0, mask1; - - if (count == nr_lights) { - if (separate) { - mask0 = WRITEMASK_XYZ; - mask1 = WRITEMASK_XYZ; - res0 = register_output( p, VARYING_SLOT_BFC0 ); - res1 = register_output( p, VARYING_SLOT_BFC1 ); - } - else { - mask0 = 0; - mask1 = WRITEMASK_XYZ; - res0 = _bfc0; - res1 = register_output( p, VARYING_SLOT_BFC0 ); - } - } - else { - res0 = _bfc0; - res1 = _bfc1; - mask0 = 0; - mask1 = 0; - } + nir_ssa_def *ambient = lightprod_back[i][0]; + nir_ssa_def *diffuse = lightprod_back[i][1]; + nir_ssa_def *specular = lightprod_back[i][2]; /* For the back face we need to negate the X and Y component * dot products. dots.Z has the negated back-face specular * exponent. We swizzle that into the W position. This * negation makes the back-face specular term positive again. */ - dots = negate(swizzle(dots,X,Y,W,Z)); + unsigned swiz_xywz[] = {0, 1, 3, 2}; + nir_ssa_def *dots = + nir_fneg(p->b, nir_swizzle(p->b, old_dots, swiz_xywz, 4)); - if (!is_undef(att)) { - emit_op1(p, OPCODE_LIT, lit, 0, dots); - emit_op2(p, OPCODE_MUL, lit, 0, lit, att); - emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); - } - else if (!p->state->material_shininess_is_zero) { - emit_op1(p, OPCODE_LIT, lit, 0, dots); - emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ - } - else { - emit_degenerate_lit(p, lit, dots); - emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); + if (att) { + /* light is attenuated by distance */ + lit = emit_lit(p->b, dots); + lit = nir_fmul(p->b, lit, att); + _bfc0 = nir_fmad(p->b, nir_channel(p->b, lit, 0), ambient, _bfc0); + } else if (!p->state->material_shininess_is_zero) { + /* there's a non-zero specular term */ + lit = emit_lit(p->b, dots); + _bfc0 = nir_fadd(p->b, ambient, _bfc0); + } else { + /* no attenutation, no specular */ + lit = emit_degenerate_lit(p->b, dots); + _bfc0 = nir_fadd(p->b, ambient, _bfc0); } - emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); - emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); - /* restore dots to its original state for subsequent lights - * by negating and swizzling again. - */ - dots = negate(swizzle(dots,X,Y,W,Z)); - - release_temp(p, ambient); - release_temp(p, diffuse); - release_temp(p, specular); - } - - release_temp(p, half); - release_temp(p, VPpli); - release_temp(p, att); + _bfc0 = nir_fmad(p->b, nir_channel(p->b, lit, 1), + diffuse, _bfc0); + if (separate) + _bfc1 = nir_fmad(p->b, nir_channel(p->b, lit, 2), + specular, _bfc1); + else + _bfc0 = nir_fmad(p->b, nir_channel(p->b, lit, 2), + specular, _bfc0); + } } } - release_temps( p ); + store_output_vec4_masked(p, VARYING_SLOT_COL0, _col0, 0x7); + if (separate) + store_output_vec4_masked(p, VARYING_SLOT_COL1, _col1, 0x7); + + if (twoside) { + store_output_vec4_masked(p, VARYING_SLOT_BFC0, _bfc0, 0x7); + if (separate) + store_output_vec4_masked(p, VARYING_SLOT_BFC1, _bfc1, 0x7); + } } static void build_fog( struct tnl_program *p ) { - struct ureg fog = register_output(p, VARYING_SLOT_FOGC); - struct ureg input; - + nir_ssa_def *fog; switch (p->state->fog_distance_mode) { - case FDM_EYE_RADIAL: { /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */ - struct ureg tmp = get_temp(p); - input = get_eye_position(p); - emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, input, input); - emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); - emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, tmp); + case FDM_EYE_RADIAL: + /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */ + fog = nir_fast_length(p->b, nir_channels(p->b, + get_eye_position(p), + 0x7)); break; - } case FDM_EYE_PLANE: /* Z = Ze */ - input = get_eye_position_z(p); - emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input); + fog = get_eye_position_z(p); break; case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */ - input = get_eye_position_z(p); - emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); + fog = nir_fabs(p->b, get_eye_position_z(p)); break; case FDM_FROM_ARRAY: - input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); - emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); + fog = load_input(p, VERT_ATTRIB_FOG, glsl_float_type()); break; default: assert(!"Bad fog mode in build_fog()"); break; } - emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); + store_output_float(p, VARYING_SLOT_FOGC, fog); } -static void build_reflect_texgen( struct tnl_program *p, - struct ureg dest, - GLuint writemask ) +static nir_ssa_def * +build_reflect_texgen(struct tnl_program *p) { - struct ureg normal = get_transformed_normal(p); - struct ureg eye_hat = get_eye_position_normalized(p); - struct ureg tmp = get_temp(p); - + nir_ssa_def *normal = get_transformed_normal(p); + nir_ssa_def *eye_hat = get_eye_position_normalized(p); /* n.u */ - emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); + nir_ssa_def *tmp = nir_fdot3(p->b, normal, eye_hat); /* 2n.u */ - emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); + tmp = nir_fadd(p->b, tmp, tmp); /* (-2n.u)n + u */ - emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); - - release_temp(p, tmp); + return nir_fmad(p->b, nir_fneg(p->b, tmp), normal, eye_hat); } -static void build_sphere_texgen( struct tnl_program *p, - struct ureg dest, - GLuint writemask ) +static nir_ssa_def * +build_sphere_texgen(struct tnl_program *p) { - struct ureg normal = get_transformed_normal(p); - struct ureg eye_hat = get_eye_position_normalized(p); - struct ureg tmp = get_temp(p); - struct ureg half = register_scalar_const(p, .5); - struct ureg r = get_temp(p); - struct ureg inv_m = get_temp(p); - struct ureg id = get_identity_param(p); + nir_ssa_def *normal = get_transformed_normal(p); + nir_ssa_def *eye_hat = get_eye_position_normalized(p); /* Could share the above calculations, but it would be * a fairly odd state for someone to set (both sphere and @@ -1433,28 +1164,23 @@ static void build_sphere_texgen( struct tnl_program *p, */ /* n.u */ - emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); + nir_ssa_def *tmp = nir_fdot3(p->b, normal, eye_hat); /* 2n.u */ - emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); + tmp = nir_fadd(p->b, tmp, tmp); /* (-2n.u)n + u */ - emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); + nir_ssa_def *r = nir_fmad(p->b, nir_fneg(p->b, tmp), normal, eye_hat); /* r + 0,0,1 */ - emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); + tmp = nir_fadd(p->b, r, nir_imm_vec4(p->b, 0.0f, 0.0f, 1.0f, 0.0f)); /* rx^2 + ry^2 + (rz+1)^2 */ - emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); + tmp = nir_fdot3(p->b, tmp, tmp); /* 2/m */ - emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); + tmp = nir_frsq(p->b, tmp); /* 1/m */ - emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); + nir_ssa_def *inv_m = nir_fmul_imm(p->b, tmp, 0.5f); /* r/m + 1/2 */ - emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); - - release_temp(p, tmp); - release_temp(p, r); - release_temp(p, inv_m); + return nir_fmad(p->b, r, inv_m, nir_imm_float(p->b, 0.5f)); } - static void build_texture_transform( struct tnl_program *p ) { GLuint i, j; @@ -1467,107 +1193,100 @@ static void build_texture_transform( struct tnl_program *p ) if (p->state->unit[i].coord_replace) continue; - if (p->state->unit[i].texgen_enabled || - p->state->unit[i].texmat_enabled) { + nir_ssa_def *texcoord; + if (p->state->unit[i].texgen_enabled) { + GLuint copy_mask = 0; + GLuint sphere_mask = 0; + GLuint reflect_mask = 0; + GLuint normal_mask = 0; + GLuint modes[4]; + nir_ssa_def *comps[4]; - GLuint texmat_enabled = p->state->unit[i].texmat_enabled; - struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i); - struct ureg out_texgen = undef; + modes[0] = p->state->unit[i].texgen_mode0; + modes[1] = p->state->unit[i].texgen_mode1; + modes[2] = p->state->unit[i].texgen_mode2; + modes[3] = p->state->unit[i].texgen_mode3; - if (p->state->unit[i].texgen_enabled) { - GLuint copy_mask = 0; - GLuint sphere_mask = 0; - GLuint reflect_mask = 0; - GLuint normal_mask = 0; - GLuint modes[4]; - - if (texmat_enabled) - out_texgen = get_temp(p); - else - out_texgen = out; - - modes[0] = p->state->unit[i].texgen_mode0; - modes[1] = p->state->unit[i].texgen_mode1; - modes[2] = p->state->unit[i].texgen_mode2; - modes[3] = p->state->unit[i].texgen_mode3; - - for (j = 0; j < 4; j++) { - switch (modes[j]) { - case TXG_OBJ_LINEAR: { - struct ureg obj = register_input(p, VERT_ATTRIB_POS); - struct ureg plane = - register_param3(p, STATE_TEXGEN, i, - STATE_TEXGEN_OBJECT_S + j); - - emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, - obj, plane ); - break; - } - case TXG_EYE_LINEAR: { - struct ureg eye = get_eye_position(p); - struct ureg plane = - register_param3(p, STATE_TEXGEN, i, - STATE_TEXGEN_EYE_S + j); - - emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, - eye, plane ); - break; - } - case TXG_SPHERE_MAP: - sphere_mask |= WRITEMASK_X << j; - break; - case TXG_REFLECTION_MAP: - reflect_mask |= WRITEMASK_X << j; - break; - case TXG_NORMAL_MAP: - normal_mask |= WRITEMASK_X << j; - break; - case TXG_NONE: - copy_mask |= WRITEMASK_X << j; - } + for (j = 0; j < 4; j++) { + switch (modes[j]) { + case TXG_OBJ_LINEAR: { + nir_ssa_def *obj = load_input_vec4(p, VERT_ATTRIB_POS); + nir_ssa_def *plane = + load_state_vec4(p, STATE_TEXGEN, i, + STATE_TEXGEN_OBJECT_S + j, 0); + comps[j] = nir_fdot4(p->b, obj, plane); + break; } - - if (sphere_mask) { - build_sphere_texgen(p, out_texgen, sphere_mask); + case TXG_EYE_LINEAR: { + nir_ssa_def *eye = get_eye_position(p); + nir_ssa_def *plane = + load_state_vec4(p, STATE_TEXGEN, i, + STATE_TEXGEN_EYE_S + j, 0); + comps[j] = nir_fdot4(p->b, eye, plane); + break; } - - if (reflect_mask) { - build_reflect_texgen(p, out_texgen, reflect_mask); - } - - if (normal_mask) { - struct ureg normal = get_transformed_normal(p); - emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); - } - - if (copy_mask) { - struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); - emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); + case TXG_SPHERE_MAP: + sphere_mask |= 1u << j; + break; + case TXG_REFLECTION_MAP: + reflect_mask |= 1u << j; + break; + case TXG_NORMAL_MAP: + normal_mask |= 1u << j; + break; + case TXG_NONE: + copy_mask |= 1u << j; } } - if (texmat_enabled) { - struct ureg texmat[4]; - struct ureg in = (!is_undef(out_texgen) ? - out_texgen : - register_input(p, VERT_ATTRIB_TEX0+i)); - if (p->mvp_with_dp4) { - register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, - texmat ); - emit_matrix_transform_vec4( p, out, texmat, in ); - } - else { - register_matrix_param5( p, STATE_TEXTURE_MATRIX_TRANSPOSE, i, 0, 3, - texmat ); - emit_transpose_matrix_transform_vec4( p, out, texmat, in ); - } + if (sphere_mask) { + nir_ssa_def *sphere = build_sphere_texgen(p); + for (j = 0; j < 4; j++) + if (sphere_mask & (1 << j)) + comps[j] = nir_channel(p->b, sphere, j); } - release_temps(p); - } - else { - emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i); + if (reflect_mask) { + nir_ssa_def *reflect = build_reflect_texgen(p); + for (j = 0; j < 4; j++) + if (reflect_mask & (1 << j)) + comps[j] = nir_channel(p->b, reflect, j); + } + + if (normal_mask) { + nir_ssa_def *normal = get_transformed_normal(p); + for (j = 0; j < 4; j++) + if (normal_mask & (1 << j)) + comps[j] = nir_channel(p->b, normal, j); + } + + if (copy_mask) { + nir_ssa_def *in = load_input_vec4(p, VERT_ATTRIB_TEX0 + i); + for (j = 0; j < 4; j++) + if (copy_mask & (1 << j)) + comps[j] = nir_channel(p->b, in, j); + } + + texcoord = nir_vec(p->b, comps, 4); + } else + texcoord = load_input_vec4(p, VERT_ATTRIB_TEX0 + i); + + if (p->state->unit[i].texmat_enabled) { + nir_ssa_def *texmat[4]; + if (p->mvp_with_dp4) { + load_state_mat4(p, texmat, STATE_TEXTURE_MATRIX, i); + texcoord = + emit_matrix_transform_vec4(p->b, texmat, texcoord); + } else { + load_state_mat4(p, texmat, + STATE_TEXTURE_MATRIX_TRANSPOSE, i); + texcoord = + emit_transpose_matrix_transform_vec4(p->b, texmat, + texcoord); + } } + + store_output_vec4(p, VARYING_SLOT_TEX0 + i, texcoord); } } @@ -1577,36 +1296,36 @@ static void build_texture_transform( struct tnl_program *p ) */ static void build_atten_pointsize( struct tnl_program *p ) { - struct ureg eye = get_eye_position_z(p); - struct ureg state_size = register_param1(p, STATE_POINT_SIZE_CLAMPED); - struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); - struct ureg out = register_output(p, VARYING_SLOT_PSIZ); - struct ureg ut = get_temp(p); + nir_ssa_def *eye = get_eye_position_z(p); + nir_ssa_def *in_size = + load_state_vec4(p, STATE_POINT_SIZE_CLAMPED, 0, 0, 0); + nir_ssa_def *att = + load_state_vec4(p, STATE_POINT_ATTENUATION, 0, 0, 0); /* dist = |eyez| */ - emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); + nir_ssa_def *dist = nir_fabs(p->b, eye); + /* p1 + dist * (p2 + dist * p3); */ - emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), - swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); - emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), - ut, swizzle1(state_attenuation, X)); + nir_ssa_def *factor = nir_fmad(p->b, dist, nir_channel(p->b, att, 2), + nir_channel(p->b, att, 1)); + factor = nir_fmad(p->b, dist, factor, nir_channel(p->b, att, 0)); /* 1 / sqrt(factor) */ - emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); + factor = nir_frsq(p->b, factor); -#if 0 - /* out = pointSize / sqrt(factor) */ - emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); -#else + /* pointSize / sqrt(factor) */ + nir_ssa_def *size = nir_fmul(p->b, factor, + nir_channel(p->b, in_size, 0)); + +#if 1 /* this is a good place to clamp the point size since there's likely * no hardware registers to clamp point size at rasterization time. */ - emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); - emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); - emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); + size = nir_fclamp(p->b, size, nir_channel(p->b, in_size, 1), + nir_channel(p->b, in_size, 2)); #endif - release_temp(p, ut); + store_output_float(p, VARYING_SLOT_PSIZ, size); } @@ -1615,9 +1334,9 @@ static void build_atten_pointsize( struct tnl_program *p ) */ static void build_array_pointsize( struct tnl_program *p ) { - struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); - struct ureg out = register_output(p, VARYING_SLOT_PSIZ); - emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); + nir_ssa_def *val = load_input(p, VERT_ATTRIB_POINT_SIZE, + glsl_float_type()); + store_output_float(p, VARYING_SLOT_PSIZ, val); } @@ -1629,7 +1348,8 @@ static void build_tnl_program( struct tnl_program *p ) /* Lighting calculations: */ - if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) { + if (p->state->fragprog_inputs_read & + (VARYING_BIT_COL0 | VARYING_BIT_COL1)) { if (p->state->light_global_enabled) build_lighting(p); else { @@ -1653,58 +1373,45 @@ static void build_tnl_program( struct tnl_program *p ) build_array_pointsize(p); if (p->state->varying_vp_inputs & VERT_BIT_SELECT_RESULT_OFFSET) - emit_passthrough(p, VERT_ATTRIB_SELECT_RESULT_OFFSET, VARYING_SLOT_VAR0); - - /* Finish up: - */ - emit_op1(p, OPCODE_END, undef, 0, undef); + emit_passthrough(p, VERT_ATTRIB_SELECT_RESULT_OFFSET, + VARYING_SLOT_VAR0); } -static void +static nir_shader * create_new_program( const struct state_key *key, struct gl_program *program, GLboolean mvp_with_dp4, - GLuint max_temps) + const nir_shader_compiler_options *options) { struct tnl_program p; memset(&p, 0, sizeof(p)); p.state = key; - p.program = program; - p.eye_position = undef; - p.eye_position_z = undef; - p.eye_position_normalized = undef; - p.transformed_normal = undef; - p.identity = undef; - p.temp_in_use = 0; p.mvp_with_dp4 = mvp_with_dp4; - if (max_temps >= sizeof(int) * 8) - p.temp_reserved = 0; - else - p.temp_reserved = ~((1<arb.Instructions = - rzalloc_array(program, struct prog_instruction, p.max_inst); - p.program->String = NULL; - p.program->arb.NumInstructions = - p.program->arb.NumTemporaries = - p.program->arb.NumParameters = - p.program->arb.NumAttributes = p.program->arb.NumAddressRegs = 0; - p.program->Parameters = _mesa_new_parameter_list(); - p.program->info.inputs_read = 0; - p.program->info.outputs_written = 0; + program->Parameters = _mesa_new_parameter_list(); p.state_params = _mesa_new_parameter_list(); + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, + options, + "ff-vs"); + + nir_shader *s = b.shader; + + s->info.separate_shader = true; + s->info.use_legacy_math_rules = true; + + p.b = &b; + build_tnl_program( &p ); - _mesa_add_separate_state_parameters(p.program, p.state_params); + nir_validate_shader(b.shader, "after generating ff-vertex shader"); + + _mesa_add_separate_state_parameters(program, p.state_params); _mesa_free_parameter_list(p.state_params); + + return s; } @@ -1735,13 +1442,20 @@ _mesa_get_fixed_func_vertex_program(struct gl_context *ctx) if (0) printf("Build new TNL program\n"); - prog = ctx->Driver.NewProgram(ctx, MESA_SHADER_VERTEX, 0, true); + prog = ctx->Driver.NewProgram(ctx, MESA_SHADER_VERTEX, 0, false); if (!prog) return NULL; - create_new_program( &key, prog, - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS, - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps ); + const struct nir_shader_compiler_options *options = + st_get_nir_compiler_options(ctx->st, MESA_SHADER_VERTEX); + + nir_shader *s = + create_new_program( &key, prog, + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS, + options); + + prog->state.type = PIPE_SHADER_IR_NIR; + prog->nir = s; st_program_string_notify(ctx, GL_VERTEX_PROGRAM_ARB, prog);