tgsi/exec: track geometry-shader primitives per vertex stream

What this patch does (as visible in the hunks below):
  * tgsi_exec_machine::Primitives becomes an array of
    TGSI_MAX_VERTEX_STREAMS pointers, indexed [stream][primitive], and a
    parallel PrimitiveOffsets array is added.
  * Three extra temp slots (TGSI_EXEC_TEMP_PRIMITIVE_S1..S3) hold the
    primitive counters of streams 1-3; temp_prim_idxs[] maps a stream id
    to its counter's temp index/channel.
  * emit_vertex()/emit_primitive() take the instruction and fetch the
    stream id with IFETCH(&r[0], 0, TGSI_CHAN_X); emit_primitive() falls
    back to stream 0 when called with inst == NULL.
  * emit_vertex() records the output offset of the first vertex of each
    primitive in PrimitiveOffsets[stream][prim].

Review fixes folded into this revision:
  * tgsi_fetch_gs_outputs: read Primitives[0][prim_idx] instead of
    Primitives[prim_idx][0]. The first index is the stream, the second
    the primitive; the swapped form used the primitive number as a
    stream index.
  * draw_gs_init: test Primitives[0] after allocating it. Primitives is
    now an array member, so "!machine->Primitives" can never be true and
    an allocation failure went unnoticed.

NOTE(review) - not changed here because the code involved is outside the
visible hunks; please confirm before merging:
  * Only Primitives[0] is allocated in these hunks, yet
    tgsi_exec_machine_setup_masks() writes Primitives[i][0] for every
    stream and emit_vertex() writes PrimitiveOffsets[stream_id][...].
    Verify both arrays are allocated (and freed) for all streams, and
    that any memset/align_free of the old "Primitives" pointer in
    draw_gs.c was updated for the new array type.
  * stream_id comes straight from the instruction operand and indexes
    temp_prim_idxs[] / Primitives[] unchecked; presumably validated
    elsewhere - confirm.
  * The "index" blob hashes are carried over from the original patch and
    whitespace/blank context lines were reconstructed; regenerate the
    patch or apply with whitespace tolerance if it does not apply cleanly.

diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 18af1d90667..7664ecf8f3a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -89,9 +89,9 @@ tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
     */
 
    for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
-      unsigned num_verts_per_prim = machine->Primitives[prim_idx];
+      unsigned num_verts_per_prim = machine->Primitives[0][prim_idx];
       shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
-         machine->Primitives[prim_idx];
+         machine->Primitives[0][prim_idx];
       shader->emitted_vertices += num_verts_per_prim;
       for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
          int idx = current_idx * shader->info.num_outputs;
@@ -696,7 +696,7 @@ draw_gs_init( struct draw_context *draw )
       if (!draw->gs.tgsi.machine)
          return FALSE;
 
-      draw->gs.tgsi.machine->Primitives = align_malloc(
+      draw->gs.tgsi.machine->Primitives[0] = align_malloc(
          MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
-      if (!draw->gs.tgsi.machine->Primitives)
+      if (!draw->gs.tgsi.machine->Primitives[0])
          return FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a6a4d556abf..fc8f9d2d612 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -948,7 +948,22 @@ enum tgsi_exec_datatype {
 #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
 #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
 #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_PRIMITIVE_S1_I TGSI_EXEC_TEMP_PRIMITIVE_S1_I
+#define TEMP_PRIMITIVE_S1_C TGSI_EXEC_TEMP_PRIMITIVE_S1_C
+#define TEMP_PRIMITIVE_S2_I TGSI_EXEC_TEMP_PRIMITIVE_S2_I
+#define TEMP_PRIMITIVE_S2_C TGSI_EXEC_TEMP_PRIMITIVE_S2_C
+#define TEMP_PRIMITIVE_S3_I TGSI_EXEC_TEMP_PRIMITIVE_S3_I
+#define TEMP_PRIMITIVE_S3_C TGSI_EXEC_TEMP_PRIMITIVE_S3_C
 
+static const struct {
+   int idx;
+   int chan;
+} temp_prim_idxs[] = {
+   { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C },
+   { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C },
+   { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C },
+   { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C },
+};
 
 /** The execution mask depends on the conditional mask and the loop mask */
 #define UPDATE_EXEC_MASK(MACH) \
@@ -2055,35 +2070,52 @@ exec_kill(struct tgsi_exec_machine *mach)
 }
 
 static void
-emit_vertex(struct tgsi_exec_machine *mach)
+emit_vertex(struct tgsi_exec_machine *mach,
+            const struct tgsi_full_instruction *inst)
 {
+   union tgsi_exec_channel r[1];
+   unsigned stream_id;
+   unsigned *prim_count;
    /* FIXME: check for exec mask correctly
    unsigned i;
    for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
          if ((mach->ExecMask & (1 << i)))
    */
+   IFETCH(&r[0], 0, TGSI_CHAN_X);
+   stream_id = r[0].u[0];
+   prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
    if (mach->ExecMask) {
-      if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices)
+      if (mach->Primitives[stream_id][*prim_count] >= mach->MaxOutputVertices)
          return;
 
+      if (mach->Primitives[stream_id][*prim_count] == 0)
+         mach->PrimitiveOffsets[stream_id][*prim_count] = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0];
       mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
-      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+      mach->Primitives[stream_id][*prim_count]++;
    }
 }
 
 static void
-emit_primitive(struct tgsi_exec_machine *mach)
+emit_primitive(struct tgsi_exec_machine *mach,
+               const struct tgsi_full_instruction *inst)
 {
-   unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
+   unsigned *prim_count;
+   union tgsi_exec_channel r[1];
+   unsigned stream_id = 0;
    /* FIXME: check for exec mask correctly
    unsigned i;
    for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
          if ((mach->ExecMask & (1 << i)))
    */
+   if (inst) {
+      IFETCH(&r[0], 0, TGSI_CHAN_X);
+      stream_id = r[0].u[0];
+   }
+   prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
    if (mach->ExecMask) {
       ++(*prim_count);
       debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
-      mach->Primitives[*prim_count] = 0;
+      mach->Primitives[stream_id][*prim_count] = 0;
    }
 }
 
@@ -2092,9 +2124,9 @@ conditional_emit_primitive(struct tgsi_exec_machine *mach)
 {
    if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
       int emitted_verts =
-         mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
+         mach->Primitives[0][mach->Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_idxs[0].chan].u[0]];
       if (emitted_verts) {
-         emit_primitive(mach);
+         emit_primitive(mach, NULL);
       }
    }
 }
@@ -5529,11 +5561,11 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_EMIT:
-      emit_vertex(mach);
+      emit_vertex(mach, inst);
       break;
 
    case TGSI_OPCODE_ENDPRIM:
-      emit_primitive(mach);
+      emit_primitive(mach, inst);
       break;
 
    case TGSI_OPCODE_BGNLOOP:
@@ -6119,8 +6151,10 @@ tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
    mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
 
    if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
-      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
-      mach->Primitives[0] = 0;
+      for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
+         mach->Temps[temp_prim_idxs[i].idx].xyzw[temp_prim_idxs[i].chan].u[0] = 0;
+         mach->Primitives[i][0] = 0;
+      }
       /* GS runs on a single primitive for now */
       default_mask = 0x1;
    }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 6d4ac381421..a11b79c6553 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -281,7 +281,14 @@ struct tgsi_sampler
 #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8)
 #define TGSI_EXEC_NUM_ADDRS 3
 
-#define TGSI_EXEC_NUM_TEMP_EXTRAS 12
+#define TGSI_EXEC_TEMP_PRIMITIVE_S1_I (TGSI_EXEC_NUM_TEMPS + 11)
+#define TGSI_EXEC_TEMP_PRIMITIVE_S1_C 0
+#define TGSI_EXEC_TEMP_PRIMITIVE_S2_I (TGSI_EXEC_NUM_TEMPS + 12)
+#define TGSI_EXEC_TEMP_PRIMITIVE_S2_C 1
+#define TGSI_EXEC_TEMP_PRIMITIVE_S3_I (TGSI_EXEC_NUM_TEMPS + 13)
+#define TGSI_EXEC_TEMP_PRIMITIVE_S3_C 2
+
+#define TGSI_EXEC_NUM_TEMP_EXTRAS 14
 
 
 
@@ -312,6 +319,8 @@ struct tgsi_sampler
 
 #define TGSI_MAX_MISC_INPUTS 8
 
+#define TGSI_MAX_VERTEX_STREAMS 4
+
 /** function call/activation record */
 struct tgsi_call_record
 {
@@ -377,7 +386,8 @@ struct tgsi_exec_machine
    enum pipe_shader_type ShaderType; /**< PIPE_SHADER_x */
 
    /* GEOMETRY processor only. */
-   unsigned *Primitives;
+   unsigned *Primitives[TGSI_MAX_VERTEX_STREAMS];
+   unsigned *PrimitiveOffsets[TGSI_MAX_VERTEX_STREAMS];
    unsigned NumOutputs;
    unsigned MaxGeometryShaderOutputs;
    unsigned MaxOutputVertices;