i965: Convert WM_STATE to genxml on gen4-5.

The code doesn't exactly get a lot simpler, but at least it is in a single
place, and we delete more than we add.

Another good point is that you get rid of struct brw_wm_unit_state
which was a third mechanism for encoding GEN state. We used to have
GENXML, manual packing and these bitfield structs. Now we're down to
just GENXML and some manual packing. (Khristian)

Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Rafael Antognolli 2017-06-21 11:13:48 -07:00 committed by Kenneth Graunke
parent e490382326
commit d883ec0400
6 changed files with 153 additions and 437 deletions

View file

@ -60,7 +60,6 @@ i965_FILES = \
brw_vs_surface_state.c \
brw_wm.c \
brw_wm.h \
brw_wm_state.c \
brw_wm_surface_state.c \
gen4_blorp_exec.h \
gen6_clip_state.c \

View file

@ -83,7 +83,6 @@ extern const struct brw_tracked_state brw_wm_image_surfaces;
extern const struct brw_tracked_state brw_cs_ubo_surfaces;
extern const struct brw_tracked_state brw_cs_abo_surfaces;
extern const struct brw_tracked_state brw_cs_image_surfaces;
extern const struct brw_tracked_state brw_wm_unit;
extern const struct brw_tracked_state brw_psp_urb_cbs;

View file

@ -65,127 +65,6 @@ struct brw_urb_fence
} bits1;
};
/* State structs for the various fixed function units:
*/
/* Thread state word 0: GRF register count and kernel start pointer.
 * The bitfields add up to 32 bits. Exact bit placement relies on the
 * compiler's bitfield layout matching the hardware definition.
 */
struct thread0
{
unsigned pad0:1;
unsigned grf_reg_count:3;
unsigned pad1:2;
unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
};
/* Thread state word 1: exception enables, floating point mode, thread
 * priority, binding table entry count and the single-program-flow flag.
 * The bitfields add up to 32 bits.
 */
struct thread1
{
unsigned ext_halt_exception_enable:1;
unsigned sw_exception_enable:1;
unsigned mask_stack_exception_enable:1;
unsigned timeout_exception_enable:1;
unsigned illegal_op_exception_enable:1;
unsigned pad0:3;
unsigned depth_coef_urb_read_offset:6; /* WM only */
unsigned pad1:2;
unsigned floating_point_mode:1;
unsigned thread_priority:1;
unsigned binding_table_entry_count:8;
unsigned pad3:5;
unsigned single_program_flow:1;
};
/* Thread state word 2: per-thread scratch space size and the scratch space
 * base pointer. The bitfields add up to 32 bits.
 */
struct thread2
{
unsigned per_thread_scratch_space:4;
unsigned pad0:6;
unsigned scratch_space_base_pointer:22;
};
/* Thread state word 3: dispatch GRF start register plus the URB entry and
 * constant URB entry read offsets/lengths. The bitfields add up to 32 bits.
 */
struct thread3
{
unsigned dispatch_grf_start_reg:4;
unsigned urb_entry_read_offset:6;
unsigned pad0:1;
unsigned urb_entry_read_length:6;
unsigned pad1:1;
unsigned const_urb_entry_read_offset:6;
unsigned pad2:1;
unsigned const_urb_entry_read_length:6;
unsigned pad3:1;
};
/* Bitfield encoding of the WM unit state: four thread words, two WM-specific
 * words (wm4, wm5), two floats for the global depth offset, and three extra
 * kernel-pointer words (wm8..wm10) that are marked Ironlake-only below.
 * Each bitfield group adds up to 32 bits.
 */
struct brw_wm_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
/* Statistics enable, depth buffer clear, sampler count and sampler state
 * pointer.
 */
struct {
unsigned stats_enable:1;
unsigned depth_buffer_clear:1;
unsigned sampler_count:3;
unsigned sampler_state_pointer:27;
} wm4;
/* Dispatch-width enables, rasterization controls, shader usage flags and
 * the maximum thread count.
 */
struct
{
unsigned enable_8_pix:1;
unsigned enable_16_pix:1;
unsigned enable_32_pix:1;
unsigned enable_con_32_pix:1;
unsigned enable_con_64_pix:1;
unsigned pad0:1;
/* These next four bits are for Ironlake+ */
unsigned fast_span_coverage_enable:1;
unsigned depth_buffer_clear:1;
unsigned depth_buffer_resolve_enable:1;
unsigned hierarchical_depth_buffer_resolve_enable:1;
unsigned legacy_global_depth_bias:1;
unsigned line_stipple:1;
unsigned depth_offset:1;
unsigned polygon_stipple:1;
unsigned line_aa_region_width:2;
unsigned line_endcap_aa_region_width:2;
unsigned early_depth_test:1;
unsigned thread_dispatch_enable:1;
unsigned program_uses_depth:1;
unsigned program_computes_depth:1;
unsigned program_uses_killpixel:1;
unsigned legacy_line_rast: 1;
unsigned transposed_urb_read_enable:1;
unsigned max_threads:7;
} wm5;
/* Presumably these two floats occupy the word positions between wm5 and
 * wm8 (i.e. words 6 and 7) — TODO confirm against the hardware docs.
 */
float global_depth_offset_constant;
float global_depth_offset_scale;
/* for Ironlake only */
/* wm8..wm10 share the thread0 layout (GRF register count + kernel start
 * pointer) for additional kernels 1..3.
 */
struct {
unsigned pad0:1;
unsigned grf_reg_count_1:3;
unsigned pad1:2;
unsigned kernel_start_pointer_1:26;
} wm8;
struct {
unsigned pad0:1;
unsigned grf_reg_count_2:3;
unsigned pad1:2;
unsigned kernel_start_pointer_2:26;
} wm9;
struct {
unsigned pad0:1;
unsigned grf_reg_count_3:3;
unsigned pad1:2;
unsigned kernel_start_pointer_3:26;
} wm10;
};
struct gen5_sampler_default_color {
uint8_t ub[4];
float f[4];

View file

@ -41,8 +41,6 @@
extern "C" {
#endif
bool brw_color_buffer_write_enabled(struct brw_context *brw);
void
brw_upload_wm_prog(struct brw_context *brw);

View file

@ -1,274 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
#include "compiler/nir/nir.h"
/***********************************************************************
* WM unit - fragment programs and rasterization
*/
/**
 * Report whether the current fragment program can write to at least one
 * color draw buffer.
 *
 * A draw buffer counts when it has a renderbuffer attached, the program
 * writes either FRAG_RESULT_COLOR or that buffer's FRAG_RESULT_DATA0 + index
 * output, and at least one of its four color-mask channels is enabled.
 *
 * \param brw  driver context; reads brw->ctx and brw->fragment_program.
 * \return true on the first draw buffer that satisfies all three
 *         conditions, false if none does.
 */
bool
brw_color_buffer_write_enabled(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *fp = brw->fragment_program;

   /* _NEW_BUFFERS */
   for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
      const uint64_t written = fp->info.outputs_written;

      /* Nothing attached at this draw buffer slot. */
      if (!rb)
         continue;

      /* The program writes neither the broadcast color output nor this
       * buffer's dedicated data output.
       */
      if (!(written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) &&
          !(written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + buf)))
         continue;

      /* _NEW_COLOR */
      if (ctx->Color.ColorMask[buf][0] ||
          ctx->Color.ColorMask[buf][1] ||
          ctx->Color.ColorMask[buf][2] ||
          ctx->Color.ColorMask[buf][3])
         return true;
   }

   return false;
}
/**
* Set up the WM unit hardware state. See page 225 of Volume 2.
*
* Allocates a struct brw_wm_unit_state through brw_state_batch(), zeroes it,
* fills it from the fragment program data and GL context state, emits the
* scratch and sampler-state relocations, flags BRW_NEW_GEN4_UNIT_STATE, and
* finally emits _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP if the polygon offset
* clamp differs from the cached brw->wm.offset_clamp.
*/
static void
brw_upload_wm_unit(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct gl_program *fp = brw->fragment_program;
/* BRW_NEW_FS_PROG_DATA */
const struct brw_wm_prog_data *prog_data =
brw_wm_prog_data(brw->wm.base.prog_data);
struct brw_wm_unit_state *wm;
/* The state's offset is stored in brw->wm.base.state_offset; it is used
 * below as the base for every relocation into this structure.
 */
wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.base.state_offset);
memset(wm, 0, sizeof(*wm));
if (prog_data->dispatch_8 && prog_data->dispatch_16) {
/* These two fields should be the same pre-gen6, which is why we
* only have one hardware field to program for both dispatch
* widths.
*/
assert(prog_data->base.dispatch_grf_start_reg ==
prog_data->dispatch_grf_start_reg_2);
}
/* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */
wm->wm5.enable_8_pix = prog_data->dispatch_8;
wm->wm5.enable_16_pix = prog_data->dispatch_16;
if (prog_data->dispatch_8 || prog_data->dispatch_16) {
wm->thread0.grf_reg_count = prog_data->reg_blocks_0;
/* The relocation delta also carries grf_reg_count << 1, presumably so
 * the relocated dword keeps the register count in its low bits —
 * confirm against brw_program_reloc().
 */
wm->thread0.kernel_start_pointer =
brw_program_reloc(brw,
brw->wm.base.state_offset +
offsetof(struct brw_wm_unit_state, thread0),
brw->wm.base.prog_offset +
(wm->thread0.grf_reg_count << 1)) >> 6;
}
/* Second kernel, programmed into wm9 only when prog_offset_2 is non-zero. */
if (prog_data->prog_offset_2) {
wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_2;
wm->wm9.kernel_start_pointer_2 =
brw_program_reloc(brw,
brw->wm.base.state_offset +
offsetof(struct brw_wm_unit_state, wm9),
brw->wm.base.prog_offset +
prog_data->prog_offset_2 +
(wm->wm9.grf_reg_count_2 << 1)) >> 6;
}
wm->thread1.depth_coef_urb_read_offset = 1;
if (prog_data->base.use_alt_mode)
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
else
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
wm->thread1.binding_table_entry_count =
prog_data->base.binding_table.size_bytes / 4;
if (prog_data->base.total_scratch != 0) {
wm->thread2.scratch_space_base_pointer =
brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */
wm->thread2.per_thread_scratch_space =
ffs(brw->wm.base.per_thread_scratch) - 11;
} else {
wm->thread2.scratch_space_base_pointer = 0;
wm->thread2.per_thread_scratch_space = 0;
}
wm->thread3.dispatch_grf_start_reg =
prog_data->base.dispatch_grf_start_reg;
wm->thread3.urb_entry_read_length =
prog_data->num_varying_inputs * 2;
wm->thread3.urb_entry_read_offset = 0;
wm->thread3.const_urb_entry_read_length =
prog_data->base.curb_read_length;
/* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
if (brw->gen == 5)
wm->wm4.sampler_count = 0; /* hardware requirement */
else {
/* NOTE(review): (n + 1) / 4 yields 0 for n == 1, which differs from
 * rounding up to a multiple of four — confirm which encoding the
 * hardware field expects.
 */
wm->wm4.sampler_count = (brw->wm.base.sampler_count + 1) / 4;
}
if (brw->wm.base.sampler_count) {
/* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
wm->wm4.sampler_state_pointer = (brw->batch.bo->offset64 +
brw->wm.base.sampler_offset) >> 5;
} else {
wm->wm4.sampler_state_pointer = 0;
}
/* BRW_NEW_FRAGMENT_PROGRAM */
wm->wm5.program_uses_depth = prog_data->uses_src_depth;
wm->wm5.program_computes_depth = (fp->info.outputs_written &
BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
/* _NEW_BUFFERS
* Override for NULL depthbuffer case, required by the Pixel Shader Computed
* Depth field.
*/
if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH))
wm->wm5.program_computes_depth = 0;
/* _NEW_COLOR */
wm->wm5.program_uses_killpixel =
prog_data->uses_kill || ctx->Color.AlphaEnabled;
wm->wm5.max_threads = devinfo->max_wm_threads - 1;
/* _NEW_BUFFERS | _NEW_COLOR */
/* Threads are dispatched only if the shader can have a visible effect:
 * a color write, a pixel kill, or a computed depth value.
 */
if (brw_color_buffer_write_enabled(brw) ||
wm->wm5.program_uses_killpixel ||
wm->wm5.program_computes_depth) {
wm->wm5.thread_dispatch_enable = 1;
}
wm->wm5.legacy_line_rast = 0;
wm->wm5.legacy_global_depth_bias = 0;
wm->wm5.early_depth_test = 1; /* never need to disable */
wm->wm5.line_aa_region_width = 0;
wm->wm5.line_endcap_aa_region_width = 1;
/* _NEW_POLYGONSTIPPLE */
wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;
/* _NEW_POLYGON */
if (ctx->Polygon.OffsetFill) {
wm->wm5.depth_offset = 1;
/* Something weird going on with legacy_global_depth_bias,
* offset_constant, scaling and MRD. This value passes glean
* but gives some odd results elsewhere (eg. the
* quad-offset-units test).
*/
wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2;
/* This is the only value that passes glean:
*/
wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor;
}
/* _NEW_LINE */
wm->wm5.line_stipple = ctx->Line.StippleFlag;
/* BRW_NEW_STATS_WM */
if (brw->stats_wm)
wm->wm4.stats_enable = 1;
/* Emit scratch space relocation */
/* The delta is per_thread_scratch_space, the field that shares the thread2
 * word with the scratch base pointer.
 */
if (prog_data->base.total_scratch != 0) {
brw_emit_reloc(&brw->batch,
brw->wm.base.state_offset +
offsetof(struct brw_wm_unit_state, thread2),
brw->wm.base.scratch_bo,
wm->thread2.per_thread_scratch_space,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
/* Emit sampler state relocation */
/* The delta ORs in stats_enable and sampler_count << 2, the other fields of
 * the wm4 word, alongside the sampler offset.
 */
if (brw->wm.base.sampler_count != 0) {
brw_emit_reloc(&brw->batch,
brw->wm.base.state_offset +
offsetof(struct brw_wm_unit_state, wm4),
brw->batch.bo,
brw->wm.base.sampler_offset | wm->wm4.stats_enable |
(wm->wm4.sampler_count << 2),
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
/* _NEW_POLYGON */
/* Only re-emit the clamp packet when the value changed since the last
 * emission cached in brw->wm.offset_clamp.
 */
if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
OUT_BATCH_F(ctx->Polygon.OffsetClamp);
ADVANCE_BATCH();
brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
}
}
/* Tracked-state atom for the WM unit: lists the Mesa (_NEW_*) and driver
 * (BRW_NEW_*) dirty flags associated with this state and names
 * brw_upload_wm_unit() as its emit callback.
 */
const struct brw_tracked_state brw_wm_unit = {
.dirty = {
.mesa = _NEW_BUFFERS |
_NEW_COLOR |
_NEW_LINE |
_NEW_POLYGON |
_NEW_POLYGONSTIPPLE,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_SAMPLER_STATE_TABLE |
BRW_NEW_STATS_WM,
},
.emit = brw_upload_wm_unit,
};

View file

@ -1738,7 +1738,33 @@ static const struct brw_tracked_state genX(sf_state) = {
/* ---------------------------------------------------------------------- */
#if GEN_GEN >= 6
/**
 * Report whether the current fragment program can write to at least one
 * color draw buffer.
 *
 * A draw buffer counts when it has a renderbuffer attached, the program
 * writes either FRAG_RESULT_COLOR or that buffer's FRAG_RESULT_DATA0 + index
 * output, and at least one of its four color-mask channels is enabled.
 *
 * \param brw  driver context; reads brw->ctx and brw->fragment_program.
 * \return true on the first draw buffer that satisfies all three
 *         conditions, false if none does.
 */
static bool
brw_color_buffer_write_enabled(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *fp = brw->fragment_program;

   /* _NEW_BUFFERS */
   for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
      const uint64_t written = fp->info.outputs_written;

      /* Nothing attached at this draw buffer slot. */
      if (!rb)
         continue;

      /* The program writes neither the broadcast color output nor this
       * buffer's dedicated data output.
       */
      if (!(written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) &&
          !(written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + buf)))
         continue;

      /* _NEW_COLOR */
      if (ctx->Color.ColorMask[buf][0] ||
          ctx->Color.ColorMask[buf][1] ||
          ctx->Color.ColorMask[buf][2] ||
          ctx->Color.ColorMask[buf][3])
         return true;
   }

   return false;
}
static void
genX(upload_wm)(struct brw_context *brw)
{
@ -1750,11 +1776,10 @@ genX(upload_wm)(struct brw_context *brw)
UNUSED bool writes_depth =
wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
UNUSED struct brw_stage_state *stage_state = &brw->wm.base;
UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
#if GEN_GEN < 7
const struct brw_stage_state *stage_state = &brw->wm.base;
const struct gen_device_info *devinfo = &brw->screen->devinfo;
#if GEN_GEN == 6
/* We can't fold this into gen6_upload_wm_push_constants(), because
* according to the SNB PRM, vol 2 part 1 section 7.2.2
* (3DSTATE_CONSTANT_PS [DevSNB]):
@ -1773,27 +1798,94 @@ genX(upload_wm)(struct brw_context *brw)
}
#endif
#if GEN_GEN >= 6
brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
wm.StatisticsEnable = true;
wm.LineAntialiasingRegionWidth = _10pixels;
wm.LineEndCapAntialiasingRegionWidth = _05pixels;
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
#else
ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) {
if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) {
/* These two fields should be the same pre-gen6, which is why we
* only have one hardware field to program for both dispatch
* widths.
*/
assert(wm_prog_data->base.dispatch_grf_start_reg ==
wm_prog_data->dispatch_grf_start_reg_2);
}
if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16)
wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
if (stage_state->sampler_count)
wm.SamplerStatePointer =
instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
#if GEN_GEN == 5
if (wm_prog_data->prog_offset_2)
wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
#endif
wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
/* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
wm.EarlyDepthTestEnable = true;
wm.LineAntialiasingRegionWidth = _05pixels;
wm.LineEndCapAntialiasingRegionWidth = _10pixels;
/* _NEW_POLYGON */
if (ctx->Polygon.OffsetFill) {
wm.GlobalDepthOffsetEnable = true;
/* Something weird going on with legacy_global_depth_bias,
* offset_constant, scaling and MRD. This value passes glean
* but gives some odd results elsewhere (eg. the
* quad-offset-units test).
*/
wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
/* This is the only value that passes glean:
*/
wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
}
wm.DepthCoefficientURBReadOffset = 1;
#endif
/* BRW_NEW_STATS_WM */
wm.StatisticsEnable = GEN_GEN >= 6 || brw->stats_wm;
#if GEN_GEN < 7
if (wm_prog_data->base.use_alt_mode)
wm.FloatingPointMode = Alternate;
wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4);
wm.BindingTableEntryCount = wm_prog_data->base.binding_table.size_bytes / 4;
wm.SamplerCount = GEN_GEN == 5 ?
0 : DIV_ROUND_UP(stage_state->sampler_count, 4);
wm.BindingTableEntryCount =
wm_prog_data->base.binding_table.size_bytes / 4;
wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
wm.DispatchGRFStartRegisterForConstantSetupData0 =
wm_prog_data->base.dispatch_grf_start_reg;
wm.DispatchGRFStartRegisterForConstantSetupData2 =
wm_prog_data->dispatch_grf_start_reg_2;
wm.KernelStartPointer0 = stage_state->prog_offset;
wm.KernelStartPointer2 = stage_state->prog_offset +
wm_prog_data->prog_offset_2;
if (GEN_GEN == 6 ||
wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) {
wm.KernelStartPointer0 = KSP_ro(brw,
stage_state->prog_offset);
}
#if GEN_GEN >= 5
if (GEN_GEN == 6 || wm_prog_data->prog_offset_2) {
wm.KernelStartPointer2 =
KSP_ro(brw, stage_state->prog_offset +
wm_prog_data->prog_offset_2);
}
#endif
#if GEN_GEN == 6
wm.DualSourceBlendEnable =
wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
ctx->Color.Blend[0]._UsesDualSrc;
@ -1817,42 +1909,34 @@ genX(upload_wm)(struct brw_context *brw)
else
wm.PositionXYOffsetSelect = POSOFFSET_NONE;
wm.DispatchGRFStartRegisterForConstantSetupData2 =
wm_prog_data->dispatch_grf_start_reg_2;
#endif
if (wm_prog_data->base.total_scratch) {
wm.ScratchSpaceBasePointer =
render_bo(stage_state->scratch_bo,
ffs(stage_state->per_thread_scratch) - 11);
render_bo(stage_state->scratch_bo, 0);
wm.PerThreadScratchSpace =
ffs(stage_state->per_thread_scratch) - 11;
}
wm.PixelShaderComputedDepth = writes_depth;
#endif
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
/* _NEW_LINE */
wm.LineStippleEnable = ctx->Line.StippleFlag;
/* _NEW_POLYGON */
wm.PolygonStippleEnable = ctx->Polygon.StippleFlag;
wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
#if GEN_GEN < 8
#if GEN_GEN >= 6
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
/* _NEW_BUFFERS */
const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
if (wm_prog_data->uses_kill ||
_mesa_is_alpha_test_enabled(ctx) ||
_mesa_is_alpha_to_coverage_enabled(ctx) ||
wm_prog_data->uses_omask) {
wm.PixelShaderKillsPixel = true;
}
/* _NEW_BUFFERS | _NEW_COLOR */
if (brw_color_buffer_write_enabled(brw) || writes_depth ||
wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) {
wm.ThreadDispatchEnable = true;
}
if (multisampled_fbo) {
/* _NEW_MULTISAMPLE */
if (ctx->Multisample.Enabled)
@ -1868,6 +1952,21 @@ genX(upload_wm)(struct brw_context *brw)
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
}
#endif
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
if (wm_prog_data->uses_kill ||
_mesa_is_alpha_test_enabled(ctx) ||
_mesa_is_alpha_to_coverage_enabled(ctx) ||
(GEN_GEN >= 6 && wm_prog_data->uses_omask)) {
wm.PixelShaderKillsPixel = true;
}
/* _NEW_BUFFERS | _NEW_COLOR */
if (brw_color_buffer_write_enabled(brw) || writes_depth ||
wm.PixelShaderKillsPixel ||
(GEN_GEN >= 6 && wm_prog_data->has_side_effects)) {
wm.ThreadDispatchEnable = true;
}
#if GEN_GEN >= 7
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
@ -1898,6 +1997,16 @@ genX(upload_wm)(struct brw_context *brw)
wm.EarlyDepthStencilControl = EDSC_PSEXEC;
#endif
}
#if GEN_GEN <= 5
if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {
clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
}
brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
}
#endif
}
static const struct brw_tracked_state genX(wm_state) = {
@ -1905,17 +2014,23 @@ static const struct brw_tracked_state genX(wm_state) = {
.mesa = _NEW_LINE |
_NEW_POLYGON |
(GEN_GEN < 8 ? _NEW_BUFFERS |
_NEW_COLOR |
_NEW_MULTISAMPLE :
_NEW_COLOR :
0) |
(GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
(GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0) |
(GEN_GEN < 6 ? _NEW_POLYGONSTIPPLE : 0) |
(GEN_GEN < 8 && GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0),
.brw = BRW_NEW_BLORP |
BRW_NEW_FS_PROG_DATA |
(GEN_GEN < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_SAMPLER_STATE_TABLE |
BRW_NEW_STATS_WM
: 0) |
(GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT),
},
.emit = genX(upload_wm),
};
#endif
/* ---------------------------------------------------------------------- */
@ -5217,7 +5332,7 @@ genX(init_atoms)(struct brw_context *brw)
&genX(vs_samplers),
/* These set up state for brw_psp_urb_cbs */
&brw_wm_unit,
&genX(wm_state),
&genX(sf_clip_viewport),
&genX(sf_state),
&genX(vs_state), /* always required, enabled or not */