i965: Always use the pre-computed offset for the relocation entry

We must be careful to only compute the address once based on the
per-context information (rather than accessing the unlocked global
bo->offset64) so that the value in the batch does match the
reloc.presumed_offset we declare to the kernel. Otherwise, although it is
highly unlikely, we may see GPU hangs in multithreaded users.

The only real complication here is isl_surf_fill_state() which needs to
adjust the reloc.delta to both generate a tile offset and to encode state
into the lower 12 bits.

(Rebased on ISL changes by Ken.)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Chris Wilson 2017-07-21 16:36:45 +01:00 committed by Kenneth Graunke
parent 1d0bd0d174
commit 2410deefff
2 changed files with 36 additions and 42 deletions

View file

@ -154,13 +154,13 @@ brw_emit_surface_state(struct brw_context *brw,
case ISL_AUX_USAGE_CCS_E:
aux_surf = &mt->mcs_buf->surf;
aux_bo = mt->mcs_buf->bo;
aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
aux_offset = mt->mcs_buf->offset;
break;
case ISL_AUX_USAGE_HIZ:
aux_surf = &mt->hiz_buf->surf;
aux_bo = mt->hiz_buf->bo;
aux_offset = mt->hiz_buf->bo->offset64;
aux_offset = 0;
break;
case ISL_AUX_USAGE_NONE:
@ -180,28 +180,29 @@ brw_emit_surface_state(struct brw_context *brw,
surf_offset);
isl_surf_fill_state(&brw->isl_dev, state, .surf = &mt->surf, .view = &view,
.address = mt->bo->offset64 + offset,
.address = brw_emit_reloc(&brw->batch,
*surf_offset + brw->isl_dev.ss.addr_offset,
mt->bo, offset, read_domains, write_domains),
.aux_surf = aux_surf, .aux_usage = aux_usage,
.aux_address = aux_offset,
.mocs = mocs, .clear_color = clear_color,
.x_offset_sa = tile_x, .y_offset_sa = tile_y);
brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
mt->bo, offset, read_domains, write_domains);
if (aux_surf) {
/* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
* upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
* contain other control information. Since buffer addresses are always
* on 4k boundaries (and thus have their lower 12 bits zero), we can use
* an ordinary reloc to do the necessary address translation.
*
* FIXME: move to the point of assignment.
*/
assert((aux_offset & 0xfff) == 0);
uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
brw_emit_reloc(&brw->batch,
*surf_offset + brw->isl_dev.ss.aux_addr_offset,
aux_bo, *aux_addr - aux_bo->offset64,
read_domains, write_domains);
*aux_addr = brw_emit_reloc(&brw->batch,
*surf_offset +
brw->isl_dev.ss.aux_addr_offset,
aux_bo, *aux_addr,
read_domains, write_domains);
}
}
@ -611,18 +612,16 @@ brw_emit_buffer_surface_state(struct brw_context *brw,
out_offset);
isl_buffer_fill_state(&brw->isl_dev, dw,
.address = (bo ? bo->offset64 : 0) + buffer_offset,
.address = !bo ? buffer_offset :
brw_emit_reloc(&brw->batch,
*out_offset + brw->isl_dev.ss.addr_offset,
bo, buffer_offset,
I915_GEM_DOMAIN_SAMPLER,
(rw ? I915_GEM_DOMAIN_SAMPLER : 0)),
.size = buffer_size,
.format = surface_format,
.stride = pitch,
.mocs = tex_mocs[brw->gen]);
if (bo) {
brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
bo, buffer_offset,
I915_GEM_DOMAIN_SAMPLER,
(rw ? I915_GEM_DOMAIN_SAMPLER : 0));
}
}
void
@ -785,17 +784,15 @@ brw_update_sol_surface(struct brw_context *brw,
BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
surface_format << BRW_SURFACE_FORMAT_SHIFT |
BRW_SURFACE_RC_READ_WRITE;
surf[1] = bo->offset64 + offset_bytes; /* reloc */
surf[1] = brw_emit_reloc(&brw->batch,
*out_offset + 4, bo, offset_bytes,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
height << BRW_SURFACE_HEIGHT_SHIFT);
surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
surf[4] = 0;
surf[5] = 0;
/* Emit relocation to surface contents. */
brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
/* Creates a new WM constant buffer reflecting the current fragment program's
@ -903,7 +900,9 @@ brw_emit_null_surface_state(struct brw_context *brw,
1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
}
surf[1] = bo ? bo->offset64 : 0;
surf[1] = !bo ? 0 :
brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@ -916,11 +915,6 @@ brw_emit_null_surface_state(struct brw_context *brw,
pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
surf[4] = multisampling_state;
surf[5] = 0;
if (bo) {
brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
}
/**
@ -977,8 +971,12 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
/* reloc */
assert(mt->offset % mt->cpp == 0);
surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
mt->bo->offset64 + mt->offset);
surf[1] = brw_emit_reloc(&brw->batch, offset + 4, mt->bo,
mt->offset +
intel_renderbuffer_get_tile_offsets(irb,
&tile_x,
&tile_y),
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@ -1021,9 +1019,6 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
}
}
brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
return offset;
}

View file

@ -5053,14 +5053,13 @@ genX(update_sampler_state)(struct brw_context *brw,
texObj->StencilSampling,
&border_color_offset);
}
samp_st.BorderColorPointer = border_color_offset;
if (GEN_GEN < 6) {
samp_st.BorderColorPointer += brw->batch.bo->offset64; /* reloc */
brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
brw->batch.bo, border_color_offset,
I915_GEM_DOMAIN_SAMPLER, 0);
samp_st.BorderColorPointer =
brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
brw->batch.bo, border_color_offset,
I915_GEM_DOMAIN_SAMPLER, 0);
} else {
samp_st.BorderColorPointer = border_color_offset;
}
#if GEN_GEN >= 8