2015-04-30 19:31:44 +03:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2013-2015 Intel Corporation
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2016-04-15 16:38:18 -07:00
|
|
|
#include "isl/isl.h"
|
2015-04-30 19:31:44 +03:00
|
|
|
#include "brw_fs_surface_builder.h"
|
|
|
|
|
#include "brw_fs.h"
|
|
|
|
|
|
|
|
|
|
using namespace brw;
|
|
|
|
|
|
|
|
|
|
namespace brw {
|
|
|
|
|
namespace surface_access {
|
|
|
|
|
namespace {
|
|
|
|
|
/**
|
|
|
|
|
* Generate a logical send opcode for a surface message and return
|
|
|
|
|
* the result.
|
|
|
|
|
*/
|
|
|
|
|
fs_reg
|
|
|
|
|
emit_send(const fs_builder &bld, enum opcode opcode,
|
|
|
|
|
const fs_reg &addr, const fs_reg &src, const fs_reg &surface,
|
|
|
|
|
unsigned dims, unsigned arg, unsigned rsize,
|
|
|
|
|
brw_predicate pred = BRW_PREDICATE_NONE)
|
|
|
|
|
{
|
|
|
|
|
/* Reduce the dynamically uniform surface index to a single
|
|
|
|
|
* scalar.
|
|
|
|
|
*/
|
|
|
|
|
const fs_reg usurface = bld.emit_uniformize(surface);
|
|
|
|
|
const fs_reg srcs[] = {
|
2015-11-02 11:26:16 -08:00
|
|
|
addr, src, usurface, brw_imm_ud(dims), brw_imm_ud(arg)
|
2015-04-30 19:31:44 +03:00
|
|
|
};
|
|
|
|
|
const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
|
|
|
|
|
fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
|
|
|
|
|
|
2016-09-01 18:43:48 -07:00
|
|
|
inst->size_written = rsize * dst.component_size(inst->exec_size);
|
2015-04-30 19:31:44 +03:00
|
|
|
inst->predicate = pred;
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Emit an untyped surface read opcode. \p dims determines the number
|
|
|
|
|
* of components of the address and \p size the number of components of
|
|
|
|
|
* the returned value.
|
|
|
|
|
*/
|
|
|
|
|
fs_reg
|
|
|
|
|
emit_untyped_read(const fs_builder &bld,
|
|
|
|
|
const fs_reg &surface, const fs_reg &addr,
|
|
|
|
|
unsigned dims, unsigned size,
|
|
|
|
|
brw_predicate pred)
|
|
|
|
|
{
|
|
|
|
|
return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
|
|
|
|
|
addr, fs_reg(), surface, dims, size, size, pred);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Emit an untyped surface write opcode. \p dims determines the number
|
|
|
|
|
* of components of the address and \p size the number of components of
|
|
|
|
|
* the argument.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
emit_untyped_write(const fs_builder &bld, const fs_reg &surface,
|
|
|
|
|
const fs_reg &addr, const fs_reg &src,
|
|
|
|
|
unsigned dims, unsigned size,
|
|
|
|
|
brw_predicate pred)
|
|
|
|
|
{
|
|
|
|
|
emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
|
|
|
|
|
addr, src, surface, dims, size, 0, pred);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Emit an untyped surface atomic opcode. \p dims determines the number
|
|
|
|
|
* of components of the address and \p rsize the number of components of
|
|
|
|
|
* the returned value (either zero or one).
|
|
|
|
|
*/
|
|
|
|
|
fs_reg
|
|
|
|
|
emit_untyped_atomic(const fs_builder &bld,
|
|
|
|
|
const fs_reg &surface, const fs_reg &addr,
|
|
|
|
|
const fs_reg &src0, const fs_reg &src1,
|
|
|
|
|
unsigned dims, unsigned rsize, unsigned op,
|
|
|
|
|
brw_predicate pred)
|
|
|
|
|
{
|
|
|
|
|
/* FINISHME: Factor out this frequently recurring pattern into a
|
|
|
|
|
* helper function.
|
|
|
|
|
*/
|
|
|
|
|
const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
|
|
|
|
|
const fs_reg srcs[] = { src0, src1 };
|
|
|
|
|
const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
|
|
|
|
|
bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
|
|
|
|
|
|
|
|
|
|
return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
|
|
|
|
addr, tmp, surface, dims, op, rsize, pred);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Emit a typed surface read opcode. \p dims determines the number of
|
|
|
|
|
* components of the address and \p size the number of components of the
|
|
|
|
|
* returned value.
|
|
|
|
|
*/
|
|
|
|
|
fs_reg
|
|
|
|
|
emit_typed_read(const fs_builder &bld, const fs_reg &surface,
|
|
|
|
|
const fs_reg &addr, unsigned dims, unsigned size)
|
|
|
|
|
{
|
|
|
|
|
return emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
|
|
|
|
|
addr, fs_reg(), surface, dims, size, size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Emit a typed surface write opcode. \p dims determines the number of
|
|
|
|
|
* components of the address and \p size the number of components of the
|
|
|
|
|
* argument.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
emit_typed_write(const fs_builder &bld, const fs_reg &surface,
|
|
|
|
|
const fs_reg &addr, const fs_reg &src,
|
|
|
|
|
unsigned dims, unsigned size)
|
|
|
|
|
{
|
|
|
|
|
emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
|
|
|
|
|
addr, src, surface, dims, size, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Emit a typed surface atomic opcode. \p dims determines the number of
|
|
|
|
|
* components of the address and \p rsize the number of components of
|
|
|
|
|
* the returned value (either zero or one).
|
|
|
|
|
*/
|
|
|
|
|
fs_reg
|
|
|
|
|
emit_typed_atomic(const fs_builder &bld, const fs_reg &surface,
|
|
|
|
|
const fs_reg &addr,
|
|
|
|
|
const fs_reg &src0, const fs_reg &src1,
|
|
|
|
|
unsigned dims, unsigned rsize, unsigned op,
|
|
|
|
|
brw_predicate pred)
|
|
|
|
|
{
|
|
|
|
|
/* FINISHME: Factor out this frequently recurring pattern into a
|
|
|
|
|
* helper function.
|
|
|
|
|
*/
|
|
|
|
|
const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
|
|
|
|
|
const fs_reg srcs[] = { src0, src1 };
|
|
|
|
|
const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
|
|
|
|
|
bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
|
|
|
|
|
|
|
|
|
|
return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
|
|
|
|
|
addr, tmp, surface, dims, op, rsize);
|
|
|
|
|
}
|
2017-07-01 08:16:01 +02:00
|
|
|
|
2017-07-01 08:19:17 +02:00
|
|
|
/**
 * Emit a byte scattered read opcode.
 *
 * \p dims is the number of components of the address.  \p bit_size is
 * handed to the message as its argument and \p size is used as the
 * number of components of the returned value.
 */
fs_reg
emit_byte_scattered_read(const fs_builder &bld,
                         const fs_reg &surface, const fs_reg &addr,
                         unsigned dims, unsigned size,
                         unsigned bit_size, brw_predicate pred)
{
   /* A read has no data payload, so a null register is passed as source. */
   const fs_reg no_payload = fs_reg();

   return emit_send(bld, SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
                    addr, no_payload, surface, dims, bit_size, size, pred);
}
|
|
|
|
|
|
2017-07-01 08:16:01 +02:00
|
|
|
void
|
|
|
|
|
emit_byte_scattered_write(const fs_builder &bld, const fs_reg &surface,
|
|
|
|
|
const fs_reg &addr, const fs_reg &src,
|
|
|
|
|
unsigned dims, unsigned size,
|
|
|
|
|
unsigned bit_size, brw_predicate pred)
|
|
|
|
|
{
|
|
|
|
|
emit_send(bld, SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
|
|
|
|
|
addr, src, surface, dims, bit_size, 0, pred);
|
|
|
|
|
}
|
2015-04-30 19:31:44 +03:00
|
|
|
}
|
|
|
|
|
}
|
2015-04-22 16:43:51 +03:00
|
|
|
|
|
|
|
|
namespace {
|
2015-04-22 16:45:28 +03:00
|
|
|
namespace image_format_info {
|
2016-04-15 16:38:18 -07:00
|
|
|
/* The higher compiler layers use the GL enums for image formats even if
|
|
|
|
|
* they come in from SPIR-V or Vulkan. We need to turn them into an ISL
|
|
|
|
|
* enum before we can use them.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static enum isl_format
|
2016-04-15 16:38:18 -07:00
|
|
|
isl_format_for_gl_format(uint32_t gl_format)
|
|
|
|
|
{
|
|
|
|
|
switch (gl_format) {
|
|
|
|
|
case GL_R8: return ISL_FORMAT_R8_UNORM;
|
|
|
|
|
case GL_R8_SNORM: return ISL_FORMAT_R8_SNORM;
|
|
|
|
|
case GL_R8UI: return ISL_FORMAT_R8_UINT;
|
|
|
|
|
case GL_R8I: return ISL_FORMAT_R8_SINT;
|
|
|
|
|
case GL_RG8: return ISL_FORMAT_R8G8_UNORM;
|
|
|
|
|
case GL_RG8_SNORM: return ISL_FORMAT_R8G8_SNORM;
|
|
|
|
|
case GL_RG8UI: return ISL_FORMAT_R8G8_UINT;
|
|
|
|
|
case GL_RG8I: return ISL_FORMAT_R8G8_SINT;
|
|
|
|
|
case GL_RGBA8: return ISL_FORMAT_R8G8B8A8_UNORM;
|
|
|
|
|
case GL_RGBA8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM;
|
|
|
|
|
case GL_RGBA8UI: return ISL_FORMAT_R8G8B8A8_UINT;
|
|
|
|
|
case GL_RGBA8I: return ISL_FORMAT_R8G8B8A8_SINT;
|
|
|
|
|
case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT;
|
|
|
|
|
case GL_RGB10_A2: return ISL_FORMAT_R10G10B10A2_UNORM;
|
|
|
|
|
case GL_RGB10_A2UI: return ISL_FORMAT_R10G10B10A2_UINT;
|
|
|
|
|
case GL_R16: return ISL_FORMAT_R16_UNORM;
|
|
|
|
|
case GL_R16_SNORM: return ISL_FORMAT_R16_SNORM;
|
|
|
|
|
case GL_R16F: return ISL_FORMAT_R16_FLOAT;
|
|
|
|
|
case GL_R16UI: return ISL_FORMAT_R16_UINT;
|
|
|
|
|
case GL_R16I: return ISL_FORMAT_R16_SINT;
|
|
|
|
|
case GL_RG16: return ISL_FORMAT_R16G16_UNORM;
|
|
|
|
|
case GL_RG16_SNORM: return ISL_FORMAT_R16G16_SNORM;
|
|
|
|
|
case GL_RG16F: return ISL_FORMAT_R16G16_FLOAT;
|
|
|
|
|
case GL_RG16UI: return ISL_FORMAT_R16G16_UINT;
|
|
|
|
|
case GL_RG16I: return ISL_FORMAT_R16G16_SINT;
|
|
|
|
|
case GL_RGBA16: return ISL_FORMAT_R16G16B16A16_UNORM;
|
|
|
|
|
case GL_RGBA16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM;
|
|
|
|
|
case GL_RGBA16F: return ISL_FORMAT_R16G16B16A16_FLOAT;
|
|
|
|
|
case GL_RGBA16UI: return ISL_FORMAT_R16G16B16A16_UINT;
|
|
|
|
|
case GL_RGBA16I: return ISL_FORMAT_R16G16B16A16_SINT;
|
|
|
|
|
case GL_R32F: return ISL_FORMAT_R32_FLOAT;
|
|
|
|
|
case GL_R32UI: return ISL_FORMAT_R32_UINT;
|
|
|
|
|
case GL_R32I: return ISL_FORMAT_R32_SINT;
|
|
|
|
|
case GL_RG32F: return ISL_FORMAT_R32G32_FLOAT;
|
|
|
|
|
case GL_RG32UI: return ISL_FORMAT_R32G32_UINT;
|
|
|
|
|
case GL_RG32I: return ISL_FORMAT_R32G32_SINT;
|
|
|
|
|
case GL_RGBA32F: return ISL_FORMAT_R32G32B32A32_FLOAT;
|
|
|
|
|
case GL_RGBA32UI: return ISL_FORMAT_R32G32B32A32_UINT;
|
|
|
|
|
case GL_RGBA32I: return ISL_FORMAT_R32G32B32A32_SINT;
|
|
|
|
|
case GL_NONE: return ISL_FORMAT_UNSUPPORTED;
|
|
|
|
|
default:
|
|
|
|
|
assert(!"Invalid image format");
|
|
|
|
|
return ISL_FORMAT_UNSUPPORTED;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-22 16:45:28 +03:00
|
|
|
/**
 * Simple 4-tuple of scalars used to pass around per-color component
 * values.
 */
struct color_u {
   /** Initialize all four components to the same value \p x. */
   color_u(unsigned x = 0) : r(x), g(x), b(x), a(x)
   {
   }

   /** Initialize each component individually. */
   color_u(unsigned r, unsigned g, unsigned b, unsigned a) :
      r(r), g(g), b(b), a(a)
   {
   }

   /**
    * Return component \p i, where 0, 1, 2 and 3 select r, g, b and a
    * respectively.  Any other index is a programming error.
    */
   unsigned
   operator[](unsigned i) const
   {
      const unsigned xs[] = { r, g, b, a };

      /* Catch out-of-range indices instead of silently reading past the
       * end of the temporary array.
       */
      assert(i < sizeof(xs) / sizeof(xs[0]));
      return xs[i];
   }

   unsigned r, g, b, a;
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return the per-channel bitfield widths for a given image format.
|
|
|
|
|
*/
|
|
|
|
|
inline color_u
|
2016-04-15 21:55:02 -07:00
|
|
|
get_bit_widths(isl_format format)
|
2015-04-22 16:45:28 +03:00
|
|
|
{
|
2016-04-15 21:55:02 -07:00
|
|
|
const isl_format_layout *fmtl = isl_format_get_layout(format);
|
|
|
|
|
|
|
|
|
|
return color_u(fmtl->channels.r.bits,
|
|
|
|
|
fmtl->channels.g.bits,
|
|
|
|
|
fmtl->channels.b.bits,
|
|
|
|
|
fmtl->channels.a.bits);
|
2015-04-22 16:45:28 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return the per-channel bitfield shifts for a given image format.
|
|
|
|
|
*/
|
|
|
|
|
inline color_u
|
2016-04-15 21:55:02 -07:00
|
|
|
get_bit_shifts(isl_format format)
|
2015-04-22 16:45:28 +03:00
|
|
|
{
|
|
|
|
|
const color_u widths = get_bit_widths(format);
|
|
|
|
|
return color_u(0, widths.r, widths.r + widths.g,
|
|
|
|
|
widths.r + widths.g + widths.b);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return true if all present components have the same bit width.
|
|
|
|
|
*/
|
|
|
|
|
inline bool
|
2016-04-15 21:55:02 -07:00
|
|
|
is_homogeneous(isl_format format)
|
2015-04-22 16:45:28 +03:00
|
|
|
{
|
|
|
|
|
const color_u widths = get_bit_widths(format);
|
|
|
|
|
return ((widths.g == 0 || widths.g == widths.r) &&
|
|
|
|
|
(widths.b == 0 || widths.b == widths.r) &&
|
|
|
|
|
(widths.a == 0 || widths.a == widths.r));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return true if the format conversion boils down to a trivial copy.
|
|
|
|
|
*/
|
|
|
|
|
inline bool
|
2016-08-22 15:01:08 -07:00
|
|
|
is_conversion_trivial(const gen_device_info *devinfo, isl_format format)
|
2015-04-22 16:45:28 +03:00
|
|
|
{
|
|
|
|
|
return (get_bit_widths(format).r == 32 && is_homogeneous(format)) ||
|
2016-04-15 21:55:02 -07:00
|
|
|
format == isl_lower_storage_image_format(devinfo, format);
|
2015-04-22 16:45:28 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return true if the hardware natively supports some format with
|
|
|
|
|
* compatible bitfield layout, but possibly different data types.
|
|
|
|
|
*/
|
|
|
|
|
inline bool
|
2016-08-22 15:01:08 -07:00
|
|
|
has_supported_bit_layout(const gen_device_info *devinfo,
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format format)
|
2015-04-22 16:45:28 +03:00
|
|
|
{
|
|
|
|
|
const color_u widths = get_bit_widths(format);
|
|
|
|
|
const color_u lower_widths = get_bit_widths(
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_lower_storage_image_format(devinfo, format));
|
2015-04-22 16:45:28 +03:00
|
|
|
|
|
|
|
|
return (widths.r == lower_widths.r &&
|
|
|
|
|
widths.g == lower_widths.g &&
|
|
|
|
|
widths.b == lower_widths.b &&
|
|
|
|
|
widths.a == lower_widths.a);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return true if we are required to spread individual components over
|
|
|
|
|
* several components of the format used by the hardware (RG32 and
|
|
|
|
|
* friends implemented as RGBA16UI).
|
|
|
|
|
*/
|
|
|
|
|
inline bool
|
2016-08-22 15:01:08 -07:00
|
|
|
has_split_bit_layout(const gen_device_info *devinfo, isl_format format)
|
2015-04-22 16:45:28 +03:00
|
|
|
{
|
2016-04-15 21:55:02 -07:00
|
|
|
const isl_format lower_format =
|
|
|
|
|
isl_lower_storage_image_format(devinfo, format);
|
2015-04-22 16:45:28 +03:00
|
|
|
|
2016-04-15 21:55:02 -07:00
|
|
|
return (isl_format_get_num_channels(format) <
|
|
|
|
|
isl_format_get_num_channels(lower_format));
|
2015-04-22 16:45:28 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return true if the hardware returns garbage in the unused high bits
|
|
|
|
|
* of each component. This may happen on IVB because we rely on the
|
|
|
|
|
* undocumented behavior that typed reads from surfaces of the
|
|
|
|
|
* unsupported R8 and R16 formats return useful data in their least
|
|
|
|
|
* significant bits.
|
|
|
|
|
*/
|
|
|
|
|
inline bool
|
2016-08-22 15:01:08 -07:00
|
|
|
has_undefined_high_bits(const gen_device_info *devinfo,
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format format)
|
2015-04-22 16:45:28 +03:00
|
|
|
{
|
2016-04-15 21:55:02 -07:00
|
|
|
const isl_format lower_format =
|
|
|
|
|
isl_lower_storage_image_format(devinfo, format);
|
2015-04-22 16:45:28 +03:00
|
|
|
|
|
|
|
|
return (devinfo->gen == 7 && !devinfo->is_haswell &&
|
2016-04-15 21:55:02 -07:00
|
|
|
(lower_format == ISL_FORMAT_R16_UINT ||
|
|
|
|
|
lower_format == ISL_FORMAT_R8_UINT));
|
2015-04-22 16:45:28 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return true if the format represents values as signed integers
|
|
|
|
|
* requiring sign extension when unpacking.
|
|
|
|
|
*/
|
|
|
|
|
inline bool
|
2016-04-15 21:55:02 -07:00
|
|
|
needs_sign_extension(isl_format format)
|
2015-04-22 16:45:28 +03:00
|
|
|
{
|
2016-04-15 21:55:02 -07:00
|
|
|
return isl_format_has_snorm_channel(format) ||
|
|
|
|
|
isl_format_has_sint_channel(format);
|
2015-04-22 16:45:28 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-22 16:43:51 +03:00
|
|
|
namespace image_validity {
|
2015-08-26 21:59:46 +03:00
|
|
|
/**
|
|
|
|
|
* Check whether the bound image is suitable for untyped access.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static brw_predicate
|
2015-08-26 21:59:46 +03:00
|
|
|
emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
|
|
|
|
|
brw_predicate pred)
|
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
const gen_device_info *devinfo = bld.shader->devinfo;
|
2015-08-26 21:59:46 +03:00
|
|
|
const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
|
|
|
|
|
|
|
|
|
|
if (devinfo->gen == 7 && !devinfo->is_haswell) {
|
|
|
|
|
/* Check whether the first stride component (i.e. the Bpp value)
|
|
|
|
|
* is greater than four, what on Gen7 indicates that a surface of
|
|
|
|
|
* type RAW has been bound for untyped access. Reading or writing
|
|
|
|
|
* to a surface of type other than RAW using untyped surface
|
|
|
|
|
* messages causes a hang on IVB and VLV.
|
|
|
|
|
*/
|
|
|
|
|
set_predicate(pred,
|
2015-11-02 11:26:16 -08:00
|
|
|
bld.CMP(bld.null_reg_ud(), stride, brw_imm_d(4),
|
2015-08-26 21:59:46 +03:00
|
|
|
BRW_CONDITIONAL_G));
|
|
|
|
|
|
|
|
|
|
return BRW_PREDICATE_NORMAL;
|
|
|
|
|
} else {
|
|
|
|
|
/* More recent generations handle the format mismatch
|
|
|
|
|
* gracefully.
|
|
|
|
|
*/
|
|
|
|
|
return pred;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-22 16:43:51 +03:00
|
|
|
/**
|
|
|
|
|
* Check whether there is an image bound at the given index and write
|
|
|
|
|
* the comparison result to f0.0. Returns an appropriate predication
|
|
|
|
|
* mode to use on subsequent image operations.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static brw_predicate
|
2015-08-26 21:59:46 +03:00
|
|
|
emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
|
2015-04-22 16:43:51 +03:00
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
const gen_device_info *devinfo = bld.shader->devinfo;
|
2015-04-22 16:43:51 +03:00
|
|
|
const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
|
|
|
|
|
|
|
|
|
|
if (devinfo->gen == 7 && !devinfo->is_haswell) {
|
|
|
|
|
/* Check the first component of the size field to find out if the
|
|
|
|
|
* image is bound. Necessary on IVB for typed atomics because
|
|
|
|
|
* they don't seem to respect null surfaces and will happily
|
|
|
|
|
* corrupt or read random memory when no image is bound.
|
|
|
|
|
*/
|
|
|
|
|
bld.CMP(bld.null_reg_ud(),
|
|
|
|
|
retype(size, BRW_REGISTER_TYPE_UD),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_d(0), BRW_CONDITIONAL_NZ);
|
2015-04-22 16:43:51 +03:00
|
|
|
|
|
|
|
|
return BRW_PREDICATE_NORMAL;
|
|
|
|
|
} else {
|
|
|
|
|
/* More recent platforms implement compliant behavior when a null
|
|
|
|
|
* surface is bound.
|
|
|
|
|
*/
|
|
|
|
|
return BRW_PREDICATE_NONE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Check whether the provided coordinates are within the image bounds
|
|
|
|
|
* and write the comparison result to f0.0. Returns an appropriate
|
|
|
|
|
* predication mode to use on subsequent image operations.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static brw_predicate
|
2015-04-22 16:43:51 +03:00
|
|
|
emit_bounds_check(const fs_builder &bld, const fs_reg &image,
|
|
|
|
|
const fs_reg &addr, unsigned dims)
|
|
|
|
|
{
|
|
|
|
|
const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < dims; ++c)
|
|
|
|
|
set_predicate(c == 0 ? BRW_PREDICATE_NONE : BRW_PREDICATE_NORMAL,
|
|
|
|
|
bld.CMP(bld.null_reg_ud(),
|
|
|
|
|
offset(retype(addr, BRW_REGISTER_TYPE_UD), bld, c),
|
|
|
|
|
offset(size, bld, c),
|
|
|
|
|
BRW_CONDITIONAL_L));
|
|
|
|
|
|
|
|
|
|
return BRW_PREDICATE_NORMAL;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-04-22 16:44:18 +03:00
|
|
|
|
|
|
|
|
namespace image_coordinates {
|
2015-07-23 19:32:08 +03:00
|
|
|
/**
|
|
|
|
|
* Return the total number of coordinates needed to address a texel of
|
|
|
|
|
* the surface, which may be more than the sum of \p surf_dims and \p
|
|
|
|
|
* arr_dims if padding is required.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static unsigned
|
2015-07-23 19:32:08 +03:00
|
|
|
num_image_coordinates(const fs_builder &bld,
|
|
|
|
|
unsigned surf_dims, unsigned arr_dims,
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format format)
|
2015-07-23 19:32:08 +03:00
|
|
|
{
|
|
|
|
|
/* HSW in vec4 mode and our software coordinate handling for untyped
|
|
|
|
|
* reads want the array index to be at the Z component.
|
|
|
|
|
*/
|
|
|
|
|
const bool array_index_at_z =
|
2016-04-15 21:55:02 -07:00
|
|
|
format != ISL_FORMAT_UNSUPPORTED &&
|
|
|
|
|
!isl_has_matching_typed_storage_image_format(
|
2015-07-23 19:32:08 +03:00
|
|
|
bld.shader->devinfo, format);
|
|
|
|
|
const unsigned zero_dims =
|
|
|
|
|
((surf_dims == 1 && arr_dims == 1 && array_index_at_z) ? 1 : 0);
|
|
|
|
|
|
|
|
|
|
return surf_dims + zero_dims + arr_dims;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Transform image coordinates into the form expected by the
|
|
|
|
|
* implementation.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-23 19:32:08 +03:00
|
|
|
emit_image_coordinates(const fs_builder &bld, const fs_reg &addr,
|
|
|
|
|
unsigned surf_dims, unsigned arr_dims,
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format format)
|
2015-07-23 19:32:08 +03:00
|
|
|
{
|
|
|
|
|
const unsigned dims =
|
|
|
|
|
num_image_coordinates(bld, surf_dims, arr_dims, format);
|
|
|
|
|
|
|
|
|
|
if (dims > surf_dims + arr_dims) {
|
|
|
|
|
assert(surf_dims == 1 && arr_dims == 1 && dims == 3);
|
|
|
|
|
/* The array index is required to be passed in as the Z component,
|
|
|
|
|
* insert a zero at the Y component to shift it to the right
|
|
|
|
|
* position.
|
|
|
|
|
*
|
|
|
|
|
* FINISHME: Factor out this frequently recurring pattern into a
|
|
|
|
|
* helper function.
|
|
|
|
|
*/
|
2015-11-02 11:26:16 -08:00
|
|
|
const fs_reg srcs[] = { addr, brw_imm_d(0), offset(addr, bld, 1) };
|
2015-07-23 19:32:08 +03:00
|
|
|
const fs_reg dst = bld.vgrf(addr.type, dims);
|
|
|
|
|
bld.LOAD_PAYLOAD(dst, srcs, dims, 0);
|
|
|
|
|
return dst;
|
|
|
|
|
} else {
|
|
|
|
|
return addr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-22 16:44:18 +03:00
|
|
|
/**
|
|
|
|
|
* Calculate the offset in memory of the texel given by \p coord.
|
|
|
|
|
*
|
|
|
|
|
* This is meant to be used with untyped surface messages to access a
|
|
|
|
|
* tiled surface, what involves taking into account the tiling and
|
|
|
|
|
* swizzling modes of the surface manually so it will hopefully not
|
|
|
|
|
* happen very often.
|
|
|
|
|
*
|
|
|
|
|
* The tiling algorithm implemented here matches either the X or Y
|
|
|
|
|
* tiling layouts supported by the hardware depending on the tiling
|
|
|
|
|
* coefficients passed to the program as uniforms. See Volume 1 Part 2
|
|
|
|
|
* Section 4.5 "Address Tiling Function" of the IVB PRM for an in-depth
|
|
|
|
|
* explanation of the hardware tiling format.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-04-22 16:44:18 +03:00
|
|
|
emit_address_calculation(const fs_builder &bld, const fs_reg &image,
|
|
|
|
|
const fs_reg &coord, unsigned dims)
|
|
|
|
|
{
|
2016-08-22 15:01:08 -07:00
|
|
|
const gen_device_info *devinfo = bld.shader->devinfo;
|
2015-04-22 16:44:18 +03:00
|
|
|
const fs_reg off = offset(image, bld, BRW_IMAGE_PARAM_OFFSET_OFFSET);
|
|
|
|
|
const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
|
|
|
|
|
const fs_reg tile = offset(image, bld, BRW_IMAGE_PARAM_TILING_OFFSET);
|
|
|
|
|
const fs_reg swz = offset(image, bld, BRW_IMAGE_PARAM_SWIZZLING_OFFSET);
|
|
|
|
|
const fs_reg addr = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
|
|
|
|
const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
|
|
|
|
const fs_reg minor = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
|
|
|
|
const fs_reg major = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
|
|
|
|
const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
/* Shift the coordinates by the fixed surface offset. It may be
|
|
|
|
|
* non-zero if the image is a single slice of a higher-dimensional
|
|
|
|
|
* surface, or if a non-zero mipmap level of the surface is bound to
|
|
|
|
|
* the pipeline. The offset needs to be applied here rather than at
|
|
|
|
|
* surface state set-up time because the desired slice-level may
|
|
|
|
|
* start mid-tile, so simply shifting the surface base address
|
|
|
|
|
* wouldn't give a well-formed tiled surface in the general case.
|
|
|
|
|
*/
|
|
|
|
|
for (unsigned c = 0; c < 2; ++c)
|
|
|
|
|
bld.ADD(offset(addr, bld, c), offset(off, bld, c),
|
|
|
|
|
(c < dims ?
|
|
|
|
|
offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) :
|
2015-11-02 11:26:16 -08:00
|
|
|
fs_reg(brw_imm_d(0))));
|
2015-04-22 16:44:18 +03:00
|
|
|
|
|
|
|
|
/* The layout of 3-D textures in memory is sort-of like a tiling
|
|
|
|
|
* format. At each miplevel, the slices are arranged in rows of
|
|
|
|
|
* 2^level slices per row. The slice row is stored in tmp.y and
|
|
|
|
|
* the slice within the row is stored in tmp.x.
|
|
|
|
|
*
|
|
|
|
|
* The layout of 2-D array textures and cubemaps is much simpler:
|
|
|
|
|
* Depending on whether the ARYSPC_LOD0 layout is in use it will be
|
|
|
|
|
* stored in memory as an array of slices, each one being a 2-D
|
|
|
|
|
* arrangement of miplevels, or as a 2D arrangement of miplevels,
|
|
|
|
|
* each one being an array of slices. In either case the separation
|
|
|
|
|
* between slices of the same LOD is equal to the qpitch value
|
|
|
|
|
* provided as stride.w.
|
|
|
|
|
*
|
|
|
|
|
* This code can be made to handle either 2D arrays and 3D textures
|
|
|
|
|
* by passing in the miplevel as tile.z for 3-D textures and 0 in
|
|
|
|
|
* tile.z for 2-D array textures.
|
|
|
|
|
*
|
|
|
|
|
* See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
|
|
|
|
|
* Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
|
|
|
|
|
* of the hardware 3D texture and 2D array layouts.
|
|
|
|
|
*/
|
|
|
|
|
if (dims > 2) {
|
|
|
|
|
/* Decompose z into a major (tmp.y) and a minor (tmp.x)
|
|
|
|
|
* index.
|
|
|
|
|
*/
|
2015-11-02 11:26:16 -08:00
|
|
|
bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), brw_imm_d(0),
|
2015-04-22 16:44:18 +03:00
|
|
|
offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2));
|
|
|
|
|
bld.SHR(offset(tmp, bld, 1),
|
|
|
|
|
offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2),
|
|
|
|
|
offset(tile, bld, 2));
|
|
|
|
|
|
|
|
|
|
/* Take into account the horizontal (tmp.x) and vertical (tmp.y)
|
|
|
|
|
* slice offset.
|
|
|
|
|
*/
|
|
|
|
|
for (unsigned c = 0; c < 2; ++c) {
|
|
|
|
|
bld.MUL(offset(tmp, bld, c),
|
|
|
|
|
offset(stride, bld, 2 + c), offset(tmp, bld, c));
|
|
|
|
|
bld.ADD(offset(addr, bld, c),
|
|
|
|
|
offset(addr, bld, c), offset(tmp, bld, c));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (dims > 1) {
|
|
|
|
|
/* Calculate the major/minor x and y indices. In order to
|
|
|
|
|
* accommodate both X and Y tiling, the Y-major tiling format is
|
|
|
|
|
* treated as being a bunch of narrow X-tiles placed next to each
|
|
|
|
|
* other. This means that the tile width for Y-tiling is actually
|
|
|
|
|
* the width of one sub-column of the Y-major tile where each 4K
|
|
|
|
|
* tile has 8 512B sub-columns.
|
|
|
|
|
*
|
|
|
|
|
* The major Y value is the row of tiles in which the pixel lives.
|
|
|
|
|
* The major X value is the tile sub-column in which the pixel
|
|
|
|
|
* lives; for X tiling, this is the same as the tile column, for Y
|
|
|
|
|
* tiling, each tile has 8 sub-columns. The minor X and Y indices
|
|
|
|
|
* are the position within the sub-column.
|
|
|
|
|
*/
|
|
|
|
|
for (unsigned c = 0; c < 2; ++c) {
|
|
|
|
|
/* Calculate the minor x and y indices. */
|
|
|
|
|
bld.BFE(offset(minor, bld, c), offset(tile, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_d(0), offset(addr, bld, c));
|
2015-04-22 16:44:18 +03:00
|
|
|
|
|
|
|
|
/* Calculate the major x and y indices. */
|
|
|
|
|
bld.SHR(offset(major, bld, c),
|
|
|
|
|
offset(addr, bld, c), offset(tile, bld, c));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Calculate the texel index from the start of the tile row and
|
|
|
|
|
* the vertical coordinate of the row.
|
|
|
|
|
* Equivalent to:
|
|
|
|
|
* tmp.x = (major.x << tile.y << tile.x) +
|
|
|
|
|
* (minor.y << tile.x) + minor.x
|
|
|
|
|
* tmp.y = major.y << tile.y
|
|
|
|
|
*/
|
|
|
|
|
bld.SHL(tmp, major, offset(tile, bld, 1));
|
|
|
|
|
bld.ADD(tmp, tmp, offset(minor, bld, 1));
|
|
|
|
|
bld.SHL(tmp, tmp, offset(tile, bld, 0));
|
|
|
|
|
bld.ADD(tmp, tmp, minor);
|
|
|
|
|
bld.SHL(offset(tmp, bld, 1),
|
|
|
|
|
offset(major, bld, 1), offset(tile, bld, 1));
|
|
|
|
|
|
|
|
|
|
/* Add it to the start of the tile row. */
|
|
|
|
|
bld.MUL(offset(tmp, bld, 1),
|
|
|
|
|
offset(tmp, bld, 1), offset(stride, bld, 1));
|
|
|
|
|
bld.ADD(tmp, tmp, offset(tmp, bld, 1));
|
|
|
|
|
|
|
|
|
|
/* Multiply by the Bpp value. */
|
|
|
|
|
bld.MUL(dst, tmp, stride);
|
|
|
|
|
|
|
|
|
|
if (devinfo->gen < 8 && !devinfo->is_baytrail) {
|
|
|
|
|
/* Take into account the two dynamically specified shifts.
|
|
|
|
|
* Both need are used to implement swizzling of X-tiled
|
|
|
|
|
* surfaces. For Y-tiled surfaces only one bit needs to be
|
|
|
|
|
* XOR-ed with bit 6 of the memory address, so a swz value of
|
|
|
|
|
* 0xff (actually interpreted as 31 by the hardware) will be
|
|
|
|
|
* provided to cause the relevant bit of tmp.y to be zero and
|
|
|
|
|
* turn the first XOR into the identity. For linear surfaces
|
|
|
|
|
* or platforms lacking address swizzling both shifts will be
|
|
|
|
|
* 0xff causing the relevant bits of both tmp.x and .y to be
|
|
|
|
|
* zero, what effectively disables swizzling.
|
|
|
|
|
*/
|
|
|
|
|
for (unsigned c = 0; c < 2; ++c)
|
|
|
|
|
bld.SHR(offset(tmp, bld, c), dst, offset(swz, bld, c));
|
|
|
|
|
|
|
|
|
|
/* XOR tmp.x and tmp.y with bit 6 of the memory address. */
|
|
|
|
|
bld.XOR(tmp, tmp, offset(tmp, bld, 1));
|
2015-11-02 11:26:16 -08:00
|
|
|
bld.AND(tmp, tmp, brw_imm_d(1 << 6));
|
2015-04-22 16:44:18 +03:00
|
|
|
bld.XOR(dst, dst, tmp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
/* Multiply by the Bpp/stride value. Note that the addr.y may be
|
|
|
|
|
* non-zero even if the image is one-dimensional because a
|
|
|
|
|
* vertical offset may have been applied above to select a
|
|
|
|
|
* non-zero slice or level of a higher-dimensional texture.
|
|
|
|
|
*/
|
|
|
|
|
bld.MUL(offset(addr, bld, 1),
|
|
|
|
|
offset(addr, bld, 1), offset(stride, bld, 1));
|
|
|
|
|
bld.ADD(addr, addr, offset(addr, bld, 1));
|
|
|
|
|
bld.MUL(dst, addr, stride);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
namespace image_format_conversion {
|
|
|
|
|
using image_format_info::color_u;
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
/**
 * Maximum representable value in an unsigned integer with the given
 * number of bits.
 *
 * Returns zero for \p n equal to zero and saturates to the largest
 * unsigned value when \p n is at least the width of unsigned.
 */
inline unsigned
scale(unsigned n)
{
   const unsigned max_bits = 8 * sizeof(unsigned);

   /* Shifting by the full width of the type (or more) is undefined, so
    * that case is handled separately.  The shift itself is done on an
    * unsigned one to avoid overflowing a signed int when n is the index
    * of the top bit.
    */
   return n >= max_bits ? ~0u : (1u << n) - 1;
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Pack the vector \p src in a bitfield given the per-component bit
|
|
|
|
|
* shifts and widths. Note that bitfield components are not allowed to
|
|
|
|
|
* cross 32-bit boundaries.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-30 15:51:58 +03:00
|
|
|
emit_pack(const fs_builder &bld, const fs_reg &src,
|
|
|
|
|
const color_u &shifts, const color_u &widths)
|
|
|
|
|
{
|
|
|
|
|
const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
|
|
|
|
|
bool seen[4] = {};
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c) {
|
|
|
|
|
if (widths[c]) {
|
|
|
|
|
const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
/* Shift each component left to the correct bitfield position. */
|
2015-11-02 11:26:16 -08:00
|
|
|
bld.SHL(tmp, offset(src, bld, c), brw_imm_ud(shifts[c] % 32));
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
/* Add everything up. */
|
|
|
|
|
if (seen[shifts[c] / 32]) {
|
|
|
|
|
bld.OR(offset(dst, bld, shifts[c] / 32),
|
|
|
|
|
offset(dst, bld, shifts[c] / 32), tmp);
|
|
|
|
|
} else {
|
|
|
|
|
bld.MOV(offset(dst, bld, shifts[c] / 32), tmp);
|
|
|
|
|
seen[shifts[c] / 32] = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Unpack a vector from the bitfield \p src given the per-component bit
|
|
|
|
|
* shifts and widths. Note that bitfield components are not allowed to
|
|
|
|
|
* cross 32-bit boundaries.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-30 15:51:58 +03:00
|
|
|
emit_unpack(const fs_builder &bld, const fs_reg &src,
|
|
|
|
|
const color_u &shifts, const color_u &widths)
|
|
|
|
|
{
|
|
|
|
|
const fs_reg dst = bld.vgrf(src.type, 4);
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c) {
|
|
|
|
|
if (widths[c]) {
|
|
|
|
|
/* Shift left to discard the most significant bits. */
|
|
|
|
|
bld.SHL(offset(dst, bld, c),
|
|
|
|
|
offset(src, bld, shifts[c] / 32),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_ud(32 - shifts[c] % 32 - widths[c]));
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
/* Shift back to the least significant bits using an arithmetic
|
|
|
|
|
* shift to get sign extension on signed types.
|
|
|
|
|
*/
|
|
|
|
|
bld.ASR(offset(dst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
offset(dst, bld, c), brw_imm_ud(32 - widths[c]));
|
2015-07-30 15:51:58 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert an integer vector into another integer vector of the
|
|
|
|
|
* specified bit widths, properly handling overflow.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-30 15:51:58 +03:00
|
|
|
emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
|
|
|
|
|
const color_u &widths, bool is_signed)
|
|
|
|
|
{
|
|
|
|
|
const unsigned s = (is_signed ? 1 : 0);
|
|
|
|
|
const fs_reg dst = bld.vgrf(
|
|
|
|
|
is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
|
|
|
|
|
assert(src.type == dst.type);
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c) {
|
|
|
|
|
if (widths[c]) {
|
|
|
|
|
/* Clamp to the maximum value. */
|
|
|
|
|
bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_d((int)scale(widths[c] - s)),
|
2015-07-30 15:51:58 +03:00
|
|
|
BRW_CONDITIONAL_L);
|
|
|
|
|
|
|
|
|
|
/* Clamp to the minimum value. */
|
|
|
|
|
if (is_signed)
|
|
|
|
|
bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_d(-(int)scale(widths[c] - s) - 1),
|
2015-08-28 17:10:00 -07:00
|
|
|
BRW_CONDITIONAL_GE);
|
2016-01-18 17:30:59 -08:00
|
|
|
|
|
|
|
|
/* Mask off all but the bits we actually want. Otherwise, if
|
|
|
|
|
* we pass a negative number into the hardware when it's
|
|
|
|
|
* expecting something like UINT8, it will happily clamp it to
|
|
|
|
|
* +255 for us.
|
|
|
|
|
*/
|
|
|
|
|
if (is_signed && widths[c] < 32)
|
|
|
|
|
bld.AND(offset(dst, bld, c), offset(dst, bld, c),
|
|
|
|
|
brw_imm_d(scale(widths[c])));
|
2015-07-30 15:51:58 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a normalized fixed-point vector of the specified signedness
|
|
|
|
|
* and bit widths into a floating point vector.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-30 15:51:58 +03:00
|
|
|
emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
|
|
|
|
|
const color_u &widths, bool is_signed)
|
|
|
|
|
{
|
|
|
|
|
const unsigned s = (is_signed ? 1 : 0);
|
|
|
|
|
const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c) {
|
|
|
|
|
if (widths[c]) {
|
|
|
|
|
/* Convert to float. */
|
|
|
|
|
bld.MOV(offset(dst, bld, c), offset(src, bld, c));
|
|
|
|
|
|
|
|
|
|
/* Divide by the normalization constants. */
|
|
|
|
|
bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_f(1.0f / scale(widths[c] - s)));
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
/* Clamp to the minimum value. */
|
|
|
|
|
if (is_signed)
|
|
|
|
|
bld.emit_minmax(offset(dst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
offset(dst, bld, c), brw_imm_f(-1.0f),
|
2015-08-28 17:10:00 -07:00
|
|
|
BRW_CONDITIONAL_GE);
|
2015-07-30 15:51:58 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a floating-point vector into a normalized fixed-point vector
|
|
|
|
|
* of the specified signedness and bit widths.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-30 15:51:58 +03:00
|
|
|
emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
|
|
|
|
|
const color_u &widths, bool is_signed)
|
|
|
|
|
{
|
|
|
|
|
const unsigned s = (is_signed ? 1 : 0);
|
|
|
|
|
const fs_reg dst = bld.vgrf(
|
|
|
|
|
is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
|
|
|
|
|
const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c) {
|
|
|
|
|
if (widths[c]) {
|
|
|
|
|
/* Clamp the normalized floating-point argument. */
|
|
|
|
|
if (is_signed) {
|
|
|
|
|
bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_f(-1.0f), BRW_CONDITIONAL_GE);
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_f(1.0f), BRW_CONDITIONAL_L);
|
2015-07-30 15:51:58 +03:00
|
|
|
} else {
|
|
|
|
|
set_saturate(true, bld.MOV(offset(fdst, bld, c),
|
|
|
|
|
offset(src, bld, c)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Multiply by the normalization constants. */
|
|
|
|
|
bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_f((float)scale(widths[c] - s)));
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
/* Convert to integer. */
|
|
|
|
|
bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
|
|
|
|
|
bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
|
2016-01-18 17:30:59 -08:00
|
|
|
|
|
|
|
|
/* Mask off all but the bits we actually want. Otherwise, if
|
|
|
|
|
* we pass a negative number into the hardware when it's
|
|
|
|
|
* expecting something like UINT8, it will happily clamp it to
|
|
|
|
|
* +255 for us.
|
|
|
|
|
*/
|
|
|
|
|
if (is_signed && widths[c] < 32)
|
|
|
|
|
bld.AND(offset(dst, bld, c), offset(dst, bld, c),
|
|
|
|
|
brw_imm_d(scale(widths[c])));
|
2015-07-30 15:51:58 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a floating point vector of the specified bit widths into a
|
|
|
|
|
* 32-bit floating point vector.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-30 15:51:58 +03:00
|
|
|
emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
|
|
|
|
|
const color_u &widths)
|
|
|
|
|
{
|
|
|
|
|
const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
|
|
|
|
|
const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c) {
|
|
|
|
|
if (widths[c]) {
|
|
|
|
|
bld.MOV(offset(dst, bld, c), offset(src, bld, c));
|
|
|
|
|
|
|
|
|
|
/* Extend 10-bit and 11-bit floating point numbers to 15 bits.
|
|
|
|
|
* This works because they have a 5-bit exponent just like the
|
|
|
|
|
* 16-bit floating point format, and they have no sign bit.
|
|
|
|
|
*/
|
|
|
|
|
if (widths[c] < 16)
|
|
|
|
|
bld.SHL(offset(dst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
offset(dst, bld, c), brw_imm_ud(15 - widths[c]));
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
/* Convert to 32-bit floating point. */
|
|
|
|
|
bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return fdst;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a vector into a floating point vector of the specified bit
|
|
|
|
|
* widths.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-30 15:51:58 +03:00
|
|
|
emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
|
|
|
|
|
const color_u &widths)
|
|
|
|
|
{
|
|
|
|
|
const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
|
|
|
|
|
const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c) {
|
|
|
|
|
if (widths[c]) {
|
|
|
|
|
bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
|
|
|
|
|
|
|
|
|
|
/* Clamp to the minimum value. */
|
|
|
|
|
if (widths[c] < 16)
|
|
|
|
|
bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_f(0.0f), BRW_CONDITIONAL_GE);
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
/* Convert to 16-bit floating-point. */
|
|
|
|
|
bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
|
|
|
|
|
|
|
|
|
|
/* Discard the least significant bits to get floating point
|
|
|
|
|
* numbers of the requested width. This works because the
|
|
|
|
|
* 10-bit and 11-bit floating point formats have a 5-bit
|
|
|
|
|
* exponent just like the 16-bit format, and they have no sign
|
|
|
|
|
* bit.
|
|
|
|
|
*/
|
|
|
|
|
if (widths[c] < 16)
|
|
|
|
|
bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
brw_imm_ud(15 - widths[c]));
|
2015-07-30 15:51:58 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Fill missing components of a vector with 0, 0, 0, 1.
|
|
|
|
|
*/
|
2017-07-09 14:11:02 -07:00
|
|
|
static fs_reg
|
2015-07-30 15:51:58 +03:00
|
|
|
emit_pad(const fs_builder &bld, const fs_reg &src,
|
|
|
|
|
const color_u &widths)
|
|
|
|
|
{
|
|
|
|
|
const fs_reg dst = bld.vgrf(src.type, 4);
|
|
|
|
|
const unsigned pad[] = { 0, 0, 0, 1 };
|
|
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 4; ++c)
|
|
|
|
|
bld.MOV(offset(dst, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
widths[c] ? offset(src, bld, c)
|
|
|
|
|
: fs_reg(brw_imm_ud(pad[c])));
|
2015-07-30 15:51:58 +03:00
|
|
|
|
|
|
|
|
return dst;
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-04-22 16:43:51 +03:00
|
|
|
}
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
namespace brw {
|
|
|
|
|
namespace image_access {
|
|
|
|
|
/**
|
|
|
|
|
* Load a vector from a surface of the given format and dimensionality
|
|
|
|
|
* at the given coordinates. \p surf_dims and \p arr_dims give the
|
|
|
|
|
* number of non-array and array coordinates of the image respectively.
|
|
|
|
|
*/
|
|
|
|
|
fs_reg
|
|
|
|
|
emit_image_load(const fs_builder &bld,
|
|
|
|
|
const fs_reg &image, const fs_reg &addr,
|
|
|
|
|
unsigned surf_dims, unsigned arr_dims,
|
2016-04-15 16:36:59 -07:00
|
|
|
unsigned gl_format)
|
2015-07-30 15:46:40 +03:00
|
|
|
{
|
|
|
|
|
using namespace image_format_info;
|
|
|
|
|
using namespace image_format_conversion;
|
|
|
|
|
using namespace image_validity;
|
|
|
|
|
using namespace image_coordinates;
|
|
|
|
|
using namespace surface_access;
|
2016-08-22 15:01:08 -07:00
|
|
|
const gen_device_info *devinfo = bld.shader->devinfo;
|
2016-04-15 21:55:02 -07:00
|
|
|
const isl_format format = isl_format_for_gl_format(gl_format);
|
|
|
|
|
const isl_format lower_format =
|
|
|
|
|
isl_lower_storage_image_format(devinfo, format);
|
2015-07-30 15:46:40 +03:00
|
|
|
fs_reg tmp;
|
|
|
|
|
|
|
|
|
|
/* Transform the image coordinates into actual surface coordinates. */
|
|
|
|
|
const fs_reg saddr =
|
|
|
|
|
emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
|
|
|
|
|
const unsigned dims =
|
|
|
|
|
num_image_coordinates(bld, surf_dims, arr_dims, format);
|
|
|
|
|
|
2016-04-15 21:55:02 -07:00
|
|
|
if (isl_has_matching_typed_storage_image_format(devinfo, format)) {
|
2015-07-30 15:46:40 +03:00
|
|
|
/* Hopefully we get here most of the time... */
|
|
|
|
|
tmp = emit_typed_read(bld, image, saddr, dims,
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format_get_num_channels(lower_format));
|
2015-07-30 15:46:40 +03:00
|
|
|
} else {
|
|
|
|
|
/* Untyped surface reads return 32 bits of the surface per
|
|
|
|
|
* component, without any sort of unpacking or type conversion,
|
|
|
|
|
*/
|
2016-07-08 22:10:11 -07:00
|
|
|
const unsigned size = isl_format_get_layout(format)->bpb / 32;
|
2015-07-30 15:46:40 +03:00
|
|
|
/* they don't properly handle out of bounds access, so we have to
|
|
|
|
|
* check manually if the coordinates are valid and predicate the
|
|
|
|
|
* surface read on the result,
|
|
|
|
|
*/
|
|
|
|
|
const brw_predicate pred =
|
2015-08-26 21:59:46 +03:00
|
|
|
emit_untyped_image_check(bld, image,
|
|
|
|
|
emit_bounds_check(bld, image,
|
|
|
|
|
saddr, dims));
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
/* and they don't know about surface coordinates, we need to
|
|
|
|
|
* convert them to a raw memory offset.
|
|
|
|
|
*/
|
|
|
|
|
const fs_reg laddr = emit_address_calculation(bld, image, saddr, dims);
|
|
|
|
|
|
|
|
|
|
tmp = emit_untyped_read(bld, image, laddr, 1, size, pred);
|
|
|
|
|
|
|
|
|
|
/* An out of bounds surface access should give zero as result. */
|
2015-09-14 15:36:24 -07:00
|
|
|
for (unsigned c = 0; c < size; ++c)
|
2015-07-30 15:46:40 +03:00
|
|
|
set_predicate(pred, bld.SEL(offset(tmp, bld, c),
|
2015-11-02 11:26:16 -08:00
|
|
|
offset(tmp, bld, c), brw_imm_d(0)));
|
2015-07-30 15:46:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Set the register type to D instead of UD if the data type is
|
|
|
|
|
* represented as a signed integer in memory so that sign extension
|
|
|
|
|
* is handled correctly by unpack.
|
|
|
|
|
*/
|
|
|
|
|
if (needs_sign_extension(format))
|
|
|
|
|
tmp = retype(tmp, BRW_REGISTER_TYPE_D);
|
|
|
|
|
|
|
|
|
|
if (!has_supported_bit_layout(devinfo, format)) {
|
|
|
|
|
/* Unpack individual vector components from the bitfield if the
|
|
|
|
|
* hardware is unable to do it for us.
|
|
|
|
|
*/
|
|
|
|
|
if (has_split_bit_layout(devinfo, format))
|
|
|
|
|
tmp = emit_pack(bld, tmp, get_bit_shifts(lower_format),
|
|
|
|
|
get_bit_widths(lower_format));
|
|
|
|
|
else
|
|
|
|
|
tmp = emit_unpack(bld, tmp, get_bit_shifts(format),
|
|
|
|
|
get_bit_widths(format));
|
|
|
|
|
|
|
|
|
|
} else if ((needs_sign_extension(format) &&
|
|
|
|
|
!is_conversion_trivial(devinfo, format)) ||
|
|
|
|
|
has_undefined_high_bits(devinfo, format)) {
|
|
|
|
|
/* Perform a trivial unpack even though the bit layout matches in
|
|
|
|
|
* order to get the most significant bits of each component
|
|
|
|
|
* initialized properly.
|
|
|
|
|
*/
|
|
|
|
|
tmp = emit_unpack(bld, tmp, color_u(0, 32, 64, 96),
|
|
|
|
|
get_bit_widths(format));
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-15 21:55:02 -07:00
|
|
|
if (!isl_format_has_int_channel(format)) {
|
2015-07-30 15:46:40 +03:00
|
|
|
if (is_conversion_trivial(devinfo, format)) {
|
|
|
|
|
/* Just need to cast the vector to the target type. */
|
|
|
|
|
tmp = retype(tmp, BRW_REGISTER_TYPE_F);
|
|
|
|
|
} else {
|
|
|
|
|
/* Do the right sort of type conversion to float. */
|
2016-04-15 21:55:02 -07:00
|
|
|
if (isl_format_has_float_channel(format))
|
2015-07-30 15:46:40 +03:00
|
|
|
tmp = emit_convert_from_float(
|
|
|
|
|
bld, tmp, get_bit_widths(format));
|
|
|
|
|
else
|
|
|
|
|
tmp = emit_convert_from_scaled(
|
|
|
|
|
bld, tmp, get_bit_widths(format),
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format_has_snorm_channel(format));
|
2015-07-30 15:46:40 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Initialize missing components of the result. */
|
|
|
|
|
return emit_pad(bld, tmp, get_bit_widths(format));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Store a vector in a surface of the given format and dimensionality at
|
|
|
|
|
* the given coordinates. \p surf_dims and \p arr_dims give the number
|
|
|
|
|
* of non-array and array coordinates of the image respectively.
|
|
|
|
|
*/
|
|
|
|
|
void
|
|
|
|
|
emit_image_store(const fs_builder &bld, const fs_reg &image,
|
|
|
|
|
const fs_reg &addr, const fs_reg &src,
|
|
|
|
|
unsigned surf_dims, unsigned arr_dims,
|
2016-04-15 16:36:59 -07:00
|
|
|
unsigned gl_format)
|
2015-07-30 15:46:40 +03:00
|
|
|
{
|
|
|
|
|
using namespace image_format_info;
|
|
|
|
|
using namespace image_format_conversion;
|
|
|
|
|
using namespace image_validity;
|
|
|
|
|
using namespace image_coordinates;
|
|
|
|
|
using namespace surface_access;
|
2016-04-15 21:55:02 -07:00
|
|
|
const isl_format format = isl_format_for_gl_format(gl_format);
|
2016-08-22 15:01:08 -07:00
|
|
|
const gen_device_info *devinfo = bld.shader->devinfo;
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
/* Transform the image coordinates into actual surface coordinates. */
|
|
|
|
|
const fs_reg saddr =
|
|
|
|
|
emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
|
|
|
|
|
const unsigned dims =
|
|
|
|
|
num_image_coordinates(bld, surf_dims, arr_dims, format);
|
|
|
|
|
|
2016-04-15 21:55:02 -07:00
|
|
|
if (gl_format == GL_NONE) {
|
2015-07-30 15:46:40 +03:00
|
|
|
/* We don't know what the format is, but that's fine because it
|
|
|
|
|
* implies write-only access, and typed surface writes are always
|
|
|
|
|
* able to take care of type conversion and packing for us.
|
|
|
|
|
*/
|
|
|
|
|
emit_typed_write(bld, image, saddr, src, dims, 4);
|
|
|
|
|
|
|
|
|
|
} else {
|
2016-04-15 21:55:02 -07:00
|
|
|
const isl_format lower_format =
|
|
|
|
|
isl_lower_storage_image_format(devinfo, format);
|
2015-07-30 15:46:40 +03:00
|
|
|
fs_reg tmp = src;
|
|
|
|
|
|
|
|
|
|
if (!is_conversion_trivial(devinfo, format)) {
|
|
|
|
|
/* Do the right sort of type conversion. */
|
2016-04-15 21:55:02 -07:00
|
|
|
if (isl_format_has_float_channel(format))
|
2015-07-30 15:46:40 +03:00
|
|
|
tmp = emit_convert_to_float(bld, tmp, get_bit_widths(format));
|
|
|
|
|
|
2016-04-15 21:55:02 -07:00
|
|
|
else if (isl_format_has_int_channel(format))
|
2015-07-30 15:46:40 +03:00
|
|
|
tmp = emit_convert_to_integer(bld, tmp, get_bit_widths(format),
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format_has_sint_channel(format));
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
else
|
|
|
|
|
tmp = emit_convert_to_scaled(bld, tmp, get_bit_widths(format),
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format_has_snorm_channel(format));
|
2015-07-30 15:46:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* We're down to bit manipulation at this point. */
|
|
|
|
|
tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
|
|
|
|
|
|
|
|
|
|
if (!has_supported_bit_layout(devinfo, format)) {
|
|
|
|
|
/* Pack the vector components into a bitfield if the hardware
|
|
|
|
|
* is unable to do it for us.
|
|
|
|
|
*/
|
|
|
|
|
if (has_split_bit_layout(devinfo, format))
|
|
|
|
|
tmp = emit_unpack(bld, tmp, get_bit_shifts(lower_format),
|
|
|
|
|
get_bit_widths(lower_format));
|
|
|
|
|
|
|
|
|
|
else
|
|
|
|
|
tmp = emit_pack(bld, tmp, get_bit_shifts(format),
|
|
|
|
|
get_bit_widths(format));
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-15 21:55:02 -07:00
|
|
|
if (isl_has_matching_typed_storage_image_format(devinfo, format)) {
|
2015-07-30 15:46:40 +03:00
|
|
|
/* Hopefully we get here most of the time... */
|
|
|
|
|
emit_typed_write(bld, image, saddr, tmp, dims,
|
2016-04-15 21:55:02 -07:00
|
|
|
isl_format_get_num_channels(lower_format));
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
/* Untyped surface writes store 32 bits of the surface per
|
|
|
|
|
* component, without any sort of packing or type conversion,
|
|
|
|
|
*/
|
2016-07-08 22:10:11 -07:00
|
|
|
const unsigned size = isl_format_get_layout(format)->bpb / 32;
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
/* they don't properly handle out of bounds access, so we have
|
|
|
|
|
* to check manually if the coordinates are valid and predicate
|
|
|
|
|
* the surface write on the result,
|
|
|
|
|
*/
|
|
|
|
|
const brw_predicate pred =
|
2015-08-26 21:59:46 +03:00
|
|
|
emit_untyped_image_check(bld, image,
|
|
|
|
|
emit_bounds_check(bld, image,
|
|
|
|
|
saddr, dims));
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
/* and, phew, they don't know about surface coordinates, we
|
|
|
|
|
* need to convert them to a raw memory offset.
|
|
|
|
|
*/
|
|
|
|
|
const fs_reg laddr = emit_address_calculation(
|
|
|
|
|
bld, image, saddr, dims);
|
|
|
|
|
|
|
|
|
|
emit_untyped_write(bld, image, laddr, tmp, 1, size, pred);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Perform an atomic read-modify-write operation in a surface of the
|
|
|
|
|
* given dimensionality at the given coordinates. \p surf_dims and \p
|
|
|
|
|
* arr_dims give the number of non-array and array coordinates of the
|
|
|
|
|
* image respectively. Main building block of the imageAtomic GLSL
|
|
|
|
|
* built-ins.
|
|
|
|
|
*/
|
|
|
|
|
fs_reg
|
|
|
|
|
emit_image_atomic(const fs_builder &bld,
|
|
|
|
|
const fs_reg &image, const fs_reg &addr,
|
|
|
|
|
const fs_reg &src0, const fs_reg &src1,
|
|
|
|
|
unsigned surf_dims, unsigned arr_dims,
|
|
|
|
|
unsigned rsize, unsigned op)
|
|
|
|
|
{
|
|
|
|
|
using namespace image_validity;
|
|
|
|
|
using namespace image_coordinates;
|
|
|
|
|
using namespace surface_access;
|
|
|
|
|
/* Avoid performing an atomic operation on an unbound surface. */
|
2015-08-26 21:59:46 +03:00
|
|
|
const brw_predicate pred = emit_typed_atomic_check(bld, image);
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
/* Transform the image coordinates into actual surface coordinates. */
|
|
|
|
|
const fs_reg saddr =
|
|
|
|
|
emit_image_coordinates(bld, addr, surf_dims, arr_dims,
|
2016-04-15 21:55:02 -07:00
|
|
|
ISL_FORMAT_R32_UINT);
|
2015-07-30 15:46:40 +03:00
|
|
|
const unsigned dims =
|
|
|
|
|
num_image_coordinates(bld, surf_dims, arr_dims,
|
2016-04-15 21:55:02 -07:00
|
|
|
ISL_FORMAT_R32_UINT);
|
2015-07-30 15:46:40 +03:00
|
|
|
|
|
|
|
|
/* Thankfully we can do without untyped atomics here. */
|
|
|
|
|
const fs_reg tmp = emit_typed_atomic(bld, image, saddr, src0, src1,
|
|
|
|
|
dims, rsize, op, pred);
|
|
|
|
|
|
|
|
|
|
/* An unbound surface access should give zero as result. */
|
2016-02-06 18:43:45 -08:00
|
|
|
if (rsize && pred)
|
2015-11-02 11:26:16 -08:00
|
|
|
set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
|
2015-07-30 15:46:40 +03:00
|
|
|
|
2015-08-17 01:52:19 +03:00
|
|
|
return retype(tmp, src0.type);
|
2015-07-30 15:46:40 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|