mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-21 09:50:36 +02:00
amd/common: switch to 3-spaces style
Follow-up of !4319 using the same clang-format config. Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Acked-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5310>
This commit is contained in:
parent
82d2d73e03
commit
e5fb9dca2a
22 changed files with 7198 additions and 7379 deletions
|
|
@ -1,3 +0,0 @@
|
|||
[*.{c,h}]
|
||||
indent_style = tab
|
||||
indent_size = tab
|
||||
|
|
@ -21,132 +21,129 @@
|
|||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ac_gpu_info.h"
|
||||
#include "ac_binary.h"
|
||||
|
||||
#include "ac_gpu_info.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
#include <gelf.h>
|
||||
#include <libelf.h>
|
||||
#include <sid.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <sid.h>
|
||||
|
||||
#define SPILLED_SGPRS 0x4
|
||||
#define SPILLED_VGPRS 0x8
|
||||
#define SPILLED_SGPRS 0x4
|
||||
#define SPILLED_VGPRS 0x8
|
||||
|
||||
/* Parse configuration data in .AMDGPU.config section format. */
|
||||
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
|
||||
unsigned wave_size,
|
||||
bool really_needs_scratch,
|
||||
const struct radeon_info *info,
|
||||
struct ac_shader_config *conf)
|
||||
void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
|
||||
bool really_needs_scratch, const struct radeon_info *info,
|
||||
struct ac_shader_config *conf)
|
||||
{
|
||||
uint32_t scratch_size = 0;
|
||||
uint32_t scratch_size = 0;
|
||||
|
||||
for (size_t i = 0; i < nbytes; i += 8) {
|
||||
unsigned reg = util_le32_to_cpu(*(uint32_t*)(data + i));
|
||||
unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i + 4));
|
||||
switch (reg) {
|
||||
case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
|
||||
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
|
||||
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
|
||||
case R_00B848_COMPUTE_PGM_RSRC1:
|
||||
case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
|
||||
if (wave_size == 32)
|
||||
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
|
||||
else
|
||||
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
|
||||
for (size_t i = 0; i < nbytes; i += 8) {
|
||||
unsigned reg = util_le32_to_cpu(*(uint32_t *)(data + i));
|
||||
unsigned value = util_le32_to_cpu(*(uint32_t *)(data + i + 4));
|
||||
switch (reg) {
|
||||
case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
|
||||
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
|
||||
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
|
||||
case R_00B848_COMPUTE_PGM_RSRC1:
|
||||
case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
|
||||
if (wave_size == 32)
|
||||
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
|
||||
else
|
||||
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
|
||||
|
||||
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
|
||||
/* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
|
||||
conf->float_mode = G_00B028_FLOAT_MODE(value);
|
||||
conf->rsrc1 = value;
|
||||
break;
|
||||
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
|
||||
conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
|
||||
/* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
|
||||
conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
|
||||
conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
|
||||
conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
|
||||
conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B84C_COMPUTE_PGM_RSRC2:
|
||||
conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B8A0_COMPUTE_PGM_RSRC3:
|
||||
conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc3 = value;
|
||||
break;
|
||||
case R_0286CC_SPI_PS_INPUT_ENA:
|
||||
conf->spi_ps_input_ena = value;
|
||||
break;
|
||||
case R_0286D0_SPI_PS_INPUT_ADDR:
|
||||
conf->spi_ps_input_addr = value;
|
||||
break;
|
||||
case R_0286E8_SPI_TMPRING_SIZE:
|
||||
case R_00B860_COMPUTE_TMPRING_SIZE:
|
||||
/* WAVESIZE is in units of 256 dwords. */
|
||||
scratch_size = value;
|
||||
break;
|
||||
case SPILLED_SGPRS:
|
||||
conf->spilled_sgprs = value;
|
||||
break;
|
||||
case SPILLED_VGPRS:
|
||||
conf->spilled_vgprs = value;
|
||||
break;
|
||||
default:
|
||||
{
|
||||
static bool printed;
|
||||
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
|
||||
/* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
|
||||
conf->float_mode = G_00B028_FLOAT_MODE(value);
|
||||
conf->rsrc1 = value;
|
||||
break;
|
||||
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
|
||||
conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
|
||||
/* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
|
||||
conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
|
||||
conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
|
||||
conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
|
||||
conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B84C_COMPUTE_PGM_RSRC2:
|
||||
conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
|
||||
conf->rsrc2 = value;
|
||||
break;
|
||||
case R_00B8A0_COMPUTE_PGM_RSRC3:
|
||||
conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
|
||||
conf->rsrc3 = value;
|
||||
break;
|
||||
case R_0286CC_SPI_PS_INPUT_ENA:
|
||||
conf->spi_ps_input_ena = value;
|
||||
break;
|
||||
case R_0286D0_SPI_PS_INPUT_ADDR:
|
||||
conf->spi_ps_input_addr = value;
|
||||
break;
|
||||
case R_0286E8_SPI_TMPRING_SIZE:
|
||||
case R_00B860_COMPUTE_TMPRING_SIZE:
|
||||
/* WAVESIZE is in units of 256 dwords. */
|
||||
scratch_size = value;
|
||||
break;
|
||||
case SPILLED_SGPRS:
|
||||
conf->spilled_sgprs = value;
|
||||
break;
|
||||
case SPILLED_VGPRS:
|
||||
conf->spilled_vgprs = value;
|
||||
break;
|
||||
default: {
|
||||
static bool printed;
|
||||
|
||||
if (!printed) {
|
||||
fprintf(stderr, "Warning: LLVM emitted unknown "
|
||||
"config register: 0x%x\n", reg);
|
||||
printed = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!printed) {
|
||||
fprintf(stderr,
|
||||
"Warning: LLVM emitted unknown "
|
||||
"config register: 0x%x\n",
|
||||
reg);
|
||||
printed = true;
|
||||
}
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!conf->spi_ps_input_addr)
|
||||
conf->spi_ps_input_addr = conf->spi_ps_input_ena;
|
||||
if (!conf->spi_ps_input_addr)
|
||||
conf->spi_ps_input_addr = conf->spi_ps_input_ena;
|
||||
|
||||
if (really_needs_scratch) {
|
||||
/* sgprs spills aren't spilling */
|
||||
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
|
||||
}
|
||||
if (really_needs_scratch) {
|
||||
/* sgprs spills aren't spilling */
|
||||
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
|
||||
}
|
||||
|
||||
/* GFX 10.3 internally:
|
||||
* - aligns VGPRS to 16 for Wave32 and 8 for Wave64
|
||||
* - aligns LDS to 1024
|
||||
*
|
||||
* For shader-db stats, set num_vgprs that the hw actually uses.
|
||||
*/
|
||||
if (info->chip_class >= GFX10_3) {
|
||||
conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
|
||||
}
|
||||
/* GFX 10.3 internally:
|
||||
* - aligns VGPRS to 16 for Wave32 and 8 for Wave64
|
||||
* - aligns LDS to 1024
|
||||
*
|
||||
* For shader-db stats, set num_vgprs that the hw actually uses.
|
||||
*/
|
||||
if (info->chip_class >= GFX10_3) {
|
||||
conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
|
||||
}
|
||||
|
||||
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
||||
* cost.
|
||||
*
|
||||
* Don't enable denormals for 32-bit floats, because:
|
||||
* - denormals disable output modifiers
|
||||
* - denormals break v_mad_f32
|
||||
* - GFX6 & GFX7 would be very slow
|
||||
*/
|
||||
conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
|
||||
conf->float_mode |= V_00B028_FP_64_DENORMS;
|
||||
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
||||
* cost.
|
||||
*
|
||||
* Don't enable denormals for 32-bit floats, because:
|
||||
* - denormals disable output modifiers
|
||||
* - denormals break v_mad_f32
|
||||
* - GFX6 & GFX7 would be very slow
|
||||
*/
|
||||
conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
|
||||
conf->float_mode |= V_00B028_FP_64_DENORMS;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,9 +24,9 @@
|
|||
#ifndef AC_BINARY_H
|
||||
#define AC_BINARY_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -35,26 +35,24 @@ extern "C" {
|
|||
struct radeon_info;
|
||||
|
||||
struct ac_shader_config {
|
||||
unsigned num_sgprs;
|
||||
unsigned num_vgprs;
|
||||
unsigned num_shared_vgprs; /* GFX10: number of VGPRs shared between half-waves */
|
||||
unsigned spilled_sgprs;
|
||||
unsigned spilled_vgprs;
|
||||
unsigned lds_size; /* in HW allocation units; i.e 256 bytes on SI, 512 bytes on CI+ */
|
||||
unsigned spi_ps_input_ena;
|
||||
unsigned spi_ps_input_addr;
|
||||
unsigned float_mode;
|
||||
unsigned scratch_bytes_per_wave;
|
||||
unsigned rsrc1;
|
||||
unsigned rsrc2;
|
||||
unsigned rsrc3;
|
||||
unsigned num_sgprs;
|
||||
unsigned num_vgprs;
|
||||
unsigned num_shared_vgprs; /* GFX10: number of VGPRs shared between half-waves */
|
||||
unsigned spilled_sgprs;
|
||||
unsigned spilled_vgprs;
|
||||
unsigned lds_size; /* in HW allocation units; i.e 256 bytes on SI, 512 bytes on CI+ */
|
||||
unsigned spi_ps_input_ena;
|
||||
unsigned spi_ps_input_addr;
|
||||
unsigned float_mode;
|
||||
unsigned scratch_bytes_per_wave;
|
||||
unsigned rsrc1;
|
||||
unsigned rsrc2;
|
||||
unsigned rsrc3;
|
||||
};
|
||||
|
||||
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
|
||||
unsigned wave_size,
|
||||
bool really_needs_scratch,
|
||||
const struct radeon_info *info,
|
||||
struct ac_shader_config *conf);
|
||||
void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
|
||||
bool really_needs_scratch, const struct radeon_info *info,
|
||||
struct ac_shader_config *conf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -24,15 +24,15 @@
|
|||
#ifndef AC_DEBUG_H
|
||||
#define AC_DEBUG_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "amd_family.h"
|
||||
|
||||
#define AC_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff))
|
||||
#define AC_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000)
|
||||
#define AC_GET_TRACE_POINT_ID(x) ((x) & 0xffff)
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define AC_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id)&0xffff))
|
||||
#define AC_IS_TRACE_POINT(x) (((x)&0xcafe0000) == 0xcafe0000)
|
||||
#define AC_GET_TRACE_POINT_ID(x) ((x)&0xffff)
|
||||
|
||||
#define AC_MAX_WAVES_PER_CHIP (64 * 40)
|
||||
|
||||
|
|
@ -41,36 +41,36 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
struct ac_wave_info {
|
||||
unsigned se; /* shader engine */
|
||||
unsigned sh; /* shader array */
|
||||
unsigned cu; /* compute unit */
|
||||
unsigned simd;
|
||||
unsigned wave;
|
||||
uint32_t status;
|
||||
uint64_t pc; /* program counter */
|
||||
uint32_t inst_dw0;
|
||||
uint32_t inst_dw1;
|
||||
uint64_t exec;
|
||||
bool matched; /* whether the wave is used by a currently-bound shader */
|
||||
unsigned se; /* shader engine */
|
||||
unsigned sh; /* shader array */
|
||||
unsigned cu; /* compute unit */
|
||||
unsigned simd;
|
||||
unsigned wave;
|
||||
uint32_t status;
|
||||
uint64_t pc; /* program counter */
|
||||
uint32_t inst_dw0;
|
||||
uint32_t inst_dw1;
|
||||
uint64_t exec;
|
||||
bool matched; /* whether the wave is used by a currently-bound shader */
|
||||
};
|
||||
|
||||
typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);
|
||||
|
||||
const char *ac_get_register_name(enum chip_class chip_class, unsigned offset);
|
||||
void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset,
|
||||
uint32_t value, uint32_t field_mask);
|
||||
void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, uint32_t value,
|
||||
uint32_t field_mask);
|
||||
void ac_parse_ib_chunk(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
|
||||
unsigned trace_id_count, enum chip_class chip_class,
|
||||
ac_debug_addr_callback addr_callback, void *addr_callback_data);
|
||||
void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
|
||||
unsigned trace_id_count, const char *name, enum chip_class chip_class,
|
||||
ac_debug_addr_callback addr_callback, void *addr_callback_data);
|
||||
unsigned trace_id_count, enum chip_class chip_class,
|
||||
ac_debug_addr_callback addr_callback, void *addr_callback_data);
|
||||
void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsigned trace_id_count,
|
||||
const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback,
|
||||
void *addr_callback_data);
|
||||
|
||||
bool ac_vm_fault_occured(enum chip_class chip_class,
|
||||
uint64_t *old_dmesg_timestamp, uint64_t *out_addr);
|
||||
bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timestamp,
|
||||
uint64_t *out_addr);
|
||||
|
||||
unsigned ac_get_wave_info(enum chip_class chip_class,
|
||||
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);
|
||||
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,16 +25,17 @@
|
|||
#ifndef AC_EXP_PARAM_H
|
||||
#define AC_EXP_PARAM_H
|
||||
|
||||
enum {
|
||||
/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
|
||||
AC_EXP_PARAM_OFFSET_0 = 0,
|
||||
AC_EXP_PARAM_OFFSET_31 = 31,
|
||||
/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
|
||||
AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
|
||||
AC_EXP_PARAM_DEFAULT_VAL_0001,
|
||||
AC_EXP_PARAM_DEFAULT_VAL_1110,
|
||||
AC_EXP_PARAM_DEFAULT_VAL_1111,
|
||||
AC_EXP_PARAM_UNDEFINED = 255,
|
||||
enum
|
||||
{
|
||||
/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
|
||||
AC_EXP_PARAM_OFFSET_0 = 0,
|
||||
AC_EXP_PARAM_OFFSET_31 = 31,
|
||||
/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
|
||||
AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
|
||||
AC_EXP_PARAM_DEFAULT_VAL_0001,
|
||||
AC_EXP_PARAM_DEFAULT_VAL_1110,
|
||||
AC_EXP_PARAM_DEFAULT_VAL_1111,
|
||||
AC_EXP_PARAM_UNDEFINED = 255,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -26,10 +26,11 @@
|
|||
#ifndef AC_GPU_INFO_H
|
||||
#define AC_GPU_INFO_H
|
||||
|
||||
#include "amd_family.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "amd_family.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -38,186 +39,179 @@ extern "C" {
|
|||
struct amdgpu_gpu_info;
|
||||
|
||||
struct radeon_info {
|
||||
/* PCI info: domain:bus:dev:func */
|
||||
uint32_t pci_domain;
|
||||
uint32_t pci_bus;
|
||||
uint32_t pci_dev;
|
||||
uint32_t pci_func;
|
||||
/* PCI info: domain:bus:dev:func */
|
||||
uint32_t pci_domain;
|
||||
uint32_t pci_bus;
|
||||
uint32_t pci_dev;
|
||||
uint32_t pci_func;
|
||||
|
||||
/* Device info. */
|
||||
const char *name;
|
||||
const char *marketing_name;
|
||||
bool is_pro_graphics;
|
||||
uint32_t pci_id;
|
||||
uint32_t pci_rev_id;
|
||||
enum radeon_family family;
|
||||
enum chip_class chip_class;
|
||||
uint32_t family_id;
|
||||
uint32_t chip_external_rev;
|
||||
uint32_t clock_crystal_freq;
|
||||
/* Device info. */
|
||||
const char *name;
|
||||
const char *marketing_name;
|
||||
bool is_pro_graphics;
|
||||
uint32_t pci_id;
|
||||
uint32_t pci_rev_id;
|
||||
enum radeon_family family;
|
||||
enum chip_class chip_class;
|
||||
uint32_t family_id;
|
||||
uint32_t chip_external_rev;
|
||||
uint32_t clock_crystal_freq;
|
||||
|
||||
/* Features. */
|
||||
bool has_graphics; /* false if the chip is compute-only */
|
||||
uint32_t num_rings[NUM_RING_TYPES];
|
||||
uint32_t ib_pad_dw_mask[NUM_RING_TYPES];
|
||||
bool has_clear_state;
|
||||
bool has_distributed_tess;
|
||||
bool has_dcc_constant_encode;
|
||||
bool has_rbplus; /* if RB+ registers exist */
|
||||
bool rbplus_allowed; /* if RB+ is allowed */
|
||||
bool has_load_ctx_reg_pkt;
|
||||
bool has_out_of_order_rast;
|
||||
bool has_packed_math_16bit;
|
||||
bool cpdma_prefetch_writes_memory;
|
||||
bool has_gfx9_scissor_bug;
|
||||
bool has_tc_compat_zrange_bug;
|
||||
bool has_msaa_sample_loc_bug;
|
||||
bool has_ls_vgpr_init_bug;
|
||||
/* Features. */
|
||||
bool has_graphics; /* false if the chip is compute-only */
|
||||
uint32_t num_rings[NUM_RING_TYPES];
|
||||
uint32_t ib_pad_dw_mask[NUM_RING_TYPES];
|
||||
bool has_clear_state;
|
||||
bool has_distributed_tess;
|
||||
bool has_dcc_constant_encode;
|
||||
bool has_rbplus; /* if RB+ registers exist */
|
||||
bool rbplus_allowed; /* if RB+ is allowed */
|
||||
bool has_load_ctx_reg_pkt;
|
||||
bool has_out_of_order_rast;
|
||||
bool has_packed_math_16bit;
|
||||
bool cpdma_prefetch_writes_memory;
|
||||
bool has_gfx9_scissor_bug;
|
||||
bool has_tc_compat_zrange_bug;
|
||||
bool has_msaa_sample_loc_bug;
|
||||
bool has_ls_vgpr_init_bug;
|
||||
|
||||
/* Display features. */
|
||||
/* There are 2 display DCC codepaths, because display expects unaligned DCC. */
|
||||
/* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
|
||||
bool use_display_dcc_unaligned;
|
||||
/* Allocate both aligned and unaligned DCC and use the retile blit. */
|
||||
bool use_display_dcc_with_retile_blit;
|
||||
/* Display features. */
|
||||
/* There are 2 display DCC codepaths, because display expects unaligned DCC. */
|
||||
/* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
|
||||
bool use_display_dcc_unaligned;
|
||||
/* Allocate both aligned and unaligned DCC and use the retile blit. */
|
||||
bool use_display_dcc_with_retile_blit;
|
||||
|
||||
/* Memory info. */
|
||||
uint32_t pte_fragment_size;
|
||||
uint32_t gart_page_size;
|
||||
uint64_t gart_size;
|
||||
uint64_t vram_size;
|
||||
uint64_t vram_vis_size;
|
||||
uint32_t vram_bit_width;
|
||||
uint32_t vram_type;
|
||||
unsigned gds_size;
|
||||
unsigned gds_gfx_partition_size;
|
||||
uint64_t max_alloc_size;
|
||||
uint32_t min_alloc_size;
|
||||
uint32_t address32_hi;
|
||||
bool has_dedicated_vram;
|
||||
bool has_l2_uncached;
|
||||
bool r600_has_virtual_memory;
|
||||
uint32_t num_sdp_interfaces;
|
||||
uint32_t num_tcc_blocks;
|
||||
uint32_t tcc_cache_line_size;
|
||||
bool tcc_harvested;
|
||||
unsigned pc_lines;
|
||||
uint32_t lds_size_per_workgroup;
|
||||
uint32_t lds_granularity;
|
||||
uint32_t max_memory_clock;
|
||||
uint32_t ce_ram_size;
|
||||
uint32_t l1_cache_size;
|
||||
uint32_t l2_cache_size;
|
||||
/* Memory info. */
|
||||
uint32_t pte_fragment_size;
|
||||
uint32_t gart_page_size;
|
||||
uint64_t gart_size;
|
||||
uint64_t vram_size;
|
||||
uint64_t vram_vis_size;
|
||||
uint32_t vram_bit_width;
|
||||
uint32_t vram_type;
|
||||
unsigned gds_size;
|
||||
unsigned gds_gfx_partition_size;
|
||||
uint64_t max_alloc_size;
|
||||
uint32_t min_alloc_size;
|
||||
uint32_t address32_hi;
|
||||
bool has_dedicated_vram;
|
||||
bool has_l2_uncached;
|
||||
bool r600_has_virtual_memory;
|
||||
uint32_t num_sdp_interfaces;
|
||||
uint32_t num_tcc_blocks;
|
||||
uint32_t tcc_cache_line_size;
|
||||
bool tcc_harvested;
|
||||
unsigned pc_lines;
|
||||
uint32_t lds_size_per_workgroup;
|
||||
uint32_t lds_granularity;
|
||||
uint32_t max_memory_clock;
|
||||
uint32_t ce_ram_size;
|
||||
uint32_t l1_cache_size;
|
||||
uint32_t l2_cache_size;
|
||||
|
||||
/* CP info. */
|
||||
bool gfx_ib_pad_with_type2;
|
||||
unsigned ib_alignment; /* both start and size alignment */
|
||||
uint32_t me_fw_version;
|
||||
uint32_t me_fw_feature;
|
||||
uint32_t pfp_fw_version;
|
||||
uint32_t pfp_fw_feature;
|
||||
uint32_t ce_fw_version;
|
||||
uint32_t ce_fw_feature;
|
||||
/* CP info. */
|
||||
bool gfx_ib_pad_with_type2;
|
||||
unsigned ib_alignment; /* both start and size alignment */
|
||||
uint32_t me_fw_version;
|
||||
uint32_t me_fw_feature;
|
||||
uint32_t pfp_fw_version;
|
||||
uint32_t pfp_fw_feature;
|
||||
uint32_t ce_fw_version;
|
||||
uint32_t ce_fw_feature;
|
||||
|
||||
/* Multimedia info. */
|
||||
bool has_hw_decode;
|
||||
bool uvd_enc_supported;
|
||||
uint32_t uvd_fw_version;
|
||||
uint32_t vce_fw_version;
|
||||
uint32_t vce_harvest_config;
|
||||
/* Multimedia info. */
|
||||
bool has_hw_decode;
|
||||
bool uvd_enc_supported;
|
||||
uint32_t uvd_fw_version;
|
||||
uint32_t vce_fw_version;
|
||||
uint32_t vce_harvest_config;
|
||||
|
||||
/* Kernel & winsys capabilities. */
|
||||
uint32_t drm_major; /* version */
|
||||
uint32_t drm_minor;
|
||||
uint32_t drm_patchlevel;
|
||||
bool is_amdgpu;
|
||||
bool has_userptr;
|
||||
bool has_syncobj;
|
||||
bool has_syncobj_wait_for_submit;
|
||||
bool has_timeline_syncobj;
|
||||
bool has_fence_to_handle;
|
||||
bool has_ctx_priority;
|
||||
bool has_local_buffers;
|
||||
bool kernel_flushes_hdp_before_ib;
|
||||
bool htile_cmask_support_1d_tiling;
|
||||
bool si_TA_CS_BC_BASE_ADDR_allowed;
|
||||
bool has_bo_metadata;
|
||||
bool has_gpu_reset_status_query;
|
||||
bool has_eqaa_surface_allocator;
|
||||
bool has_format_bc1_through_bc7;
|
||||
bool kernel_flushes_tc_l2_after_ib;
|
||||
bool has_indirect_compute_dispatch;
|
||||
bool has_unaligned_shader_loads;
|
||||
bool has_sparse_vm_mappings;
|
||||
bool has_2d_tiling;
|
||||
bool has_read_registers_query;
|
||||
bool has_gds_ordered_append;
|
||||
bool has_scheduled_fence_dependency;
|
||||
/* Whether SR-IOV is enabled or amdgpu.mcbp=1 was set on the kernel command line. */
|
||||
bool mid_command_buffer_preemption_enabled;
|
||||
/* Kernel & winsys capabilities. */
|
||||
uint32_t drm_major; /* version */
|
||||
uint32_t drm_minor;
|
||||
uint32_t drm_patchlevel;
|
||||
bool is_amdgpu;
|
||||
bool has_userptr;
|
||||
bool has_syncobj;
|
||||
bool has_syncobj_wait_for_submit;
|
||||
bool has_timeline_syncobj;
|
||||
bool has_fence_to_handle;
|
||||
bool has_ctx_priority;
|
||||
bool has_local_buffers;
|
||||
bool kernel_flushes_hdp_before_ib;
|
||||
bool htile_cmask_support_1d_tiling;
|
||||
bool si_TA_CS_BC_BASE_ADDR_allowed;
|
||||
bool has_bo_metadata;
|
||||
bool has_gpu_reset_status_query;
|
||||
bool has_eqaa_surface_allocator;
|
||||
bool has_format_bc1_through_bc7;
|
||||
bool kernel_flushes_tc_l2_after_ib;
|
||||
bool has_indirect_compute_dispatch;
|
||||
bool has_unaligned_shader_loads;
|
||||
bool has_sparse_vm_mappings;
|
||||
bool has_2d_tiling;
|
||||
bool has_read_registers_query;
|
||||
bool has_gds_ordered_append;
|
||||
bool has_scheduled_fence_dependency;
|
||||
/* Whether SR-IOV is enabled or amdgpu.mcbp=1 was set on the kernel command line. */
|
||||
bool mid_command_buffer_preemption_enabled;
|
||||
|
||||
/* Shader cores. */
|
||||
uint32_t cu_mask[4][2];
|
||||
uint32_t r600_max_quad_pipes; /* wave size / 16 */
|
||||
uint32_t max_shader_clock;
|
||||
uint32_t num_good_compute_units;
|
||||
uint32_t max_good_cu_per_sa;
|
||||
uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
|
||||
uint32_t max_se; /* shader engines */
|
||||
uint32_t max_sh_per_se; /* shader arrays per shader engine */
|
||||
uint32_t max_wave64_per_simd;
|
||||
uint32_t num_physical_sgprs_per_simd;
|
||||
uint32_t num_physical_wave64_vgprs_per_simd;
|
||||
uint32_t num_simd_per_compute_unit;
|
||||
uint32_t min_sgpr_alloc;
|
||||
uint32_t max_sgpr_alloc;
|
||||
uint32_t sgpr_alloc_granularity;
|
||||
uint32_t min_wave64_vgpr_alloc;
|
||||
uint32_t max_vgpr_alloc;
|
||||
uint32_t wave64_vgpr_alloc_granularity;
|
||||
bool use_late_alloc; /* VS and GS: late pos/param allocation */
|
||||
/* Shader cores. */
|
||||
uint32_t cu_mask[4][2];
|
||||
uint32_t r600_max_quad_pipes; /* wave size / 16 */
|
||||
uint32_t max_shader_clock;
|
||||
uint32_t num_good_compute_units;
|
||||
uint32_t max_good_cu_per_sa;
|
||||
uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
|
||||
uint32_t max_se; /* shader engines */
|
||||
uint32_t max_sh_per_se; /* shader arrays per shader engine */
|
||||
uint32_t max_wave64_per_simd;
|
||||
uint32_t num_physical_sgprs_per_simd;
|
||||
uint32_t num_physical_wave64_vgprs_per_simd;
|
||||
uint32_t num_simd_per_compute_unit;
|
||||
uint32_t min_sgpr_alloc;
|
||||
uint32_t max_sgpr_alloc;
|
||||
uint32_t sgpr_alloc_granularity;
|
||||
uint32_t min_wave64_vgpr_alloc;
|
||||
uint32_t max_vgpr_alloc;
|
||||
uint32_t wave64_vgpr_alloc_granularity;
|
||||
bool use_late_alloc; /* VS and GS: late pos/param allocation */
|
||||
|
||||
/* Render backends (color + depth blocks). */
|
||||
uint32_t r300_num_gb_pipes;
|
||||
uint32_t r300_num_z_pipes;
|
||||
uint32_t r600_gb_backend_map; /* R600 harvest config */
|
||||
bool r600_gb_backend_map_valid;
|
||||
uint32_t r600_num_banks;
|
||||
uint32_t gb_addr_config;
|
||||
uint32_t pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */
|
||||
uint32_t num_render_backends;
|
||||
uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
|
||||
uint32_t pipe_interleave_bytes;
|
||||
uint32_t enabled_rb_mask; /* GCN harvest config */
|
||||
uint64_t max_alignment; /* from addrlib */
|
||||
uint32_t pbb_max_alloc_count;
|
||||
/* Render backends (color + depth blocks). */
|
||||
uint32_t r300_num_gb_pipes;
|
||||
uint32_t r300_num_z_pipes;
|
||||
uint32_t r600_gb_backend_map; /* R600 harvest config */
|
||||
bool r600_gb_backend_map_valid;
|
||||
uint32_t r600_num_banks;
|
||||
uint32_t gb_addr_config;
|
||||
uint32_t pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */
|
||||
uint32_t num_render_backends;
|
||||
uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
|
||||
uint32_t pipe_interleave_bytes;
|
||||
uint32_t enabled_rb_mask; /* GCN harvest config */
|
||||
uint64_t max_alignment; /* from addrlib */
|
||||
uint32_t pbb_max_alloc_count;
|
||||
|
||||
/* Tile modes. */
|
||||
uint32_t si_tile_mode_array[32];
|
||||
uint32_t cik_macrotile_mode_array[16];
|
||||
/* Tile modes. */
|
||||
uint32_t si_tile_mode_array[32];
|
||||
uint32_t cik_macrotile_mode_array[16];
|
||||
};
|
||||
|
||||
bool ac_query_gpu_info(int fd, void *dev_p,
|
||||
struct radeon_info *info,
|
||||
struct amdgpu_gpu_info *amdinfo);
|
||||
bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||
struct amdgpu_gpu_info *amdinfo);
|
||||
|
||||
void ac_compute_driver_uuid(char *uuid, size_t size);
|
||||
|
||||
void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size);
|
||||
void ac_print_gpu_info(struct radeon_info *info);
|
||||
int ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family);
|
||||
void ac_get_raster_config(struct radeon_info *info,
|
||||
uint32_t *raster_config_p,
|
||||
uint32_t *raster_config_1_p,
|
||||
uint32_t *se_tile_repeat_p);
|
||||
void ac_get_harvested_configs(struct radeon_info *info,
|
||||
unsigned raster_config,
|
||||
unsigned *cik_raster_config_1_p,
|
||||
unsigned *raster_config_se);
|
||||
unsigned ac_get_compute_resource_limits(struct radeon_info *info,
|
||||
unsigned waves_per_threadgroup,
|
||||
unsigned max_waves_per_sh,
|
||||
unsigned threadgroups_per_cu);
|
||||
void ac_get_raster_config(struct radeon_info *info, uint32_t *raster_config_p,
|
||||
uint32_t *raster_config_1_p, uint32_t *se_tile_repeat_p);
|
||||
void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
|
||||
unsigned *cik_raster_config_1_p, unsigned *raster_config_se);
|
||||
unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves_per_threadgroup,
|
||||
unsigned max_waves_per_sh, unsigned threadgroups_per_cu);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -24,12 +24,12 @@
|
|||
#ifndef AC_RTLD_H
|
||||
#define AC_RTLD_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "util/u_dynarray.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -40,37 +40,37 @@ struct ac_shader_config;
|
|||
struct radeon_info;
|
||||
|
||||
struct ac_rtld_symbol {
|
||||
const char *name;
|
||||
uint32_t size;
|
||||
uint32_t align;
|
||||
uint64_t offset; /* filled in by ac_rtld_open */
|
||||
unsigned part_idx; /* shader part in which this symbol appears */
|
||||
const char *name;
|
||||
uint32_t size;
|
||||
uint32_t align;
|
||||
uint64_t offset; /* filled in by ac_rtld_open */
|
||||
unsigned part_idx; /* shader part in which this symbol appears */
|
||||
};
|
||||
|
||||
struct ac_rtld_options {
|
||||
/* Loader will insert an s_sethalt 1 instruction as the
|
||||
* first instruction. */
|
||||
bool halt_at_entry:1;
|
||||
/* Loader will insert an s_sethalt 1 instruction as the
|
||||
* first instruction. */
|
||||
bool halt_at_entry : 1;
|
||||
};
|
||||
|
||||
/* Lightweight wrapper around underlying ELF objects. */
|
||||
struct ac_rtld_binary {
|
||||
struct ac_rtld_options options;
|
||||
unsigned wave_size;
|
||||
struct ac_rtld_options options;
|
||||
unsigned wave_size;
|
||||
|
||||
/* Required buffer sizes, currently read/executable only. */
|
||||
uint64_t rx_size;
|
||||
/* Required buffer sizes, currently read/executable only. */
|
||||
uint64_t rx_size;
|
||||
|
||||
/* Size of executable code, for reporting purposes. */
|
||||
uint64_t exec_size;
|
||||
/* Size of executable code, for reporting purposes. */
|
||||
uint64_t exec_size;
|
||||
|
||||
uint64_t rx_end_markers;
|
||||
uint64_t rx_end_markers;
|
||||
|
||||
unsigned num_parts;
|
||||
struct ac_rtld_part *parts;
|
||||
unsigned num_parts;
|
||||
struct ac_rtld_part *parts;
|
||||
|
||||
struct util_dynarray lds_symbols;
|
||||
uint32_t lds_size;
|
||||
struct util_dynarray lds_symbols;
|
||||
uint32_t lds_size;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -82,8 +82,7 @@ struct ac_rtld_binary {
|
|||
* \param value to be filled in by the callback
|
||||
* \return whether the symbol was found successfully
|
||||
*/
|
||||
typedef bool (*ac_rtld_get_external_symbol_cb)(
|
||||
void *cb_data, const char *symbol, uint64_t *value);
|
||||
typedef bool (*ac_rtld_get_external_symbol_cb)(void *cb_data, const char *symbol, uint64_t *value);
|
||||
|
||||
/**
|
||||
* Lifetimes of \ref info, in-memory ELF objects, and the names of
|
||||
|
|
@ -91,50 +90,48 @@ typedef bool (*ac_rtld_get_external_symbol_cb)(
|
|||
* the opened binary.
|
||||
*/
|
||||
struct ac_rtld_open_info {
|
||||
const struct radeon_info *info;
|
||||
struct ac_rtld_options options;
|
||||
gl_shader_stage shader_type;
|
||||
unsigned wave_size;
|
||||
const struct radeon_info *info;
|
||||
struct ac_rtld_options options;
|
||||
gl_shader_stage shader_type;
|
||||
unsigned wave_size;
|
||||
|
||||
unsigned num_parts;
|
||||
const char * const *elf_ptrs; /* in-memory ELF objects of each part */
|
||||
const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */
|
||||
unsigned num_parts;
|
||||
const char *const *elf_ptrs; /* in-memory ELF objects of each part */
|
||||
const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */
|
||||
|
||||
/* Shared LDS symbols are laid out such that they are accessible from
|
||||
* all shader parts. Non-shared (private) LDS symbols of one part may
|
||||
* overlap private LDS symbols of another shader part.
|
||||
*/
|
||||
unsigned num_shared_lds_symbols;
|
||||
const struct ac_rtld_symbol *shared_lds_symbols;
|
||||
/* Shared LDS symbols are laid out such that they are accessible from
|
||||
* all shader parts. Non-shared (private) LDS symbols of one part may
|
||||
* overlap private LDS symbols of another shader part.
|
||||
*/
|
||||
unsigned num_shared_lds_symbols;
|
||||
const struct ac_rtld_symbol *shared_lds_symbols;
|
||||
};
|
||||
|
||||
bool ac_rtld_open(struct ac_rtld_binary *binary,
|
||||
struct ac_rtld_open_info i);
|
||||
bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i);
|
||||
|
||||
void ac_rtld_close(struct ac_rtld_binary *binary);
|
||||
|
||||
bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
|
||||
const char **data, size_t *nbytes);
|
||||
bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
|
||||
size_t *nbytes);
|
||||
|
||||
bool ac_rtld_read_config(const struct radeon_info *info,
|
||||
struct ac_rtld_binary *binary,
|
||||
struct ac_shader_config *config);
|
||||
bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
|
||||
struct ac_shader_config *config);
|
||||
|
||||
struct ac_rtld_upload_info {
|
||||
struct ac_rtld_binary *binary;
|
||||
struct ac_rtld_binary *binary;
|
||||
|
||||
/** GPU mapping of the read/executable buffer. */
|
||||
uint64_t rx_va;
|
||||
/** GPU mapping of the read/executable buffer. */
|
||||
uint64_t rx_va;
|
||||
|
||||
/** CPU mapping of the read/executable buffer */
|
||||
char *rx_ptr;
|
||||
/** CPU mapping of the read/executable buffer */
|
||||
char *rx_ptr;
|
||||
|
||||
/** Optional callback function that will be queried for symbols not
|
||||
* defined in any of the binary's parts. */
|
||||
ac_rtld_get_external_symbol_cb get_external_symbol;
|
||||
/** Optional callback function that will be queried for symbols not
|
||||
* defined in any of the binary's parts. */
|
||||
ac_rtld_get_external_symbol_cb get_external_symbol;
|
||||
|
||||
/** Caller-defined data that will be passed to callback functions. */
|
||||
void *cb_data;
|
||||
/** Caller-defined data that will be passed to callback functions. */
|
||||
void *cb_data;
|
||||
};
|
||||
|
||||
bool ac_rtld_upload(struct ac_rtld_upload_info *u);
|
||||
|
|
|
|||
|
|
@ -22,34 +22,33 @@
|
|||
*/
|
||||
|
||||
#include "ac_shader_args.h"
|
||||
|
||||
#include "nir/nir_builder.h"
|
||||
|
||||
void
|
||||
ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile,
|
||||
unsigned size, enum ac_arg_type type, struct ac_arg *arg)
|
||||
void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned size,
|
||||
enum ac_arg_type type, struct ac_arg *arg)
|
||||
{
|
||||
assert(info->arg_count < AC_MAX_ARGS);
|
||||
assert(info->arg_count < AC_MAX_ARGS);
|
||||
|
||||
unsigned offset;
|
||||
if (regfile == AC_ARG_SGPR) {
|
||||
offset = info->num_sgprs_used;
|
||||
info->num_sgprs_used += size;
|
||||
} else {
|
||||
assert(regfile == AC_ARG_VGPR);
|
||||
offset = info->num_vgprs_used;
|
||||
info->num_vgprs_used += size;
|
||||
}
|
||||
unsigned offset;
|
||||
if (regfile == AC_ARG_SGPR) {
|
||||
offset = info->num_sgprs_used;
|
||||
info->num_sgprs_used += size;
|
||||
} else {
|
||||
assert(regfile == AC_ARG_VGPR);
|
||||
offset = info->num_vgprs_used;
|
||||
info->num_vgprs_used += size;
|
||||
}
|
||||
|
||||
info->args[info->arg_count].file = regfile;
|
||||
info->args[info->arg_count].offset = offset;
|
||||
info->args[info->arg_count].size = size;
|
||||
info->args[info->arg_count].type = type;
|
||||
info->args[info->arg_count].file = regfile;
|
||||
info->args[info->arg_count].offset = offset;
|
||||
info->args[info->arg_count].size = size;
|
||||
info->args[info->arg_count].type = type;
|
||||
|
||||
if (arg) {
|
||||
arg->arg_index = info->arg_count;
|
||||
arg->used = true;
|
||||
}
|
||||
if (arg) {
|
||||
arg->arg_index = info->arg_count;
|
||||
arg->used = true;
|
||||
}
|
||||
|
||||
info->arg_count++;
|
||||
info->arg_count++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,91 +24,90 @@
|
|||
#ifndef AC_SHADER_ARGS_H
|
||||
#define AC_SHADER_ARGS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define AC_MAX_INLINE_PUSH_CONSTS 8
|
||||
|
||||
enum ac_arg_regfile {
|
||||
AC_ARG_SGPR,
|
||||
AC_ARG_VGPR,
|
||||
enum ac_arg_regfile
|
||||
{
|
||||
AC_ARG_SGPR,
|
||||
AC_ARG_VGPR,
|
||||
};
|
||||
|
||||
enum ac_arg_type {
|
||||
AC_ARG_FLOAT,
|
||||
AC_ARG_INT,
|
||||
AC_ARG_CONST_PTR, /* Pointer to i8 array */
|
||||
AC_ARG_CONST_FLOAT_PTR, /* Pointer to f32 array */
|
||||
AC_ARG_CONST_PTR_PTR, /* Pointer to pointer to i8 array */
|
||||
AC_ARG_CONST_DESC_PTR, /* Pointer to v4i32 array */
|
||||
AC_ARG_CONST_IMAGE_PTR, /* Pointer to v8i32 array */
|
||||
enum ac_arg_type
|
||||
{
|
||||
AC_ARG_FLOAT,
|
||||
AC_ARG_INT,
|
||||
AC_ARG_CONST_PTR, /* Pointer to i8 array */
|
||||
AC_ARG_CONST_FLOAT_PTR, /* Pointer to f32 array */
|
||||
AC_ARG_CONST_PTR_PTR, /* Pointer to pointer to i8 array */
|
||||
AC_ARG_CONST_DESC_PTR, /* Pointer to v4i32 array */
|
||||
AC_ARG_CONST_IMAGE_PTR, /* Pointer to v8i32 array */
|
||||
};
|
||||
|
||||
struct ac_arg {
|
||||
uint8_t arg_index;
|
||||
bool used;
|
||||
uint8_t arg_index;
|
||||
bool used;
|
||||
};
|
||||
|
||||
|
||||
#define AC_MAX_ARGS 128
|
||||
|
||||
struct ac_shader_args {
|
||||
/* Info on how to declare arguments */
|
||||
struct {
|
||||
enum ac_arg_type type;
|
||||
enum ac_arg_regfile file;
|
||||
uint8_t offset;
|
||||
uint8_t size;
|
||||
bool skip;
|
||||
} args[AC_MAX_ARGS];
|
||||
/* Info on how to declare arguments */
|
||||
struct {
|
||||
enum ac_arg_type type;
|
||||
enum ac_arg_regfile file;
|
||||
uint8_t offset;
|
||||
uint8_t size;
|
||||
bool skip;
|
||||
} args[AC_MAX_ARGS];
|
||||
|
||||
uint8_t arg_count;
|
||||
uint8_t sgpr_count;
|
||||
uint8_t num_sgprs_used;
|
||||
uint8_t num_vgprs_used;
|
||||
uint8_t arg_count;
|
||||
uint8_t sgpr_count;
|
||||
uint8_t num_sgprs_used;
|
||||
uint8_t num_vgprs_used;
|
||||
|
||||
struct ac_arg base_vertex;
|
||||
struct ac_arg start_instance;
|
||||
struct ac_arg draw_id;
|
||||
struct ac_arg vertex_id;
|
||||
struct ac_arg instance_id;
|
||||
struct ac_arg tcs_patch_id;
|
||||
struct ac_arg tcs_rel_ids;
|
||||
struct ac_arg tes_patch_id;
|
||||
struct ac_arg gs_prim_id;
|
||||
struct ac_arg gs_invocation_id;
|
||||
struct ac_arg base_vertex;
|
||||
struct ac_arg start_instance;
|
||||
struct ac_arg draw_id;
|
||||
struct ac_arg vertex_id;
|
||||
struct ac_arg instance_id;
|
||||
struct ac_arg tcs_patch_id;
|
||||
struct ac_arg tcs_rel_ids;
|
||||
struct ac_arg tes_patch_id;
|
||||
struct ac_arg gs_prim_id;
|
||||
struct ac_arg gs_invocation_id;
|
||||
|
||||
/* PS */
|
||||
struct ac_arg frag_pos[4];
|
||||
struct ac_arg front_face;
|
||||
struct ac_arg ancillary;
|
||||
struct ac_arg sample_coverage;
|
||||
struct ac_arg prim_mask;
|
||||
struct ac_arg persp_sample;
|
||||
struct ac_arg persp_center;
|
||||
struct ac_arg persp_centroid;
|
||||
struct ac_arg pull_model;
|
||||
struct ac_arg linear_sample;
|
||||
struct ac_arg linear_center;
|
||||
struct ac_arg linear_centroid;
|
||||
/* PS */
|
||||
struct ac_arg frag_pos[4];
|
||||
struct ac_arg front_face;
|
||||
struct ac_arg ancillary;
|
||||
struct ac_arg sample_coverage;
|
||||
struct ac_arg prim_mask;
|
||||
struct ac_arg persp_sample;
|
||||
struct ac_arg persp_center;
|
||||
struct ac_arg persp_centroid;
|
||||
struct ac_arg pull_model;
|
||||
struct ac_arg linear_sample;
|
||||
struct ac_arg linear_center;
|
||||
struct ac_arg linear_centroid;
|
||||
|
||||
/* CS */
|
||||
struct ac_arg local_invocation_ids;
|
||||
struct ac_arg num_work_groups;
|
||||
struct ac_arg workgroup_ids[3];
|
||||
struct ac_arg tg_size;
|
||||
/* CS */
|
||||
struct ac_arg local_invocation_ids;
|
||||
struct ac_arg num_work_groups;
|
||||
struct ac_arg workgroup_ids[3];
|
||||
struct ac_arg tg_size;
|
||||
|
||||
/* Vulkan only */
|
||||
struct ac_arg push_constants;
|
||||
struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
|
||||
unsigned num_inline_push_consts;
|
||||
unsigned base_inline_push_consts;
|
||||
struct ac_arg view_index;
|
||||
/* Vulkan only */
|
||||
struct ac_arg push_constants;
|
||||
struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
|
||||
unsigned num_inline_push_consts;
|
||||
unsigned base_inline_push_consts;
|
||||
struct ac_arg view_index;
|
||||
};
|
||||
|
||||
void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile,
|
||||
unsigned registers, enum ac_arg_type type,
|
||||
struct ac_arg *arg);
|
||||
void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned registers,
|
||||
enum ac_arg_type type, struct ac_arg *arg);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -21,277 +21,303 @@
|
|||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ac_shader_util.h"
|
||||
|
||||
#include "sid.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ac_shader_util.h"
|
||||
#include "sid.h"
|
||||
|
||||
unsigned
|
||||
ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
|
||||
bool writes_samplemask)
|
||||
unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask)
|
||||
{
|
||||
if (writes_z) {
|
||||
/* Z needs 32 bits. */
|
||||
if (writes_samplemask)
|
||||
return V_028710_SPI_SHADER_32_ABGR;
|
||||
else if (writes_stencil)
|
||||
return V_028710_SPI_SHADER_32_GR;
|
||||
else
|
||||
return V_028710_SPI_SHADER_32_R;
|
||||
} else if (writes_stencil || writes_samplemask) {
|
||||
/* Both stencil and sample mask need only 16 bits. */
|
||||
return V_028710_SPI_SHADER_UINT16_ABGR;
|
||||
} else {
|
||||
return V_028710_SPI_SHADER_ZERO;
|
||||
}
|
||||
if (writes_z) {
|
||||
/* Z needs 32 bits. */
|
||||
if (writes_samplemask)
|
||||
return V_028710_SPI_SHADER_32_ABGR;
|
||||
else if (writes_stencil)
|
||||
return V_028710_SPI_SHADER_32_GR;
|
||||
else
|
||||
return V_028710_SPI_SHADER_32_R;
|
||||
} else if (writes_stencil || writes_samplemask) {
|
||||
/* Both stencil and sample mask need only 16 bits. */
|
||||
return V_028710_SPI_SHADER_UINT16_ABGR;
|
||||
} else {
|
||||
return V_028710_SPI_SHADER_ZERO;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
ac_get_cb_shader_mask(unsigned spi_shader_col_format)
|
||||
unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format)
|
||||
{
|
||||
unsigned i, cb_shader_mask = 0;
|
||||
unsigned i, cb_shader_mask = 0;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
|
||||
case V_028714_SPI_SHADER_ZERO:
|
||||
break;
|
||||
case V_028714_SPI_SHADER_32_R:
|
||||
cb_shader_mask |= 0x1 << (i * 4);
|
||||
break;
|
||||
case V_028714_SPI_SHADER_32_GR:
|
||||
cb_shader_mask |= 0x3 << (i * 4);
|
||||
break;
|
||||
case V_028714_SPI_SHADER_32_AR:
|
||||
cb_shader_mask |= 0x9u << (i * 4);
|
||||
break;
|
||||
case V_028714_SPI_SHADER_FP16_ABGR:
|
||||
case V_028714_SPI_SHADER_UNORM16_ABGR:
|
||||
case V_028714_SPI_SHADER_SNORM16_ABGR:
|
||||
case V_028714_SPI_SHADER_UINT16_ABGR:
|
||||
case V_028714_SPI_SHADER_SINT16_ABGR:
|
||||
case V_028714_SPI_SHADER_32_ABGR:
|
||||
cb_shader_mask |= 0xfu << (i * 4);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
return cb_shader_mask;
|
||||
for (i = 0; i < 8; i++) {
|
||||
switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
|
||||
case V_028714_SPI_SHADER_ZERO:
|
||||
break;
|
||||
case V_028714_SPI_SHADER_32_R:
|
||||
cb_shader_mask |= 0x1 << (i * 4);
|
||||
break;
|
||||
case V_028714_SPI_SHADER_32_GR:
|
||||
cb_shader_mask |= 0x3 << (i * 4);
|
||||
break;
|
||||
case V_028714_SPI_SHADER_32_AR:
|
||||
cb_shader_mask |= 0x9u << (i * 4);
|
||||
break;
|
||||
case V_028714_SPI_SHADER_FP16_ABGR:
|
||||
case V_028714_SPI_SHADER_UNORM16_ABGR:
|
||||
case V_028714_SPI_SHADER_SNORM16_ABGR:
|
||||
case V_028714_SPI_SHADER_UINT16_ABGR:
|
||||
case V_028714_SPI_SHADER_SINT16_ABGR:
|
||||
case V_028714_SPI_SHADER_32_ABGR:
|
||||
cb_shader_mask |= 0xfu << (i * 4);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
return cb_shader_mask;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the appropriate setting of VGT_GS_MODE when \p shader is a
|
||||
* geometry shader.
|
||||
*/
|
||||
uint32_t
|
||||
ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
|
||||
uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
|
||||
{
|
||||
unsigned cut_mode;
|
||||
unsigned cut_mode;
|
||||
|
||||
if (gs_max_vert_out <= 128) {
|
||||
cut_mode = V_028A40_GS_CUT_128;
|
||||
} else if (gs_max_vert_out <= 256) {
|
||||
cut_mode = V_028A40_GS_CUT_256;
|
||||
} else if (gs_max_vert_out <= 512) {
|
||||
cut_mode = V_028A40_GS_CUT_512;
|
||||
} else {
|
||||
assert(gs_max_vert_out <= 1024);
|
||||
cut_mode = V_028A40_GS_CUT_1024;
|
||||
}
|
||||
if (gs_max_vert_out <= 128) {
|
||||
cut_mode = V_028A40_GS_CUT_128;
|
||||
} else if (gs_max_vert_out <= 256) {
|
||||
cut_mode = V_028A40_GS_CUT_256;
|
||||
} else if (gs_max_vert_out <= 512) {
|
||||
cut_mode = V_028A40_GS_CUT_512;
|
||||
} else {
|
||||
assert(gs_max_vert_out <= 1024);
|
||||
cut_mode = V_028A40_GS_CUT_1024;
|
||||
}
|
||||
|
||||
return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
|
||||
S_028A40_CUT_MODE(cut_mode)|
|
||||
S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) |
|
||||
S_028A40_GS_WRITE_OPTIMIZE(1) |
|
||||
S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
|
||||
return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | S_028A40_CUT_MODE(cut_mode) |
|
||||
S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) | S_028A40_GS_WRITE_OPTIMIZE(1) |
|
||||
S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
|
||||
}
|
||||
|
||||
/// Translate a (dfmt, nfmt) pair into a chip-appropriate combined format
|
||||
/// value for LLVM8+ tbuffer intrinsics.
|
||||
unsigned
|
||||
ac_get_tbuffer_format(enum chip_class chip_class,
|
||||
unsigned dfmt, unsigned nfmt)
|
||||
unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsigned nfmt)
|
||||
{
|
||||
// Some games try to access vertex buffers without a valid format.
|
||||
// This is a game bug, but we should still handle it gracefully.
|
||||
if (dfmt == V_008F0C_IMG_FORMAT_INVALID)
|
||||
return V_008F0C_IMG_FORMAT_INVALID;
|
||||
// Some games try to access vertex buffers without a valid format.
|
||||
// This is a game bug, but we should still handle it gracefully.
|
||||
if (dfmt == V_008F0C_IMG_FORMAT_INVALID)
|
||||
return V_008F0C_IMG_FORMAT_INVALID;
|
||||
|
||||
if (chip_class >= GFX10) {
|
||||
unsigned format;
|
||||
switch (dfmt) {
|
||||
default: unreachable("bad dfmt");
|
||||
case V_008F0C_BUF_DATA_FORMAT_INVALID: format = V_008F0C_IMG_FORMAT_INVALID; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_8: format = V_008F0C_IMG_FORMAT_8_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_8_8: format = V_008F0C_IMG_FORMAT_8_8_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: format = V_008F0C_IMG_FORMAT_8_8_8_8_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_16: format = V_008F0C_IMG_FORMAT_16_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_16_16: format = V_008F0C_IMG_FORMAT_16_16_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: format = V_008F0C_IMG_FORMAT_16_16_16_16_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32: format = V_008F0C_IMG_FORMAT_32_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32: format = V_008F0C_IMG_FORMAT_32_32_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32_32: format = V_008F0C_IMG_FORMAT_32_32_32_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: format = V_008F0C_IMG_FORMAT_32_32_32_32_UINT; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: format = V_008F0C_IMG_FORMAT_2_10_10_10_UINT; break;
|
||||
}
|
||||
if (chip_class >= GFX10) {
|
||||
unsigned format;
|
||||
switch (dfmt) {
|
||||
default:
|
||||
unreachable("bad dfmt");
|
||||
case V_008F0C_BUF_DATA_FORMAT_INVALID:
|
||||
format = V_008F0C_IMG_FORMAT_INVALID;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_8:
|
||||
format = V_008F0C_IMG_FORMAT_8_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_8_8:
|
||||
format = V_008F0C_IMG_FORMAT_8_8_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
|
||||
format = V_008F0C_IMG_FORMAT_8_8_8_8_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_16:
|
||||
format = V_008F0C_IMG_FORMAT_16_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_16_16:
|
||||
format = V_008F0C_IMG_FORMAT_16_16_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
|
||||
format = V_008F0C_IMG_FORMAT_16_16_16_16_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32:
|
||||
format = V_008F0C_IMG_FORMAT_32_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32:
|
||||
format = V_008F0C_IMG_FORMAT_32_32_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32_32:
|
||||
format = V_008F0C_IMG_FORMAT_32_32_32_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
|
||||
format = V_008F0C_IMG_FORMAT_32_32_32_32_UINT;
|
||||
break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
|
||||
format = V_008F0C_IMG_FORMAT_2_10_10_10_UINT;
|
||||
break;
|
||||
}
|
||||
|
||||
// Use the regularity properties of the combined format enum.
|
||||
//
|
||||
// Note: float is incompatible with 8-bit data formats,
|
||||
// [us]{norm,scaled} are incompatible with 32-bit data formats.
|
||||
// [us]scaled are not writable.
|
||||
switch (nfmt) {
|
||||
case V_008F0C_BUF_NUM_FORMAT_UNORM: format -= 4; break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_SNORM: format -= 3; break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_USCALED: format -= 2; break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_SSCALED: format -= 1; break;
|
||||
default: unreachable("bad nfmt");
|
||||
case V_008F0C_BUF_NUM_FORMAT_UINT: break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_SINT: format += 1; break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_FLOAT: format += 2; break;
|
||||
}
|
||||
// Use the regularity properties of the combined format enum.
|
||||
//
|
||||
// Note: float is incompatible with 8-bit data formats,
|
||||
// [us]{norm,scaled} are incompatible with 32-bit data formats.
|
||||
// [us]scaled are not writable.
|
||||
switch (nfmt) {
|
||||
case V_008F0C_BUF_NUM_FORMAT_UNORM:
|
||||
format -= 4;
|
||||
break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_SNORM:
|
||||
format -= 3;
|
||||
break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_USCALED:
|
||||
format -= 2;
|
||||
break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_SSCALED:
|
||||
format -= 1;
|
||||
break;
|
||||
default:
|
||||
unreachable("bad nfmt");
|
||||
case V_008F0C_BUF_NUM_FORMAT_UINT:
|
||||
break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_SINT:
|
||||
format += 1;
|
||||
break;
|
||||
case V_008F0C_BUF_NUM_FORMAT_FLOAT:
|
||||
format += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
return format;
|
||||
} else {
|
||||
return dfmt | (nfmt << 4);
|
||||
}
|
||||
return format;
|
||||
} else {
|
||||
return dfmt | (nfmt << 4);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct ac_data_format_info data_format_table[] = {
|
||||
[V_008F0C_BUF_DATA_FORMAT_INVALID] = { 0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID },
|
||||
[V_008F0C_BUF_DATA_FORMAT_8] = { 1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_16] = { 2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_8_8] = { 2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_32] = { 4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_16_16] = { 4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_10_11_11] = { 4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_11_11_10] = { 4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_10_10_10_2] = { 4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_2_10_10_10] = { 4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_8_8_8_8] = { 4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_32_32] = { 8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_16_16_16_16] = { 8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_32_32_32] = { 12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_32_32_32_32] = { 16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32 },
|
||||
[V_008F0C_BUF_DATA_FORMAT_INVALID] = {0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID},
|
||||
[V_008F0C_BUF_DATA_FORMAT_8] = {1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8},
|
||||
[V_008F0C_BUF_DATA_FORMAT_16] = {2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16},
|
||||
[V_008F0C_BUF_DATA_FORMAT_8_8] = {2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8},
|
||||
[V_008F0C_BUF_DATA_FORMAT_32] = {4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32},
|
||||
[V_008F0C_BUF_DATA_FORMAT_16_16] = {4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16},
|
||||
[V_008F0C_BUF_DATA_FORMAT_10_11_11] = {4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11},
|
||||
[V_008F0C_BUF_DATA_FORMAT_11_11_10] = {4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10},
|
||||
[V_008F0C_BUF_DATA_FORMAT_10_10_10_2] = {4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2},
|
||||
[V_008F0C_BUF_DATA_FORMAT_2_10_10_10] = {4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10},
|
||||
[V_008F0C_BUF_DATA_FORMAT_8_8_8_8] = {4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8},
|
||||
[V_008F0C_BUF_DATA_FORMAT_32_32] = {8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32},
|
||||
[V_008F0C_BUF_DATA_FORMAT_16_16_16_16] = {8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16},
|
||||
[V_008F0C_BUF_DATA_FORMAT_32_32_32] = {12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32},
|
||||
[V_008F0C_BUF_DATA_FORMAT_32_32_32_32] = {16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32},
|
||||
};
|
||||
|
||||
const struct ac_data_format_info *
|
||||
ac_get_data_format_info(unsigned dfmt)
|
||||
const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt)
|
||||
{
|
||||
assert(dfmt < ARRAY_SIZE(data_format_table));
|
||||
return &data_format_table[dfmt];
|
||||
assert(dfmt < ARRAY_SIZE(data_format_table));
|
||||
return &data_format_table[dfmt];
|
||||
}
|
||||
|
||||
enum ac_image_dim
|
||||
ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
|
||||
bool is_array)
|
||||
enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
|
||||
bool is_array)
|
||||
{
|
||||
switch (dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
if (chip_class == GFX9)
|
||||
return is_array ? ac_image_2darray : ac_image_2d;
|
||||
return is_array ? ac_image_1darray : ac_image_1d;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_EXTERNAL:
|
||||
return is_array ? ac_image_2darray : ac_image_2d;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
return ac_image_3d;
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
return ac_image_cube;
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
|
||||
case GLSL_SAMPLER_DIM_SUBPASS:
|
||||
return ac_image_2darray;
|
||||
case GLSL_SAMPLER_DIM_SUBPASS_MS:
|
||||
return ac_image_2darraymsaa;
|
||||
default:
|
||||
unreachable("bad sampler dim");
|
||||
}
|
||||
switch (dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
if (chip_class == GFX9)
|
||||
return is_array ? ac_image_2darray : ac_image_2d;
|
||||
return is_array ? ac_image_1darray : ac_image_1d;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_EXTERNAL:
|
||||
return is_array ? ac_image_2darray : ac_image_2d;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
return ac_image_3d;
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
return ac_image_cube;
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
|
||||
case GLSL_SAMPLER_DIM_SUBPASS:
|
||||
return ac_image_2darray;
|
||||
case GLSL_SAMPLER_DIM_SUBPASS_MS:
|
||||
return ac_image_2darraymsaa;
|
||||
default:
|
||||
unreachable("bad sampler dim");
|
||||
}
|
||||
}
|
||||
|
||||
enum ac_image_dim
|
||||
ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
|
||||
bool is_array)
|
||||
enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
|
||||
bool is_array)
|
||||
{
|
||||
enum ac_image_dim dim = ac_get_sampler_dim(chip_class, sdim, is_array);
|
||||
enum ac_image_dim dim = ac_get_sampler_dim(chip_class, sdim, is_array);
|
||||
|
||||
/* Match the resource type set in the descriptor. */
|
||||
if (dim == ac_image_cube ||
|
||||
(chip_class <= GFX8 && dim == ac_image_3d))
|
||||
dim = ac_image_2darray;
|
||||
else if (sdim == GLSL_SAMPLER_DIM_2D && !is_array && chip_class == GFX9) {
|
||||
/* When a single layer of a 3D texture is bound, the shader
|
||||
* will refer to a 2D target, but the descriptor has a 3D type.
|
||||
* Since the HW ignores BASE_ARRAY in this case, we need to
|
||||
* send 3 coordinates. This doesn't hurt when the underlying
|
||||
* texture is non-3D.
|
||||
*/
|
||||
dim = ac_image_3d;
|
||||
}
|
||||
/* Match the resource type set in the descriptor. */
|
||||
if (dim == ac_image_cube || (chip_class <= GFX8 && dim == ac_image_3d))
|
||||
dim = ac_image_2darray;
|
||||
else if (sdim == GLSL_SAMPLER_DIM_2D && !is_array && chip_class == GFX9) {
|
||||
/* When a single layer of a 3D texture is bound, the shader
|
||||
* will refer to a 2D target, but the descriptor has a 3D type.
|
||||
* Since the HW ignores BASE_ARRAY in this case, we need to
|
||||
* send 3 coordinates. This doesn't hurt when the underlying
|
||||
* texture is non-3D.
|
||||
*/
|
||||
dim = ac_image_3d;
|
||||
}
|
||||
|
||||
return dim;
|
||||
return dim;
|
||||
}
|
||||
|
||||
unsigned
|
||||
ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
|
||||
signed char *face_vgpr_index_ptr,
|
||||
signed char *ancillary_vgpr_index_ptr)
|
||||
unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
|
||||
signed char *face_vgpr_index_ptr,
|
||||
signed char *ancillary_vgpr_index_ptr)
|
||||
{
|
||||
unsigned num_input_vgprs = 0;
|
||||
signed char face_vgpr_index = -1;
|
||||
signed char ancillary_vgpr_index = -1;
|
||||
unsigned num_input_vgprs = 0;
|
||||
signed char face_vgpr_index = -1;
|
||||
signed char ancillary_vgpr_index = -1;
|
||||
|
||||
if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 3;
|
||||
if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) {
|
||||
face_vgpr_index = num_input_vgprs;
|
||||
num_input_vgprs += 1;
|
||||
}
|
||||
if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) {
|
||||
ancillary_vgpr_index = num_input_vgprs;
|
||||
num_input_vgprs += 1;
|
||||
}
|
||||
if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 3;
|
||||
if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 2;
|
||||
if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) {
|
||||
face_vgpr_index = num_input_vgprs;
|
||||
num_input_vgprs += 1;
|
||||
}
|
||||
if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) {
|
||||
ancillary_vgpr_index = num_input_vgprs;
|
||||
num_input_vgprs += 1;
|
||||
}
|
||||
if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
|
||||
num_input_vgprs += 1;
|
||||
|
||||
if (face_vgpr_index_ptr)
|
||||
*face_vgpr_index_ptr = face_vgpr_index;
|
||||
if (ancillary_vgpr_index_ptr)
|
||||
*ancillary_vgpr_index_ptr = ancillary_vgpr_index;
|
||||
if (face_vgpr_index_ptr)
|
||||
*face_vgpr_index_ptr = face_vgpr_index;
|
||||
if (ancillary_vgpr_index_ptr)
|
||||
*ancillary_vgpr_index_ptr = ancillary_vgpr_index;
|
||||
|
||||
return num_input_vgprs;
|
||||
return num_input_vgprs;
|
||||
}
|
||||
|
||||
void ac_choose_spi_color_formats(unsigned format, unsigned swap,
|
||||
unsigned ntype, bool is_depth,
|
||||
struct ac_spi_color_formats *formats)
|
||||
void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
|
||||
struct ac_spi_color_formats *formats)
|
||||
{
|
||||
/* Alpha is needed for alpha-to-coverage.
|
||||
* Blending may be with or without alpha.
|
||||
|
|
|
|||
|
|
@ -24,75 +24,64 @@
|
|||
#ifndef AC_SHADER_UTIL_H
|
||||
#define AC_SHADER_UTIL_H
|
||||
|
||||
#include "ac_binary.h"
|
||||
#include "amd_family.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "amd_family.h"
|
||||
#include "ac_binary.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum ac_image_dim {
|
||||
ac_image_1d,
|
||||
ac_image_2d,
|
||||
ac_image_3d,
|
||||
ac_image_cube, // includes cube arrays
|
||||
ac_image_1darray,
|
||||
ac_image_2darray,
|
||||
ac_image_2dmsaa,
|
||||
ac_image_2darraymsaa,
|
||||
enum ac_image_dim
|
||||
{
|
||||
ac_image_1d,
|
||||
ac_image_2d,
|
||||
ac_image_3d,
|
||||
ac_image_cube, // includes cube arrays
|
||||
ac_image_1darray,
|
||||
ac_image_2darray,
|
||||
ac_image_2dmsaa,
|
||||
ac_image_2darraymsaa,
|
||||
};
|
||||
|
||||
struct ac_data_format_info {
|
||||
uint8_t element_size;
|
||||
uint8_t num_channels;
|
||||
uint8_t chan_byte_size;
|
||||
uint8_t chan_format;
|
||||
uint8_t element_size;
|
||||
uint8_t num_channels;
|
||||
uint8_t chan_byte_size;
|
||||
uint8_t chan_format;
|
||||
};
|
||||
|
||||
struct ac_spi_color_formats {
|
||||
unsigned normal : 8;
|
||||
unsigned alpha : 8;
|
||||
unsigned blend : 8;
|
||||
unsigned blend_alpha : 8;
|
||||
unsigned normal : 8;
|
||||
unsigned alpha : 8;
|
||||
unsigned blend : 8;
|
||||
unsigned blend_alpha : 8;
|
||||
};
|
||||
|
||||
unsigned
|
||||
ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
|
||||
bool writes_samplemask);
|
||||
unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask);
|
||||
|
||||
unsigned
|
||||
ac_get_cb_shader_mask(unsigned spi_shader_col_format);
|
||||
unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format);
|
||||
|
||||
uint32_t
|
||||
ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class);
|
||||
uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class);
|
||||
|
||||
unsigned
|
||||
ac_get_tbuffer_format(enum chip_class chip_class,
|
||||
unsigned dfmt, unsigned nfmt);
|
||||
unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsigned nfmt);
|
||||
|
||||
const struct ac_data_format_info *
|
||||
ac_get_data_format_info(unsigned dfmt);
|
||||
const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt);
|
||||
|
||||
enum ac_image_dim
|
||||
ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
|
||||
bool is_array);
|
||||
enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
|
||||
bool is_array);
|
||||
|
||||
enum ac_image_dim
|
||||
ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
|
||||
bool is_array);
|
||||
enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
|
||||
bool is_array);
|
||||
|
||||
unsigned
|
||||
ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
|
||||
signed char *face_vgpr_index,
|
||||
signed char *ancillary_vgpr_index);
|
||||
unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
|
||||
signed char *face_vgpr_index, signed char *ancillary_vgpr_index);
|
||||
|
||||
void ac_choose_spi_color_formats(unsigned format, unsigned swap,
|
||||
unsigned ntype, bool is_depth,
|
||||
struct ac_spi_color_formats *formats);
|
||||
void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
|
||||
struct ac_spi_color_formats *formats);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -35,7 +35,8 @@ struct ac_reg_range {
|
|||
unsigned size;
|
||||
};
|
||||
|
||||
enum ac_reg_range_type {
|
||||
enum ac_reg_range_type
|
||||
{
|
||||
SI_REG_RANGE_UCONFIG,
|
||||
SI_REG_RANGE_CONTEXT,
|
||||
SI_REG_RANGE_SH,
|
||||
|
|
@ -46,14 +47,13 @@ enum ac_reg_range_type {
|
|||
SI_NUM_ALL_REG_RANGES,
|
||||
};
|
||||
|
||||
typedef void (*set_context_reg_seq_array_fn)(struct radeon_cmdbuf *cs, unsigned reg,
|
||||
unsigned num, const uint32_t *values);
|
||||
typedef void (*set_context_reg_seq_array_fn)(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
|
||||
const uint32_t *values);
|
||||
|
||||
void ac_get_reg_ranges(enum chip_class chip_class, enum radeon_family family,
|
||||
enum ac_reg_range_type type, unsigned *num_ranges,
|
||||
const struct ac_reg_range **ranges);
|
||||
void ac_emulate_clear_state(const struct radeon_info *info,
|
||||
struct radeon_cmdbuf *cs,
|
||||
void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf *cs,
|
||||
set_context_reg_seq_array_fn set_context_reg_seq_array);
|
||||
void ac_check_shadowed_regs(enum chip_class chip_class, enum radeon_family family,
|
||||
unsigned reg_offset, unsigned count);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -26,11 +26,11 @@
|
|||
#ifndef AC_SURFACE_H
|
||||
#define AC_SURFACE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "amd_family.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
|
@ -41,280 +41,274 @@ struct ac_addrlib;
|
|||
struct amdgpu_gpu_info;
|
||||
struct radeon_info;
|
||||
|
||||
#define RADEON_SURF_MAX_LEVELS 15
|
||||
#define RADEON_SURF_MAX_LEVELS 15
|
||||
|
||||
enum radeon_surf_mode {
|
||||
RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
|
||||
RADEON_SURF_MODE_1D = 2,
|
||||
RADEON_SURF_MODE_2D = 3,
|
||||
enum radeon_surf_mode
|
||||
{
|
||||
RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
|
||||
RADEON_SURF_MODE_1D = 2,
|
||||
RADEON_SURF_MODE_2D = 3,
|
||||
};
|
||||
|
||||
/* This describes D/S/Z/R swizzle modes.
|
||||
* Defined in the GB_TILE_MODEn.MICRO_TILE_MODE_NEW order.
|
||||
*/
|
||||
enum radeon_micro_mode {
|
||||
RADEON_MICRO_MODE_DISPLAY = 0,
|
||||
RADEON_MICRO_MODE_STANDARD = 1,
|
||||
RADEON_MICRO_MODE_DEPTH = 2,
|
||||
RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */
|
||||
enum radeon_micro_mode
|
||||
{
|
||||
RADEON_MICRO_MODE_DISPLAY = 0,
|
||||
RADEON_MICRO_MODE_STANDARD = 1,
|
||||
RADEON_MICRO_MODE_DEPTH = 2,
|
||||
RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */
|
||||
};
|
||||
|
||||
/* the first 16 bits are reserved for libdrm_radeon, don't use them */
|
||||
#define RADEON_SURF_SCANOUT (1 << 16)
|
||||
#define RADEON_SURF_ZBUFFER (1 << 17)
|
||||
#define RADEON_SURF_SBUFFER (1 << 18)
|
||||
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
|
||||
#define RADEON_SURF_SCANOUT (1 << 16)
|
||||
#define RADEON_SURF_ZBUFFER (1 << 17)
|
||||
#define RADEON_SURF_SBUFFER (1 << 18)
|
||||
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
|
||||
/* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
|
||||
#define RADEON_SURF_FMASK (1 << 21)
|
||||
#define RADEON_SURF_DISABLE_DCC (1 << 22)
|
||||
#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
|
||||
#define RADEON_SURF_IMPORTED (1 << 24)
|
||||
#define RADEON_SURF_CONTIGUOUS_DCC_LAYERS (1 << 25)
|
||||
#define RADEON_SURF_SHAREABLE (1 << 26)
|
||||
#define RADEON_SURF_NO_RENDER_TARGET (1 << 27)
|
||||
#define RADEON_SURF_FMASK (1 << 21)
|
||||
#define RADEON_SURF_DISABLE_DCC (1 << 22)
|
||||
#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
|
||||
#define RADEON_SURF_IMPORTED (1 << 24)
|
||||
#define RADEON_SURF_CONTIGUOUS_DCC_LAYERS (1 << 25)
|
||||
#define RADEON_SURF_SHAREABLE (1 << 26)
|
||||
#define RADEON_SURF_NO_RENDER_TARGET (1 << 27)
|
||||
/* Force a swizzle mode (gfx9+) or tile mode (gfx6-8).
|
||||
* If this is not set, optimize for space. */
|
||||
#define RADEON_SURF_FORCE_SWIZZLE_MODE (1 << 28)
|
||||
#define RADEON_SURF_NO_FMASK (1 << 29)
|
||||
#define RADEON_SURF_NO_HTILE (1 << 30)
|
||||
#define RADEON_SURF_FORCE_MICRO_TILE_MODE (1u << 31)
|
||||
#define RADEON_SURF_FORCE_SWIZZLE_MODE (1 << 28)
|
||||
#define RADEON_SURF_NO_FMASK (1 << 29)
|
||||
#define RADEON_SURF_NO_HTILE (1 << 30)
|
||||
#define RADEON_SURF_FORCE_MICRO_TILE_MODE (1u << 31)
|
||||
|
||||
struct legacy_surf_level {
|
||||
uint64_t offset;
|
||||
uint32_t slice_size_dw; /* in dwords; max = 4GB / 4. */
|
||||
uint32_t dcc_offset; /* relative offset within DCC mip tree */
|
||||
uint32_t dcc_fast_clear_size;
|
||||
uint32_t dcc_slice_fast_clear_size;
|
||||
unsigned nblk_x:15;
|
||||
unsigned nblk_y:15;
|
||||
enum radeon_surf_mode mode:2;
|
||||
uint64_t offset;
|
||||
uint32_t slice_size_dw; /* in dwords; max = 4GB / 4. */
|
||||
uint32_t dcc_offset; /* relative offset within DCC mip tree */
|
||||
uint32_t dcc_fast_clear_size;
|
||||
uint32_t dcc_slice_fast_clear_size;
|
||||
unsigned nblk_x : 15;
|
||||
unsigned nblk_y : 15;
|
||||
enum radeon_surf_mode mode : 2;
|
||||
};
|
||||
|
||||
struct legacy_surf_fmask {
|
||||
unsigned slice_tile_max; /* max 4M */
|
||||
uint8_t tiling_index; /* max 31 */
|
||||
uint8_t bankh; /* max 8 */
|
||||
uint16_t pitch_in_pixels;
|
||||
uint64_t slice_size;
|
||||
unsigned slice_tile_max; /* max 4M */
|
||||
uint8_t tiling_index; /* max 31 */
|
||||
uint8_t bankh; /* max 8 */
|
||||
uint16_t pitch_in_pixels;
|
||||
uint64_t slice_size;
|
||||
};
|
||||
|
||||
struct legacy_surf_layout {
|
||||
unsigned bankw:4; /* max 8 */
|
||||
unsigned bankh:4; /* max 8 */
|
||||
unsigned mtilea:4; /* max 8 */
|
||||
unsigned tile_split:13; /* max 4K */
|
||||
unsigned stencil_tile_split:13; /* max 4K */
|
||||
unsigned pipe_config:5; /* max 17 */
|
||||
unsigned num_banks:5; /* max 16 */
|
||||
unsigned macro_tile_index:4; /* max 15 */
|
||||
unsigned bankw : 4; /* max 8 */
|
||||
unsigned bankh : 4; /* max 8 */
|
||||
unsigned mtilea : 4; /* max 8 */
|
||||
unsigned tile_split : 13; /* max 4K */
|
||||
unsigned stencil_tile_split : 13; /* max 4K */
|
||||
unsigned pipe_config : 5; /* max 17 */
|
||||
unsigned num_banks : 5; /* max 16 */
|
||||
unsigned macro_tile_index : 4; /* max 15 */
|
||||
|
||||
/* Whether the depth miptree or stencil miptree as used by the DB are
|
||||
* adjusted from their TC compatible form to ensure depth/stencil
|
||||
* compatibility. If either is true, the corresponding plane cannot be
|
||||
* sampled from.
|
||||
*/
|
||||
unsigned depth_adjusted:1;
|
||||
unsigned stencil_adjusted:1;
|
||||
/* Whether the depth miptree or stencil miptree as used by the DB are
|
||||
* adjusted from their TC compatible form to ensure depth/stencil
|
||||
* compatibility. If either is true, the corresponding plane cannot be
|
||||
* sampled from.
|
||||
*/
|
||||
unsigned depth_adjusted : 1;
|
||||
unsigned stencil_adjusted : 1;
|
||||
|
||||
struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
|
||||
struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
|
||||
uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
|
||||
uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
|
||||
struct legacy_surf_fmask fmask;
|
||||
unsigned cmask_slice_tile_max;
|
||||
struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
|
||||
struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
|
||||
uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
|
||||
uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
|
||||
struct legacy_surf_fmask fmask;
|
||||
unsigned cmask_slice_tile_max;
|
||||
};
|
||||
|
||||
/* Same as addrlib - AddrResourceType. */
|
||||
enum gfx9_resource_type {
|
||||
RADEON_RESOURCE_1D = 0,
|
||||
RADEON_RESOURCE_2D,
|
||||
RADEON_RESOURCE_3D,
|
||||
enum gfx9_resource_type
|
||||
{
|
||||
RADEON_RESOURCE_1D = 0,
|
||||
RADEON_RESOURCE_2D,
|
||||
RADEON_RESOURCE_3D,
|
||||
};
|
||||
|
||||
struct gfx9_surf_flags {
|
||||
uint16_t swizzle_mode; /* tile mode */
|
||||
uint16_t epitch; /* (pitch - 1) or (height - 1) */
|
||||
uint16_t swizzle_mode; /* tile mode */
|
||||
uint16_t epitch; /* (pitch - 1) or (height - 1) */
|
||||
};
|
||||
|
||||
struct gfx9_surf_meta_flags {
|
||||
unsigned rb_aligned:1; /* optimal for RBs */
|
||||
unsigned pipe_aligned:1; /* optimal for TC */
|
||||
unsigned independent_64B_blocks:1;
|
||||
unsigned independent_128B_blocks:1;
|
||||
unsigned max_compressed_block_size:2;
|
||||
unsigned rb_aligned : 1; /* optimal for RBs */
|
||||
unsigned pipe_aligned : 1; /* optimal for TC */
|
||||
unsigned independent_64B_blocks : 1;
|
||||
unsigned independent_128B_blocks : 1;
|
||||
unsigned max_compressed_block_size : 2;
|
||||
};
|
||||
|
||||
struct gfx9_surf_layout {
|
||||
struct gfx9_surf_flags surf; /* color or depth surface */
|
||||
struct gfx9_surf_flags fmask; /* not added to surf_size */
|
||||
struct gfx9_surf_flags stencil; /* added to surf_size, use stencil_offset */
|
||||
struct gfx9_surf_flags surf; /* color or depth surface */
|
||||
struct gfx9_surf_flags fmask; /* not added to surf_size */
|
||||
struct gfx9_surf_flags stencil; /* added to surf_size, use stencil_offset */
|
||||
|
||||
struct gfx9_surf_meta_flags dcc; /* metadata of color */
|
||||
struct gfx9_surf_meta_flags dcc; /* metadata of color */
|
||||
|
||||
enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
|
||||
uint16_t surf_pitch; /* in blocks */
|
||||
uint16_t surf_height;
|
||||
enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
|
||||
uint16_t surf_pitch; /* in blocks */
|
||||
uint16_t surf_height;
|
||||
|
||||
uint64_t surf_offset; /* 0 unless imported with an offset */
|
||||
/* The size of the 2D plane containing all mipmap levels. */
|
||||
uint64_t surf_slice_size;
|
||||
/* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
|
||||
uint32_t offset[RADEON_SURF_MAX_LEVELS];
|
||||
/* Mipmap level pitch in elements. Only valid for LINEAR. */
|
||||
uint16_t pitch[RADEON_SURF_MAX_LEVELS];
|
||||
uint64_t surf_offset; /* 0 unless imported with an offset */
|
||||
/* The size of the 2D plane containing all mipmap levels. */
|
||||
uint64_t surf_slice_size;
|
||||
/* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
|
||||
uint32_t offset[RADEON_SURF_MAX_LEVELS];
|
||||
/* Mipmap level pitch in elements. Only valid for LINEAR. */
|
||||
uint16_t pitch[RADEON_SURF_MAX_LEVELS];
|
||||
|
||||
uint64_t stencil_offset; /* separate stencil */
|
||||
uint64_t stencil_offset; /* separate stencil */
|
||||
|
||||
uint8_t dcc_block_width;
|
||||
uint8_t dcc_block_height;
|
||||
uint8_t dcc_block_depth;
|
||||
uint8_t dcc_block_width;
|
||||
uint8_t dcc_block_height;
|
||||
uint8_t dcc_block_depth;
|
||||
|
||||
/* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
|
||||
* The 3D engine doesn't support that layout except for chips with 1 RB.
|
||||
* All other chips must set rb_aligned=1.
|
||||
* A compute shader needs to convert from aligned DCC to unaligned.
|
||||
*/
|
||||
uint32_t display_dcc_size;
|
||||
uint32_t display_dcc_alignment;
|
||||
uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */
|
||||
bool dcc_retile_use_uint16; /* if all values fit into uint16_t */
|
||||
uint32_t dcc_retile_num_elements;
|
||||
void *dcc_retile_map;
|
||||
/* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
|
||||
* The 3D engine doesn't support that layout except for chips with 1 RB.
|
||||
* All other chips must set rb_aligned=1.
|
||||
* A compute shader needs to convert from aligned DCC to unaligned.
|
||||
*/
|
||||
uint32_t display_dcc_size;
|
||||
uint32_t display_dcc_alignment;
|
||||
uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */
|
||||
bool dcc_retile_use_uint16; /* if all values fit into uint16_t */
|
||||
uint32_t dcc_retile_num_elements;
|
||||
void *dcc_retile_map;
|
||||
};
|
||||
|
||||
struct radeon_surf {
|
||||
/* Format properties. */
|
||||
unsigned blk_w:4;
|
||||
unsigned blk_h:4;
|
||||
unsigned bpe:5;
|
||||
/* Number of mipmap levels where DCC is enabled starting from level 0.
|
||||
* Non-zero levels may be disabled due to alignment constraints, but not
|
||||
* the first level.
|
||||
*/
|
||||
unsigned num_dcc_levels:4;
|
||||
unsigned is_linear:1;
|
||||
unsigned has_stencil:1;
|
||||
/* This might be true even if micro_tile_mode isn't displayable or rotated. */
|
||||
unsigned is_displayable:1;
|
||||
/* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
|
||||
unsigned micro_tile_mode:3;
|
||||
uint32_t flags;
|
||||
/* Format properties. */
|
||||
unsigned blk_w : 4;
|
||||
unsigned blk_h : 4;
|
||||
unsigned bpe : 5;
|
||||
/* Number of mipmap levels where DCC is enabled starting from level 0.
|
||||
* Non-zero levels may be disabled due to alignment constraints, but not
|
||||
* the first level.
|
||||
*/
|
||||
unsigned num_dcc_levels : 4;
|
||||
unsigned is_linear : 1;
|
||||
unsigned has_stencil : 1;
|
||||
/* This might be true even if micro_tile_mode isn't displayable or rotated. */
|
||||
unsigned is_displayable : 1;
|
||||
/* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
|
||||
unsigned micro_tile_mode : 3;
|
||||
uint32_t flags;
|
||||
|
||||
/* These are return values. Some of them can be set by the caller, but
|
||||
* they will be treated as hints (e.g. bankw, bankh) and might be
|
||||
* changed by the calculator.
|
||||
*/
|
||||
/* These are return values. Some of them can be set by the caller, but
|
||||
* they will be treated as hints (e.g. bankw, bankh) and might be
|
||||
* changed by the calculator.
|
||||
*/
|
||||
|
||||
/* Tile swizzle can be OR'd with low bits of the BASE_256B address.
|
||||
* The value is the same for all mipmap levels. Supported tile modes:
|
||||
* - GFX6: Only macro tiling.
|
||||
* - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip
|
||||
* tail.
|
||||
*
|
||||
* Only these surfaces are allowed to set it:
|
||||
* - color (if it doesn't have to be displayable)
|
||||
* - DCC (same tile swizzle as color)
|
||||
* - FMASK
|
||||
* - CMASK if it's TC-compatible or if the gen is GFX9
|
||||
* - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
|
||||
*/
|
||||
uint8_t tile_swizzle;
|
||||
uint8_t fmask_tile_swizzle;
|
||||
/* Tile swizzle can be OR'd with low bits of the BASE_256B address.
|
||||
* The value is the same for all mipmap levels. Supported tile modes:
|
||||
* - GFX6: Only macro tiling.
|
||||
* - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip
|
||||
* tail.
|
||||
*
|
||||
* Only these surfaces are allowed to set it:
|
||||
* - color (if it doesn't have to be displayable)
|
||||
* - DCC (same tile swizzle as color)
|
||||
* - FMASK
|
||||
* - CMASK if it's TC-compatible or if the gen is GFX9
|
||||
* - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
|
||||
*/
|
||||
uint8_t tile_swizzle;
|
||||
uint8_t fmask_tile_swizzle;
|
||||
|
||||
uint64_t surf_size;
|
||||
uint64_t fmask_size;
|
||||
uint32_t surf_alignment;
|
||||
uint32_t fmask_alignment;
|
||||
uint64_t surf_size;
|
||||
uint64_t fmask_size;
|
||||
uint32_t surf_alignment;
|
||||
uint32_t fmask_alignment;
|
||||
|
||||
/* DCC and HTILE are very small. */
|
||||
uint32_t dcc_size;
|
||||
uint32_t dcc_slice_size;
|
||||
uint32_t dcc_alignment;
|
||||
/* DCC and HTILE are very small. */
|
||||
uint32_t dcc_size;
|
||||
uint32_t dcc_slice_size;
|
||||
uint32_t dcc_alignment;
|
||||
|
||||
uint32_t htile_size;
|
||||
uint32_t htile_slice_size;
|
||||
uint32_t htile_alignment;
|
||||
uint32_t htile_size;
|
||||
uint32_t htile_slice_size;
|
||||
uint32_t htile_alignment;
|
||||
|
||||
uint32_t cmask_size;
|
||||
uint32_t cmask_slice_size;
|
||||
uint32_t cmask_alignment;
|
||||
uint32_t cmask_size;
|
||||
uint32_t cmask_slice_size;
|
||||
uint32_t cmask_alignment;
|
||||
|
||||
/* All buffers combined. */
|
||||
uint64_t htile_offset;
|
||||
uint64_t fmask_offset;
|
||||
uint64_t cmask_offset;
|
||||
uint64_t dcc_offset;
|
||||
uint64_t display_dcc_offset;
|
||||
uint64_t dcc_retile_map_offset;
|
||||
uint64_t total_size;
|
||||
uint32_t alignment;
|
||||
/* All buffers combined. */
|
||||
uint64_t htile_offset;
|
||||
uint64_t fmask_offset;
|
||||
uint64_t cmask_offset;
|
||||
uint64_t dcc_offset;
|
||||
uint64_t display_dcc_offset;
|
||||
uint64_t dcc_retile_map_offset;
|
||||
uint64_t total_size;
|
||||
uint32_t alignment;
|
||||
|
||||
union {
|
||||
/* Return values for GFX8 and older.
|
||||
*
|
||||
* Some of them can be set by the caller if certain parameters are
|
||||
* desirable. The allocator will try to obey them.
|
||||
*/
|
||||
struct legacy_surf_layout legacy;
|
||||
union {
|
||||
/* Return values for GFX8 and older.
|
||||
*
|
||||
* Some of them can be set by the caller if certain parameters are
|
||||
* desirable. The allocator will try to obey them.
|
||||
*/
|
||||
struct legacy_surf_layout legacy;
|
||||
|
||||
/* GFX9+ return values. */
|
||||
struct gfx9_surf_layout gfx9;
|
||||
} u;
|
||||
/* GFX9+ return values. */
|
||||
struct gfx9_surf_layout gfx9;
|
||||
} u;
|
||||
};
|
||||
|
||||
struct ac_surf_info {
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t depth;
|
||||
uint8_t samples; /* For Z/S: samples; For color: FMASK coverage samples */
|
||||
uint8_t storage_samples; /* For color: allocated samples */
|
||||
uint8_t levels;
|
||||
uint8_t num_channels; /* heuristic for displayability */
|
||||
uint16_t array_size;
|
||||
uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
|
||||
uint32_t *fmask_surf_index;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t depth;
|
||||
uint8_t samples; /* For Z/S: samples; For color: FMASK coverage samples */
|
||||
uint8_t storage_samples; /* For color: allocated samples */
|
||||
uint8_t levels;
|
||||
uint8_t num_channels; /* heuristic for displayability */
|
||||
uint16_t array_size;
|
||||
uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
|
||||
uint32_t *fmask_surf_index;
|
||||
};
|
||||
|
||||
struct ac_surf_config {
|
||||
struct ac_surf_info info;
|
||||
unsigned is_1d : 1;
|
||||
unsigned is_3d : 1;
|
||||
unsigned is_cube : 1;
|
||||
struct ac_surf_info info;
|
||||
unsigned is_1d : 1;
|
||||
unsigned is_3d : 1;
|
||||
unsigned is_cube : 1;
|
||||
};
|
||||
|
||||
struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
|
||||
const struct amdgpu_gpu_info *amdinfo,
|
||||
uint64_t *max_alignment);
|
||||
const struct amdgpu_gpu_info *amdinfo,
|
||||
uint64_t *max_alignment);
|
||||
void ac_addrlib_destroy(struct ac_addrlib *addrlib);
|
||||
|
||||
int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
|
||||
const struct ac_surf_config * config,
|
||||
enum radeon_surf_mode mode,
|
||||
struct radeon_surf *surf);
|
||||
const struct ac_surf_config *config, enum radeon_surf_mode mode,
|
||||
struct radeon_surf *surf);
|
||||
void ac_surface_zero_dcc_fields(struct radeon_surf *surf);
|
||||
|
||||
void ac_surface_set_bo_metadata(const struct radeon_info *info,
|
||||
struct radeon_surf *surf, uint64_t tiling_flags,
|
||||
enum radeon_surf_mode *mode);
|
||||
void ac_surface_get_bo_metadata(const struct radeon_info *info,
|
||||
struct radeon_surf *surf, uint64_t *tiling_flags);
|
||||
void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
|
||||
uint64_t tiling_flags, enum radeon_surf_mode *mode);
|
||||
void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
|
||||
uint64_t *tiling_flags);
|
||||
|
||||
bool ac_surface_set_umd_metadata(const struct radeon_info *info,
|
||||
struct radeon_surf *surf,
|
||||
unsigned num_storage_samples,
|
||||
unsigned num_mipmap_levels,
|
||||
unsigned size_metadata,
|
||||
uint32_t metadata[64]);
|
||||
void ac_surface_get_umd_metadata(const struct radeon_info *info,
|
||||
struct radeon_surf *surf,
|
||||
unsigned num_mipmap_levels,
|
||||
uint32_t desc[8],
|
||||
bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
|
||||
unsigned num_storage_samples, unsigned num_mipmap_levels,
|
||||
unsigned size_metadata, uint32_t metadata[64]);
|
||||
void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
|
||||
unsigned num_mipmap_levels, uint32_t desc[8],
|
||||
unsigned *size_metadata, uint32_t metadata[64]);
|
||||
|
||||
void ac_surface_override_offset_stride(const struct radeon_info *info,
|
||||
struct radeon_surf *surf,
|
||||
unsigned num_mipmap_levels,
|
||||
uint64_t offset, unsigned pitch);
|
||||
void ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
|
||||
unsigned num_mipmap_levels, uint64_t offset, unsigned pitch);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,117 +24,120 @@
|
|||
#ifndef AMD_FAMILY_H
|
||||
#define AMD_FAMILY_H
|
||||
|
||||
enum radeon_family {
|
||||
CHIP_UNKNOWN = 0,
|
||||
CHIP_R300, /* R3xx-based cores. (GFX2) */
|
||||
CHIP_R350,
|
||||
CHIP_RV350,
|
||||
CHIP_RV370,
|
||||
CHIP_RV380,
|
||||
CHIP_RS400,
|
||||
CHIP_RC410,
|
||||
CHIP_RS480,
|
||||
CHIP_R420, /* R4xx-based cores. (GFX2) */
|
||||
CHIP_R423,
|
||||
CHIP_R430,
|
||||
CHIP_R480,
|
||||
CHIP_R481,
|
||||
CHIP_RV410,
|
||||
CHIP_RS600,
|
||||
CHIP_RS690,
|
||||
CHIP_RS740,
|
||||
CHIP_RV515, /* R5xx-based cores. (GFX2) */
|
||||
CHIP_R520,
|
||||
CHIP_RV530,
|
||||
CHIP_R580,
|
||||
CHIP_RV560,
|
||||
CHIP_RV570,
|
||||
CHIP_R600, /* GFX3 (R6xx) */
|
||||
CHIP_RV610,
|
||||
CHIP_RV630,
|
||||
CHIP_RV670,
|
||||
CHIP_RV620,
|
||||
CHIP_RV635,
|
||||
CHIP_RS780,
|
||||
CHIP_RS880,
|
||||
CHIP_RV770, /* GFX3 (R7xx) */
|
||||
CHIP_RV730,
|
||||
CHIP_RV710,
|
||||
CHIP_RV740,
|
||||
CHIP_CEDAR, /* GFX4 (Evergreen) */
|
||||
CHIP_REDWOOD,
|
||||
CHIP_JUNIPER,
|
||||
CHIP_CYPRESS,
|
||||
CHIP_HEMLOCK,
|
||||
CHIP_PALM,
|
||||
CHIP_SUMO,
|
||||
CHIP_SUMO2,
|
||||
CHIP_BARTS,
|
||||
CHIP_TURKS,
|
||||
CHIP_CAICOS,
|
||||
CHIP_CAYMAN, /* GFX5 (Northern Islands) */
|
||||
CHIP_ARUBA,
|
||||
CHIP_TAHITI, /* GFX6 (Southern Islands) */
|
||||
CHIP_PITCAIRN,
|
||||
CHIP_VERDE,
|
||||
CHIP_OLAND,
|
||||
CHIP_HAINAN,
|
||||
CHIP_BONAIRE, /* GFX7 (Sea Islands) */
|
||||
CHIP_KAVERI,
|
||||
CHIP_KABINI,
|
||||
CHIP_HAWAII,
|
||||
CHIP_TONGA, /* GFX8 (Volcanic Islands & Polaris) */
|
||||
CHIP_ICELAND,
|
||||
CHIP_CARRIZO,
|
||||
CHIP_FIJI,
|
||||
CHIP_STONEY,
|
||||
CHIP_POLARIS10,
|
||||
CHIP_POLARIS11,
|
||||
CHIP_POLARIS12,
|
||||
CHIP_VEGAM,
|
||||
CHIP_VEGA10, /* GFX9 (Vega) */
|
||||
CHIP_VEGA12,
|
||||
CHIP_VEGA20,
|
||||
CHIP_RAVEN,
|
||||
CHIP_RAVEN2,
|
||||
CHIP_RENOIR,
|
||||
CHIP_ARCTURUS,
|
||||
CHIP_NAVI10,
|
||||
CHIP_NAVI12,
|
||||
CHIP_NAVI14,
|
||||
CHIP_SIENNA_CICHLID,
|
||||
CHIP_NAVY_FLOUNDER,
|
||||
CHIP_LAST,
|
||||
enum radeon_family
|
||||
{
|
||||
CHIP_UNKNOWN = 0,
|
||||
CHIP_R300, /* R3xx-based cores. (GFX2) */
|
||||
CHIP_R350,
|
||||
CHIP_RV350,
|
||||
CHIP_RV370,
|
||||
CHIP_RV380,
|
||||
CHIP_RS400,
|
||||
CHIP_RC410,
|
||||
CHIP_RS480,
|
||||
CHIP_R420, /* R4xx-based cores. (GFX2) */
|
||||
CHIP_R423,
|
||||
CHIP_R430,
|
||||
CHIP_R480,
|
||||
CHIP_R481,
|
||||
CHIP_RV410,
|
||||
CHIP_RS600,
|
||||
CHIP_RS690,
|
||||
CHIP_RS740,
|
||||
CHIP_RV515, /* R5xx-based cores. (GFX2) */
|
||||
CHIP_R520,
|
||||
CHIP_RV530,
|
||||
CHIP_R580,
|
||||
CHIP_RV560,
|
||||
CHIP_RV570,
|
||||
CHIP_R600, /* GFX3 (R6xx) */
|
||||
CHIP_RV610,
|
||||
CHIP_RV630,
|
||||
CHIP_RV670,
|
||||
CHIP_RV620,
|
||||
CHIP_RV635,
|
||||
CHIP_RS780,
|
||||
CHIP_RS880,
|
||||
CHIP_RV770, /* GFX3 (R7xx) */
|
||||
CHIP_RV730,
|
||||
CHIP_RV710,
|
||||
CHIP_RV740,
|
||||
CHIP_CEDAR, /* GFX4 (Evergreen) */
|
||||
CHIP_REDWOOD,
|
||||
CHIP_JUNIPER,
|
||||
CHIP_CYPRESS,
|
||||
CHIP_HEMLOCK,
|
||||
CHIP_PALM,
|
||||
CHIP_SUMO,
|
||||
CHIP_SUMO2,
|
||||
CHIP_BARTS,
|
||||
CHIP_TURKS,
|
||||
CHIP_CAICOS,
|
||||
CHIP_CAYMAN, /* GFX5 (Northern Islands) */
|
||||
CHIP_ARUBA,
|
||||
CHIP_TAHITI, /* GFX6 (Southern Islands) */
|
||||
CHIP_PITCAIRN,
|
||||
CHIP_VERDE,
|
||||
CHIP_OLAND,
|
||||
CHIP_HAINAN,
|
||||
CHIP_BONAIRE, /* GFX7 (Sea Islands) */
|
||||
CHIP_KAVERI,
|
||||
CHIP_KABINI,
|
||||
CHIP_HAWAII,
|
||||
CHIP_TONGA, /* GFX8 (Volcanic Islands & Polaris) */
|
||||
CHIP_ICELAND,
|
||||
CHIP_CARRIZO,
|
||||
CHIP_FIJI,
|
||||
CHIP_STONEY,
|
||||
CHIP_POLARIS10,
|
||||
CHIP_POLARIS11,
|
||||
CHIP_POLARIS12,
|
||||
CHIP_VEGAM,
|
||||
CHIP_VEGA10, /* GFX9 (Vega) */
|
||||
CHIP_VEGA12,
|
||||
CHIP_VEGA20,
|
||||
CHIP_RAVEN,
|
||||
CHIP_RAVEN2,
|
||||
CHIP_RENOIR,
|
||||
CHIP_ARCTURUS,
|
||||
CHIP_NAVI10,
|
||||
CHIP_NAVI12,
|
||||
CHIP_NAVI14,
|
||||
CHIP_SIENNA_CICHLID,
|
||||
CHIP_NAVY_FLOUNDER,
|
||||
CHIP_LAST,
|
||||
};
|
||||
|
||||
enum chip_class {
|
||||
CLASS_UNKNOWN = 0,
|
||||
R300,
|
||||
R400,
|
||||
R500,
|
||||
R600,
|
||||
R700,
|
||||
EVERGREEN,
|
||||
CAYMAN,
|
||||
GFX6,
|
||||
GFX7,
|
||||
GFX8,
|
||||
GFX9,
|
||||
GFX10,
|
||||
GFX10_3,
|
||||
enum chip_class
|
||||
{
|
||||
CLASS_UNKNOWN = 0,
|
||||
R300,
|
||||
R400,
|
||||
R500,
|
||||
R600,
|
||||
R700,
|
||||
EVERGREEN,
|
||||
CAYMAN,
|
||||
GFX6,
|
||||
GFX7,
|
||||
GFX8,
|
||||
GFX9,
|
||||
GFX10,
|
||||
GFX10_3,
|
||||
};
|
||||
|
||||
enum ring_type {
|
||||
RING_GFX = 0,
|
||||
RING_COMPUTE,
|
||||
RING_DMA,
|
||||
RING_UVD,
|
||||
RING_VCE,
|
||||
RING_UVD_ENC,
|
||||
RING_VCN_DEC,
|
||||
RING_VCN_ENC,
|
||||
RING_VCN_JPEG,
|
||||
NUM_RING_TYPES,
|
||||
enum ring_type
|
||||
{
|
||||
RING_GFX = 0,
|
||||
RING_COMPUTE,
|
||||
RING_DMA,
|
||||
RING_UVD,
|
||||
RING_VCE,
|
||||
RING_UVD_ENC,
|
||||
RING_VCN_DEC,
|
||||
RING_VCN_ENC,
|
||||
RING_VCN_JPEG,
|
||||
NUM_RING_TYPES,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -30,13 +30,12 @@
|
|||
//---------------------------------------------------------------------------//
|
||||
|
||||
// Sets val bits for specified mask in specified dst packed instance.
|
||||
#define AMD_HSA_BITS_SET(dst, mask, val) \
|
||||
dst &= (~(1 << mask ## _SHIFT) & ~mask); \
|
||||
dst |= (((val) << mask ## _SHIFT) & mask)
|
||||
#define AMD_HSA_BITS_SET(dst, mask, val) \
|
||||
dst &= (~(1 << mask##_SHIFT) & ~mask); \
|
||||
dst |= (((val) << mask##_SHIFT) & mask)
|
||||
|
||||
// Gets bits for specified mask from specified src packed instance.
|
||||
#define AMD_HSA_BITS_GET(src, mask) \
|
||||
((src & mask) >> mask ## _SHIFT)
|
||||
#define AMD_HSA_BITS_GET(src, mask) ((src & mask) >> mask##_SHIFT)
|
||||
|
||||
/* Every amd_*_code_t has the following properties, which are composed of
|
||||
* a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
|
||||
|
|
@ -47,132 +46,164 @@
|
|||
* implementation defined in the C standard and so cannot be used to
|
||||
* specify an ABI)
|
||||
*/
|
||||
enum amd_code_property_mask_t {
|
||||
enum amd_code_property_mask_t
|
||||
{
|
||||
|
||||
/* Enable the setup of the SGPR user data registers
|
||||
* (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
|
||||
* for initial register state.
|
||||
*
|
||||
* The total number of SGPR user data registers requested must not
|
||||
* exceed 16. Any requests beyond 16 will be ignored.
|
||||
*
|
||||
* Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
|
||||
* SGPR user data registers enabled up to 16).
|
||||
*/
|
||||
/* Enable the setup of the SGPR user data registers
|
||||
* (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
|
||||
* for initial register state.
|
||||
*
|
||||
* The total number of SGPR user data registers requested must not
|
||||
* exceed 16. Any requests beyond 16 will be ignored.
|
||||
*
|
||||
* Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
|
||||
* SGPR user data registers enabled up to 16).
|
||||
*/
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
|
||||
AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
|
||||
AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
|
||||
AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
|
||||
AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
|
||||
AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_RESERVED1_SHIFT,
|
||||
|
||||
/* Control wave ID base counter for GDS ordered-append. Used to set
|
||||
* COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
|
||||
* ORDERED_APPEND_MODE also needs to be settable)
|
||||
*/
|
||||
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 16,
|
||||
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS = ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
|
||||
/* Control wave ID base counter for GDS ordered-append. Used to set
|
||||
* COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
|
||||
* ORDERED_APPEND_MODE also needs to be settable)
|
||||
*/
|
||||
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 16,
|
||||
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS =
|
||||
((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
|
||||
|
||||
/* The interleave (swizzle) element size in bytes required by the
|
||||
* code for private memory. This must be 2, 4, 8 or 16. This value
|
||||
* is provided to the finalizer when it is invoked and is recorded
|
||||
* here. The hardware will interleave the memory requests of each
|
||||
* lane of a wavefront by this element size to ensure each
|
||||
* work-item gets a distinct memory location. Therefore, the
|
||||
* finalizer ensures that all load and store operations done to
|
||||
* private memory do not exceed this size. For example, if the
|
||||
* element size is 4 (32-bits or dword) and a 64-bit value must be
|
||||
* loaded, the finalizer will generate two 32-bit loads. This
|
||||
* ensures that the interleaving will get the work-item
|
||||
* specific dword for both halves of the 64-bit value. If it just
|
||||
* did a 64-bit load then it would get one dword which belonged to
|
||||
* its own work-item, but the second dword would belong to the
|
||||
* adjacent lane work-item since the interleaving is in dwords.
|
||||
*
|
||||
* The value used must match the value that the runtime configures
|
||||
* the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
|
||||
* is generally DWORD.
|
||||
*
|
||||
* USE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
|
||||
*/
|
||||
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 17,
|
||||
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
|
||||
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
|
||||
/* The interleave (swizzle) element size in bytes required by the
|
||||
* code for private memory. This must be 2, 4, 8 or 16. This value
|
||||
* is provided to the finalizer when it is invoked and is recorded
|
||||
* here. The hardware will interleave the memory requests of each
|
||||
* lane of a wavefront by this element size to ensure each
|
||||
* work-item gets a distinct memory location. Therefore, the
|
||||
* finalizer ensures that all load and store operations done to
|
||||
* private memory do not exceed this size. For example, if the
|
||||
* element size is 4 (32-bits or dword) and a 64-bit value must be
|
||||
* loaded, the finalizer will generate two 32-bit loads. This
|
||||
* ensures that the interleaving will get the work-item
|
||||
* specific dword for both halves of the 64-bit value. If it just
|
||||
* did a 64-bit load then it would get one dword which belonged to
|
||||
* its own work-item, but the second dword would belong to the
|
||||
* adjacent lane work-item since the interleaving is in dwords.
|
||||
*
|
||||
* The value used must match the value that the runtime configures
|
||||
* the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
|
||||
* is generally DWORD.
|
||||
*
|
||||
* USE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
|
||||
*/
|
||||
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 17,
|
||||
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
|
||||
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE =
|
||||
((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
|
||||
|
||||
/* Are global memory addresses 64 bits. Must match
|
||||
* amd_kernel_code_t.hsail_machine_model ==
|
||||
* HSA_MACHINE_LARGE. Must also match
|
||||
* SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
|
||||
* SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
|
||||
*/
|
||||
AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 19,
|
||||
AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
|
||||
/* Are global memory addresses 64 bits. Must match
|
||||
* amd_kernel_code_t.hsail_machine_model ==
|
||||
* HSA_MACHINE_LARGE. Must also match
|
||||
* SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
|
||||
* SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
|
||||
*/
|
||||
AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 19,
|
||||
AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
|
||||
|
||||
/* Indicate if the generated ISA is using a dynamically sized call
|
||||
* stack. This can happen if calls are implemented using a call
|
||||
* stack and recursion, alloca or calls to indirect functions are
|
||||
* present. In these cases the Finalizer cannot compute the total
|
||||
* private segment size at compile time. In this case the
|
||||
* workitem_private_segment_byte_size only specifies the statically
|
||||
* known private segment size, and additional space must be added
|
||||
* for the call stack.
|
||||
*/
|
||||
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 20,
|
||||
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK = ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
|
||||
/* Indicate if the generated ISA is using a dynamically sized call
|
||||
* stack. This can happen if calls are implemented using a call
|
||||
* stack and recursion, alloca or calls to indirect functions are
|
||||
* present. In these cases the Finalizer cannot compute the total
|
||||
* private segment size at compile time. In this case the
|
||||
* workitem_private_segment_byte_size only specifies the statically
|
||||
* known private segment size, and additional space must be added
|
||||
* for the call stack.
|
||||
*/
|
||||
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 20,
|
||||
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK =
|
||||
((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
|
||||
|
||||
/* Indicate if code generated has support for debugging. */
|
||||
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 21,
|
||||
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
|
||||
/* Indicate if code generated has support for debugging. */
|
||||
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 21,
|
||||
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 22,
|
||||
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT,
|
||||
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 22,
|
||||
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
|
||||
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT,
|
||||
|
||||
AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23,
|
||||
AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9,
|
||||
AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED2_SHIFT
|
||||
AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23,
|
||||
AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9,
|
||||
AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1)
|
||||
<< AMD_CODE_PROPERTY_RESERVED2_SHIFT
|
||||
};
|
||||
|
||||
/* AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel
|
||||
|
|
@ -381,154 +412,154 @@ enum amd_code_property_mask_t {
|
|||
*/
|
||||
|
||||
typedef struct amd_kernel_code_s {
|
||||
uint32_t amd_kernel_code_version_major;
|
||||
uint32_t amd_kernel_code_version_minor;
|
||||
uint16_t amd_machine_kind;
|
||||
uint16_t amd_machine_version_major;
|
||||
uint16_t amd_machine_version_minor;
|
||||
uint16_t amd_machine_version_stepping;
|
||||
uint32_t amd_kernel_code_version_major;
|
||||
uint32_t amd_kernel_code_version_minor;
|
||||
uint16_t amd_machine_kind;
|
||||
uint16_t amd_machine_version_major;
|
||||
uint16_t amd_machine_version_minor;
|
||||
uint16_t amd_machine_version_stepping;
|
||||
|
||||
/* Byte offset (possibly negative) from start of amd_kernel_code_t
|
||||
* object to kernel's entry point instruction. The actual code for
|
||||
* the kernel is required to be 256 byte aligned to match hardware
|
||||
* requirements (SQ cache line is 16). The code must be position
|
||||
* independent code (PIC) for AMD devices to give runtime the
|
||||
* option of copying code to discrete GPU memory or APU L2
|
||||
* cache. The Finalizer should endeavour to allocate all kernel
|
||||
* machine code in contiguous memory pages so that a device
|
||||
* pre-fetcher will tend to only pre-fetch Kernel Code objects,
|
||||
* improving cache performance.
|
||||
*/
|
||||
int64_t kernel_code_entry_byte_offset;
|
||||
/* Byte offset (possibly negative) from start of amd_kernel_code_t
|
||||
* object to kernel's entry point instruction. The actual code for
|
||||
* the kernel is required to be 256 byte aligned to match hardware
|
||||
* requirements (SQ cache line is 16). The code must be position
|
||||
* independent code (PIC) for AMD devices to give runtime the
|
||||
* option of copying code to discrete GPU memory or APU L2
|
||||
* cache. The Finalizer should endeavour to allocate all kernel
|
||||
* machine code in contiguous memory pages so that a device
|
||||
* pre-fetcher will tend to only pre-fetch Kernel Code objects,
|
||||
* improving cache performance.
|
||||
*/
|
||||
int64_t kernel_code_entry_byte_offset;
|
||||
|
||||
/* Range of bytes to consider prefetching expressed as an offset
|
||||
* and size. The offset is from the start (possibly negative) of
|
||||
* amd_kernel_code_t object. Set both to 0 if no prefetch
|
||||
* information is available.
|
||||
*/
|
||||
int64_t kernel_code_prefetch_byte_offset;
|
||||
uint64_t kernel_code_prefetch_byte_size;
|
||||
/* Range of bytes to consider prefetching expressed as an offset
|
||||
* and size. The offset is from the start (possibly negative) of
|
||||
* amd_kernel_code_t object. Set both to 0 if no prefetch
|
||||
* information is available.
|
||||
*/
|
||||
int64_t kernel_code_prefetch_byte_offset;
|
||||
uint64_t kernel_code_prefetch_byte_size;
|
||||
|
||||
/* Number of bytes of scratch backing memory required for full
|
||||
* occupancy of target chip. This takes into account the number of
|
||||
* bytes of scratch per work-item, the wavefront size, the maximum
|
||||
* number of wavefronts per CU, and the number of CUs. This is an
|
||||
* upper limit on scratch. If the grid being dispatched is small it
|
||||
* may only need less than this. If the kernel uses no scratch, or
|
||||
* the Finalizer has not computed this value, it must be 0.
|
||||
*/
|
||||
uint64_t max_scratch_backing_memory_byte_size;
|
||||
/* Number of bytes of scratch backing memory required for full
|
||||
* occupancy of target chip. This takes into account the number of
|
||||
* bytes of scratch per work-item, the wavefront size, the maximum
|
||||
* number of wavefronts per CU, and the number of CUs. This is an
|
||||
* upper limit on scratch. If the grid being dispatched is small it
|
||||
* may only need less than this. If the kernel uses no scratch, or
|
||||
* the Finalizer has not computed this value, it must be 0.
|
||||
*/
|
||||
uint64_t max_scratch_backing_memory_byte_size;
|
||||
|
||||
/* Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
|
||||
* COMPUTE_PGM_RSRC2 registers.
|
||||
*/
|
||||
uint64_t compute_pgm_resource_registers;
|
||||
/* Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
|
||||
* COMPUTE_PGM_RSRC2 registers.
|
||||
*/
|
||||
uint64_t compute_pgm_resource_registers;
|
||||
|
||||
/* Code properties. See amd_code_property_mask_t for a full list of
|
||||
* properties.
|
||||
*/
|
||||
uint32_t code_properties;
|
||||
/* Code properties. See amd_code_property_mask_t for a full list of
|
||||
* properties.
|
||||
*/
|
||||
uint32_t code_properties;
|
||||
|
||||
/* The amount of memory required for the combined private, spill
|
||||
* and arg segments for a work-item in bytes. If
|
||||
* is_dynamic_callstack is 1 then additional space must be added to
|
||||
* this value for the call stack.
|
||||
*/
|
||||
uint32_t workitem_private_segment_byte_size;
|
||||
/* The amount of memory required for the combined private, spill
|
||||
* and arg segments for a work-item in bytes. If
|
||||
* is_dynamic_callstack is 1 then additional space must be added to
|
||||
* this value for the call stack.
|
||||
*/
|
||||
uint32_t workitem_private_segment_byte_size;
|
||||
|
||||
/* The amount of group segment memory required by a work-group in
|
||||
* bytes. This does not include any dynamically allocated group
|
||||
* segment memory that may be added when the kernel is
|
||||
* dispatched.
|
||||
*/
|
||||
uint32_t workgroup_group_segment_byte_size;
|
||||
/* The amount of group segment memory required by a work-group in
|
||||
* bytes. This does not include any dynamically allocated group
|
||||
* segment memory that may be added when the kernel is
|
||||
* dispatched.
|
||||
*/
|
||||
uint32_t workgroup_group_segment_byte_size;
|
||||
|
||||
/* Number of bytes of GDS required by kernel dispatch. Must be 0 if
|
||||
* not using GDS.
|
||||
*/
|
||||
uint32_t gds_segment_byte_size;
|
||||
/* Number of bytes of GDS required by kernel dispatch. Must be 0 if
|
||||
* not using GDS.
|
||||
*/
|
||||
uint32_t gds_segment_byte_size;
|
||||
|
||||
/* The size in bytes of the kernarg segment that holds the values
|
||||
* of the arguments to the kernel. This could be used by CP to
|
||||
* prefetch the kernarg segment pointed to by the dispatch packet.
|
||||
*/
|
||||
uint64_t kernarg_segment_byte_size;
|
||||
/* The size in bytes of the kernarg segment that holds the values
|
||||
* of the arguments to the kernel. This could be used by CP to
|
||||
* prefetch the kernarg segment pointed to by the dispatch packet.
|
||||
*/
|
||||
uint64_t kernarg_segment_byte_size;
|
||||
|
||||
/* Number of fbarrier's used in the kernel and all functions it
|
||||
* calls. If the implementation uses group memory to allocate the
|
||||
* fbarriers then that amount must already be included in the
|
||||
* workgroup_group_segment_byte_size total.
|
||||
*/
|
||||
uint32_t workgroup_fbarrier_count;
|
||||
/* Number of fbarrier's used in the kernel and all functions it
|
||||
* calls. If the implementation uses group memory to allocate the
|
||||
* fbarriers then that amount must already be included in the
|
||||
* workgroup_group_segment_byte_size total.
|
||||
*/
|
||||
uint32_t workgroup_fbarrier_count;
|
||||
|
||||
/* Number of scalar registers used by a wavefront. This includes
|
||||
* the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
|
||||
* and XNACK (for GFX8 (VI)). It does not include the 16 SGPR added if a
|
||||
* trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
|
||||
*/
|
||||
uint16_t wavefront_sgpr_count;
|
||||
/* Number of scalar registers used by a wavefront. This includes
|
||||
* the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
|
||||
* and XNACK (for GFX8 (VI)). It does not include the 16 SGPR added if a
|
||||
* trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
|
||||
*/
|
||||
uint16_t wavefront_sgpr_count;
|
||||
|
||||
/* Number of vector registers used by each work-item. Used to set
|
||||
* COMPUTE_PGM_RSRC1.VGPRS.
|
||||
*/
|
||||
uint16_t workitem_vgpr_count;
|
||||
/* Number of vector registers used by each work-item. Used to set
|
||||
* COMPUTE_PGM_RSRC1.VGPRS.
|
||||
*/
|
||||
uint16_t workitem_vgpr_count;
|
||||
|
||||
/* If reserved_vgpr_count is 0 then must be 0. Otherwise, this is the
|
||||
* first fixed VGPR number reserved.
|
||||
*/
|
||||
uint16_t reserved_vgpr_first;
|
||||
/* If reserved_vgpr_count is 0 then must be 0. Otherwise, this is the
|
||||
* first fixed VGPR number reserved.
|
||||
*/
|
||||
uint16_t reserved_vgpr_first;
|
||||
|
||||
/* The number of consecutive VGPRs reserved by the client. If
|
||||
* is_debug_supported then this count includes VGPRs reserved
|
||||
* for debugger use.
|
||||
*/
|
||||
uint16_t reserved_vgpr_count;
|
||||
/* The number of consecutive VGPRs reserved by the client. If
|
||||
* is_debug_supported then this count includes VGPRs reserved
|
||||
* for debugger use.
|
||||
*/
|
||||
uint16_t reserved_vgpr_count;
|
||||
|
||||
/* If reserved_sgpr_count is 0 then must be 0. Otherwise, this is the
|
||||
* first fixed SGPR number reserved.
|
||||
*/
|
||||
uint16_t reserved_sgpr_first;
|
||||
/* If reserved_sgpr_count is 0 then must be 0. Otherwise, this is the
|
||||
* first fixed SGPR number reserved.
|
||||
*/
|
||||
uint16_t reserved_sgpr_first;
|
||||
|
||||
/* The number of consecutive SGPRs reserved by the client. If
|
||||
* is_debug_supported then this count includes SGPRs reserved
|
||||
* for debugger use.
|
||||
*/
|
||||
uint16_t reserved_sgpr_count;
|
||||
/* The number of consecutive SGPRs reserved by the client. If
|
||||
* is_debug_supported then this count includes SGPRs reserved
|
||||
* for debugger use.
|
||||
*/
|
||||
uint16_t reserved_sgpr_count;
|
||||
|
||||
/* If is_debug_supported is 0 then must be 0. Otherwise, this is the
|
||||
* fixed SGPR number used to hold the wave scratch offset for the
|
||||
* entire kernel execution, or uint16_t(-1) if the register is not
|
||||
* used or not known.
|
||||
*/
|
||||
uint16_t debug_wavefront_private_segment_offset_sgpr;
|
||||
/* If is_debug_supported is 0 then must be 0. Otherwise, this is the
|
||||
* fixed SGPR number used to hold the wave scratch offset for the
|
||||
* entire kernel execution, or uint16_t(-1) if the register is not
|
||||
* used or not known.
|
||||
*/
|
||||
uint16_t debug_wavefront_private_segment_offset_sgpr;
|
||||
|
||||
/* If is_debug_supported is 0 then must be 0. Otherwise, this is the
|
||||
* fixed SGPR number of the first of 4 SGPRs used to hold the
|
||||
* scratch V# used for the entire kernel execution, or uint16_t(-1)
|
||||
* if the registers are not used or not known.
|
||||
*/
|
||||
uint16_t debug_private_segment_buffer_sgpr;
|
||||
/* If is_debug_supported is 0 then must be 0. Otherwise, this is the
|
||||
* fixed SGPR number of the first of 4 SGPRs used to hold the
|
||||
* scratch V# used for the entire kernel execution, or uint16_t(-1)
|
||||
* if the registers are not used or not known.
|
||||
*/
|
||||
uint16_t debug_private_segment_buffer_sgpr;
|
||||
|
||||
/* The maximum byte alignment of variables used by the kernel in
|
||||
* the specified memory segment. Expressed as a power of two. Must
|
||||
* be at least HSA_POWERTWO_16.
|
||||
*/
|
||||
uint8_t kernarg_segment_alignment;
|
||||
uint8_t group_segment_alignment;
|
||||
uint8_t private_segment_alignment;
|
||||
/* The maximum byte alignment of variables used by the kernel in
|
||||
* the specified memory segment. Expressed as a power of two. Must
|
||||
* be at least HSA_POWERTWO_16.
|
||||
*/
|
||||
uint8_t kernarg_segment_alignment;
|
||||
uint8_t group_segment_alignment;
|
||||
uint8_t private_segment_alignment;
|
||||
|
||||
/* Wavefront size expressed as a power of two. Must be a power of 2
|
||||
* in range 1..64 inclusive. Used to support runtime query that
|
||||
* obtains wavefront size, which may be used by application to
|
||||
* allocate dynamic group memory and set the dispatch work-group
|
||||
* size.
|
||||
*/
|
||||
uint8_t wavefront_size;
|
||||
/* Wavefront size expressed as a power of two. Must be a power of 2
|
||||
* in range 1..64 inclusive. Used to support runtime query that
|
||||
* obtains wavefront size, which may be used by application to
|
||||
* allocate dynamic group memory and set the dispatch work-group
|
||||
* size.
|
||||
*/
|
||||
uint8_t wavefront_size;
|
||||
|
||||
int32_t call_convention;
|
||||
uint8_t reserved3[12];
|
||||
uint64_t runtime_loader_kernel_symbol;
|
||||
uint64_t control_directives[16];
|
||||
int32_t call_convention;
|
||||
uint8_t reserved3[12];
|
||||
uint64_t runtime_loader_kernel_symbol;
|
||||
uint64_t control_directives[16];
|
||||
} amd_kernel_code_t;
|
||||
|
||||
#endif // AMDKERNELCODET_H
|
||||
|
|
|
|||
|
|
@ -27,16 +27,17 @@
|
|||
#ifndef GFX10_FORMAT_TABLE_H
|
||||
#define GFX10_FORMAT_TABLE_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "pipe/p_format.h"
|
||||
|
||||
struct gfx10_format {
|
||||
unsigned img_format:9;
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Various formats are only supported with workarounds for vertex fetch,
|
||||
* and some 32_32_32 formats are supported natively, but only for buffers
|
||||
* (possibly with some image support, actually, but no filtering). */
|
||||
bool buffers_only:1;
|
||||
struct gfx10_format {
|
||||
unsigned img_format : 9;
|
||||
|
||||
/* Various formats are only supported with workarounds for vertex fetch,
|
||||
* and some 32_32_32 formats are supported natively, but only for buffers
|
||||
* (possibly with some image support, actually, but no filtering). */
|
||||
bool buffers_only : 1;
|
||||
};
|
||||
|
||||
extern const struct gfx10_format gfx10_format_table[PIPE_FORMAT_COUNT];
|
||||
|
|
|
|||
|
|
@ -27,227 +27,227 @@
|
|||
#include "amdgfxregs.h"
|
||||
|
||||
/* si values */
|
||||
#define SI_CONFIG_REG_OFFSET 0x00008000
|
||||
#define SI_CONFIG_REG_END 0x0000B000
|
||||
#define SI_SH_REG_OFFSET 0x0000B000
|
||||
#define SI_SH_REG_END 0x0000C000
|
||||
#define SI_CONTEXT_REG_OFFSET 0x00028000
|
||||
#define SI_CONTEXT_REG_END 0x00030000
|
||||
#define CIK_UCONFIG_REG_OFFSET 0x00030000
|
||||
#define CIK_UCONFIG_REG_END 0x00040000
|
||||
#define SI_UCONFIG_PERF_REG_OFFSET 0x00034000
|
||||
#define SI_UCONFIG_PERF_REG_END 0x00038000
|
||||
#define SI_CONFIG_REG_OFFSET 0x00008000
|
||||
#define SI_CONFIG_REG_END 0x0000B000
|
||||
#define SI_SH_REG_OFFSET 0x0000B000
|
||||
#define SI_SH_REG_END 0x0000C000
|
||||
#define SI_CONTEXT_REG_OFFSET 0x00028000
|
||||
#define SI_CONTEXT_REG_END 0x00030000
|
||||
#define CIK_UCONFIG_REG_OFFSET 0x00030000
|
||||
#define CIK_UCONFIG_REG_END 0x00040000
|
||||
#define SI_UCONFIG_PERF_REG_OFFSET 0x00034000
|
||||
#define SI_UCONFIG_PERF_REG_END 0x00038000
|
||||
|
||||
/* For register shadowing: */
|
||||
#define SI_SH_REG_SPACE_SIZE (SI_SH_REG_END - SI_SH_REG_OFFSET)
|
||||
#define SI_CONTEXT_REG_SPACE_SIZE (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
|
||||
#define SI_UCONFIG_REG_SPACE_SIZE (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
|
||||
#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)
|
||||
#define SI_SH_REG_SPACE_SIZE (SI_SH_REG_END - SI_SH_REG_OFFSET)
|
||||
#define SI_CONTEXT_REG_SPACE_SIZE (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
|
||||
#define SI_UCONFIG_REG_SPACE_SIZE (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
|
||||
#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)
|
||||
|
||||
#define SI_SHADOWED_SH_REG_OFFSET 0
|
||||
#define SI_SHADOWED_CONTEXT_REG_OFFSET SI_SH_REG_SPACE_SIZE
|
||||
#define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE)
|
||||
#define SI_SHADOWED_REG_BUFFER_SIZE (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + \
|
||||
SI_UCONFIG_REG_SPACE_SIZE)
|
||||
#define SI_SHADOWED_SH_REG_OFFSET 0
|
||||
#define SI_SHADOWED_CONTEXT_REG_OFFSET SI_SH_REG_SPACE_SIZE
|
||||
#define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE)
|
||||
#define SI_SHADOWED_REG_BUFFER_SIZE \
|
||||
(SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + SI_UCONFIG_REG_SPACE_SIZE)
|
||||
|
||||
#define EVENT_TYPE_CACHE_FLUSH 0x6
|
||||
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
|
||||
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
|
||||
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
|
||||
#define EVENT_TYPE_ZPASS_DONE 0x15
|
||||
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
|
||||
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
|
||||
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
/* 0 - any non-TS event
|
||||
* 1 - ZPASS_DONE
|
||||
* 2 - SAMPLE_PIPELINESTAT
|
||||
* 3 - SAMPLE_STREAMOUTSTAT*
|
||||
* 4 - *S_PARTIAL_FLUSH
|
||||
* 5 - TS events
|
||||
*/
|
||||
#define EVENT_TYPE_ZPASS_DONE 0x15
|
||||
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
|
||||
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
|
||||
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
/* 0 - any non-TS event
|
||||
* 1 - ZPASS_DONE
|
||||
* 2 - SAMPLE_PIPELINESTAT
|
||||
* 3 - SAMPLE_STREAMOUTSTAT*
|
||||
* 4 - *S_PARTIAL_FLUSH
|
||||
* 5 - TS events
|
||||
*/
|
||||
|
||||
/* EVENT_WRITE_EOP (SI-VI) & RELEASE_MEM (GFX9) */
|
||||
#define EVENT_TCL1_VOL_ACTION_ENA (1 << 12)
|
||||
#define EVENT_TC_VOL_ACTION_ENA (1 << 13)
|
||||
#define EVENT_TC_WB_ACTION_ENA (1 << 15)
|
||||
#define EVENT_TCL1_ACTION_ENA (1 << 16)
|
||||
#define EVENT_TC_ACTION_ENA (1 << 17)
|
||||
#define EVENT_TC_NC_ACTION_ENA (1 << 19) /* GFX9+ */
|
||||
#define EVENT_TC_WC_ACTION_ENA (1 << 20) /* GFX9+ */
|
||||
#define EVENT_TC_MD_ACTION_ENA (1 << 21) /* GFX9+ */
|
||||
#define EVENT_TCL1_VOL_ACTION_ENA (1 << 12)
|
||||
#define EVENT_TC_VOL_ACTION_ENA (1 << 13)
|
||||
#define EVENT_TC_WB_ACTION_ENA (1 << 15)
|
||||
#define EVENT_TCL1_ACTION_ENA (1 << 16)
|
||||
#define EVENT_TC_ACTION_ENA (1 << 17)
|
||||
#define EVENT_TC_NC_ACTION_ENA (1 << 19) /* GFX9+ */
|
||||
#define EVENT_TC_WC_ACTION_ENA (1 << 20) /* GFX9+ */
|
||||
#define EVENT_TC_MD_ACTION_ENA (1 << 21) /* GFX9+ */
|
||||
|
||||
|
||||
#define PREDICATION_OP_CLEAR 0x0
|
||||
#define PREDICATION_OP_ZPASS 0x1
|
||||
#define PREDICATION_OP_CLEAR 0x0
|
||||
#define PREDICATION_OP_ZPASS 0x1
|
||||
#define PREDICATION_OP_PRIMCOUNT 0x2
|
||||
#define PREDICATION_OP_BOOL64 0x3
|
||||
#define PREDICATION_OP_BOOL64 0x3
|
||||
|
||||
#define PRED_OP(x) ((x) << 16)
|
||||
|
||||
#define PREDICATION_CONTINUE (1 << 31)
|
||||
|
||||
#define PREDICATION_HINT_WAIT (0 << 12)
|
||||
#define PREDICATION_HINT_WAIT (0 << 12)
|
||||
#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
|
||||
|
||||
#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
|
||||
#define PREDICATION_DRAW_VISIBLE (1 << 8)
|
||||
#define PREDICATION_DRAW_VISIBLE (1 << 8)
|
||||
|
||||
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
|
||||
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
|
||||
|
||||
/* All registers defined in this packet section don't exist and the only
|
||||
* purpose of these definitions is to define packet encoding that
|
||||
* the IB parser understands, and also to have an accurate documentation.
|
||||
*/
|
||||
#define PKT3_NOP 0x10
|
||||
#define PKT3_SET_BASE 0x11
|
||||
#define PKT3_CLEAR_STATE 0x12
|
||||
#define PKT3_INDEX_BUFFER_SIZE 0x13
|
||||
#define PKT3_DISPATCH_DIRECT 0x15
|
||||
#define PKT3_DISPATCH_INDIRECT 0x16
|
||||
#define PKT3_OCCLUSION_QUERY 0x1F /* new for CIK */
|
||||
#define PKT3_SET_PREDICATION 0x20
|
||||
#define PKT3_COND_EXEC 0x22
|
||||
#define PKT3_PRED_EXEC 0x23
|
||||
#define PKT3_DRAW_INDIRECT 0x24
|
||||
#define PKT3_DRAW_INDEX_INDIRECT 0x25
|
||||
#define PKT3_INDEX_BASE 0x26
|
||||
#define PKT3_DRAW_INDEX_2 0x27
|
||||
#define PKT3_CONTEXT_CONTROL 0x28
|
||||
#define CC0_LOAD_GLOBAL_CONFIG(x) (((unsigned)(x) & 0x1) << 0)
|
||||
#define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x) & 0x1) << 1)
|
||||
#define CC0_LOAD_GLOBAL_UCONFIG(x) (((unsigned)(x) & 0x1) << 15)
|
||||
#define CC0_LOAD_GFX_SH_REGS(x) (((unsigned)(x) & 0x1) << 16)
|
||||
#define CC0_LOAD_CS_SH_REGS(x) (((unsigned)(x) & 0x1) << 24)
|
||||
#define CC0_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28)
|
||||
#define CC0_UPDATE_LOAD_ENABLES(x) (((unsigned)(x) & 0x1) << 31)
|
||||
#define CC1_SHADOW_GLOBAL_CONFIG(x) (((unsigned)(x) & 0x1) << 0)
|
||||
#define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x) & 0x1) << 1)
|
||||
#define CC1_SHADOW_GLOBAL_UCONFIG(x) (((unsigned)(x) & 0x1) << 15)
|
||||
#define CC1_SHADOW_GFX_SH_REGS(x) (((unsigned)(x) & 0x1) << 16)
|
||||
#define CC1_SHADOW_CS_SH_REGS(x) (((unsigned)(x) & 0x1) << 24)
|
||||
#define CC1_UPDATE_SHADOW_ENABLES(x) (((unsigned)(x) & 0x1) << 31)
|
||||
#define PKT3_INDEX_TYPE 0x2A /* not on GFX9 */
|
||||
#define PKT3_DRAW_INDIRECT_MULTI 0x2C
|
||||
#define R_2C3_DRAW_INDEX_LOC 0x2C3
|
||||
#define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x) & 0x1) << 30)
|
||||
#define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x) & 0x1) << 31)
|
||||
#define PKT3_DRAW_INDEX_AUTO 0x2D
|
||||
#define PKT3_DRAW_INDEX_IMMD 0x2E /* not on CIK */
|
||||
#define PKT3_NUM_INSTANCES 0x2F
|
||||
#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
|
||||
#define PKT3_INDIRECT_BUFFER_SI 0x32 /* not on CIK */
|
||||
#define PKT3_INDIRECT_BUFFER_CONST 0x33
|
||||
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
|
||||
#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
|
||||
#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x) & 0x3) << 1)
|
||||
#define STRMOUT_OFFSET_FROM_PACKET 0
|
||||
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
|
||||
#define STRMOUT_OFFSET_FROM_MEM 2
|
||||
#define STRMOUT_OFFSET_NONE 3
|
||||
#define STRMOUT_DATA_TYPE(x) (((unsigned)(x) & 0x1) << 7)
|
||||
#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8)
|
||||
#define PKT3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PKT3_WRITE_DATA 0x37
|
||||
#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PKT3_MEM_SEMAPHORE 0x39
|
||||
#define PKT3_MPEG_INDEX 0x3A /* not on CIK */
|
||||
#define PKT3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_EQUAL 3
|
||||
#define WAIT_REG_MEM_NOT_EQUAL 4
|
||||
#define WAIT_REG_MEM_GREATER_OR_EQUAL 5
|
||||
#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4)
|
||||
#define WAIT_REG_MEM_PFP (1 << 8)
|
||||
#define PKT3_MEM_WRITE 0x3D /* not on CIK */
|
||||
#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
|
||||
#define PKT3_NOP 0x10
|
||||
#define PKT3_SET_BASE 0x11
|
||||
#define PKT3_CLEAR_STATE 0x12
|
||||
#define PKT3_INDEX_BUFFER_SIZE 0x13
|
||||
#define PKT3_DISPATCH_DIRECT 0x15
|
||||
#define PKT3_DISPATCH_INDIRECT 0x16
|
||||
#define PKT3_OCCLUSION_QUERY 0x1F /* new for CIK */
|
||||
#define PKT3_SET_PREDICATION 0x20
|
||||
#define PKT3_COND_EXEC 0x22
|
||||
#define PKT3_PRED_EXEC 0x23
|
||||
#define PKT3_DRAW_INDIRECT 0x24
|
||||
#define PKT3_DRAW_INDEX_INDIRECT 0x25
|
||||
#define PKT3_INDEX_BASE 0x26
|
||||
#define PKT3_DRAW_INDEX_2 0x27
|
||||
#define PKT3_CONTEXT_CONTROL 0x28
|
||||
#define CC0_LOAD_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0)
|
||||
#define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1)
|
||||
#define CC0_LOAD_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15)
|
||||
#define CC0_LOAD_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16)
|
||||
#define CC0_LOAD_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24)
|
||||
#define CC0_LOAD_CE_RAM(x) (((unsigned)(x)&0x1) << 28)
|
||||
#define CC0_UPDATE_LOAD_ENABLES(x) (((unsigned)(x)&0x1) << 31)
|
||||
#define CC1_SHADOW_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0)
|
||||
#define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1)
|
||||
#define CC1_SHADOW_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15)
|
||||
#define CC1_SHADOW_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16)
|
||||
#define CC1_SHADOW_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24)
|
||||
#define CC1_UPDATE_SHADOW_ENABLES(x) (((unsigned)(x)&0x1) << 31)
|
||||
#define PKT3_INDEX_TYPE 0x2A /* not on GFX9 */
|
||||
#define PKT3_DRAW_INDIRECT_MULTI 0x2C
|
||||
#define R_2C3_DRAW_INDEX_LOC 0x2C3
|
||||
#define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x)&0x1) << 30)
|
||||
#define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x)&0x1) << 31)
|
||||
#define PKT3_DRAW_INDEX_AUTO 0x2D
|
||||
#define PKT3_DRAW_INDEX_IMMD 0x2E /* not on CIK */
|
||||
#define PKT3_NUM_INSTANCES 0x2F
|
||||
#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
|
||||
#define PKT3_INDIRECT_BUFFER_SI 0x32 /* not on CIK */
|
||||
#define PKT3_INDIRECT_BUFFER_CONST 0x33
|
||||
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
|
||||
#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
|
||||
#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x)&0x3) << 1)
|
||||
#define STRMOUT_OFFSET_FROM_PACKET 0
|
||||
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
|
||||
#define STRMOUT_OFFSET_FROM_MEM 2
|
||||
#define STRMOUT_OFFSET_NONE 3
|
||||
#define STRMOUT_DATA_TYPE(x) (((unsigned)(x)&0x1) << 7)
|
||||
#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x)&0x3) << 8)
|
||||
#define PKT3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PKT3_WRITE_DATA 0x37
|
||||
#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PKT3_MEM_SEMAPHORE 0x39
|
||||
#define PKT3_MPEG_INDEX 0x3A /* not on CIK */
|
||||
#define PKT3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_EQUAL 3
|
||||
#define WAIT_REG_MEM_NOT_EQUAL 4
|
||||
#define WAIT_REG_MEM_GREATER_OR_EQUAL 5
|
||||
#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x)&0x3) << 4)
|
||||
#define WAIT_REG_MEM_PFP (1 << 8)
|
||||
#define PKT3_MEM_WRITE 0x3D /* not on CIK */
|
||||
#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
|
||||
|
||||
#define PKT3_COPY_DATA 0x40
|
||||
#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
|
||||
#define COPY_DATA_REG 0
|
||||
#define COPY_DATA_SRC_MEM 1 /* only valid as source */
|
||||
#define COPY_DATA_TC_L2 2
|
||||
#define COPY_DATA_GDS 3
|
||||
#define COPY_DATA_PERF 4
|
||||
#define COPY_DATA_IMM 5
|
||||
#define COPY_DATA_TIMESTAMP 9
|
||||
#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8)
|
||||
#define COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM, deprecated */
|
||||
#define COPY_DATA_TC_L2 2
|
||||
#define COPY_DATA_GDS 3
|
||||
#define COPY_DATA_PERF 4
|
||||
#define COPY_DATA_DST_MEM 5
|
||||
#define COPY_DATA_COUNT_SEL (1 << 16)
|
||||
#define COPY_DATA_WR_CONFIRM (1 << 20)
|
||||
#define COPY_DATA_ENGINE_PFP (1 << 30)
|
||||
#define PKT3_PFP_SYNC_ME 0x42
|
||||
#define PKT3_COPY_DATA 0x40
|
||||
#define COPY_DATA_SRC_SEL(x) ((x)&0xf)
|
||||
#define COPY_DATA_REG 0
|
||||
#define COPY_DATA_SRC_MEM 1 /* only valid as source */
|
||||
#define COPY_DATA_TC_L2 2
|
||||
#define COPY_DATA_GDS 3
|
||||
#define COPY_DATA_PERF 4
|
||||
#define COPY_DATA_IMM 5
|
||||
#define COPY_DATA_TIMESTAMP 9
|
||||
#define COPY_DATA_DST_SEL(x) (((unsigned)(x)&0xf) << 8)
|
||||
#define COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM, deprecated */
|
||||
#define COPY_DATA_TC_L2 2
|
||||
#define COPY_DATA_GDS 3
|
||||
#define COPY_DATA_PERF 4
|
||||
#define COPY_DATA_DST_MEM 5
|
||||
#define COPY_DATA_COUNT_SEL (1 << 16)
|
||||
#define COPY_DATA_WR_CONFIRM (1 << 20)
|
||||
#define COPY_DATA_ENGINE_PFP (1 << 30)
|
||||
#define PKT3_PFP_SYNC_ME 0x42
|
||||
#define PKT3_SURFACE_SYNC 0x43 /* deprecated on CIK, use ACQUIRE_MEM */
|
||||
#define PKT3_ME_INITIALIZE 0x44 /* not on CIK */
|
||||
#define PKT3_COND_WRITE 0x45
|
||||
#define PKT3_EVENT_WRITE 0x46
|
||||
#define PKT3_EVENT_WRITE_EOP 0x47 /* not on GFX9 */
|
||||
#define EOP_DST_SEL(x) ((x) << 16)
|
||||
#define EOP_DST_SEL_MEM 0
|
||||
#define EOP_DST_SEL_TC_L2 1
|
||||
#define EOP_INT_SEL(x) ((x) << 24)
|
||||
#define EOP_INT_SEL_NONE 0
|
||||
#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
|
||||
#define EOP_DATA_SEL(x) ((x) << 29)
|
||||
#define EOP_DATA_SEL_DISCARD 0
|
||||
#define EOP_DATA_SEL_VALUE_32BIT 1
|
||||
#define EOP_DATA_SEL_VALUE_64BIT 2
|
||||
#define EOP_DATA_SEL_TIMESTAMP 3
|
||||
#define EOP_DATA_SEL_GDS 5
|
||||
#define EOP_DATA_GDS(dw_offset, num_dwords) ((dw_offset) | ((unsigned)(num_dwords) << 16))
|
||||
#define EOP_DST_SEL(x) ((x) << 16)
|
||||
#define EOP_DST_SEL_MEM 0
|
||||
#define EOP_DST_SEL_TC_L2 1
|
||||
#define EOP_INT_SEL(x) ((x) << 24)
|
||||
#define EOP_INT_SEL_NONE 0
|
||||
#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
|
||||
#define EOP_DATA_SEL(x) ((x) << 29)
|
||||
#define EOP_DATA_SEL_DISCARD 0
|
||||
#define EOP_DATA_SEL_VALUE_32BIT 1
|
||||
#define EOP_DATA_SEL_VALUE_64BIT 2
|
||||
#define EOP_DATA_SEL_TIMESTAMP 3
|
||||
#define EOP_DATA_SEL_GDS 5
|
||||
#define EOP_DATA_GDS(dw_offset, num_dwords) ((dw_offset) | ((unsigned)(num_dwords) << 16))
|
||||
/* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
|
||||
* are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
|
||||
* DST_SEL=MC. Only CIK chips are affected.
|
||||
*/
|
||||
/* fix CP DMA before uncommenting: */
|
||||
/*#define PKT3_EVENT_WRITE_EOS 0x48*/ /* not on GFX9 */
|
||||
#define PKT3_RELEASE_MEM 0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
|
||||
#define PKT3_CONTEXT_REG_RMW 0x51 /* older firmware versions on older chips don't have this */
|
||||
#define PKT3_ONE_REG_WRITE 0x57 /* not on CIK */
|
||||
#define PKT3_ACQUIRE_MEM 0x58 /* new for CIK */
|
||||
#define PKT3_REWIND 0x59 /* VI+ [any ring] or CIK [compute ring only] */
|
||||
#define PKT3_LOAD_UCONFIG_REG 0x5E /* GFX7+ */
|
||||
#define PKT3_LOAD_SH_REG 0x5F
|
||||
#define PKT3_LOAD_CONTEXT_REG 0x61
|
||||
#define PKT3_SET_CONFIG_REG 0x68
|
||||
#define PKT3_SET_CONTEXT_REG 0x69
|
||||
#define PKT3_SET_SH_REG 0x76
|
||||
#define PKT3_SET_SH_REG_OFFSET 0x77
|
||||
#define PKT3_SET_UCONFIG_REG 0x79 /* new for CIK */
|
||||
#define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */
|
||||
#define PKT3_LOAD_CONST_RAM 0x80
|
||||
#define PKT3_WRITE_CONST_RAM 0x81
|
||||
#define PKT3_DUMP_CONST_RAM 0x83
|
||||
#define PKT3_INCREMENT_CE_COUNTER 0x84
|
||||
#define PKT3_INCREMENT_DE_COUNTER 0x85
|
||||
#define PKT3_WAIT_ON_CE_COUNTER 0x86
|
||||
#define PKT3_SET_SH_REG_INDEX 0x9B
|
||||
#define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* new for VI */
|
||||
#define PKT3_RELEASE_MEM 0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
|
||||
#define PKT3_CONTEXT_REG_RMW 0x51 /* older firmware versions on older chips don't have this */
|
||||
#define PKT3_ONE_REG_WRITE 0x57 /* not on CIK */
|
||||
#define PKT3_ACQUIRE_MEM 0x58 /* new for CIK */
|
||||
#define PKT3_REWIND 0x59 /* VI+ [any ring] or CIK [compute ring only] */
|
||||
#define PKT3_LOAD_UCONFIG_REG 0x5E /* GFX7+ */
|
||||
#define PKT3_LOAD_SH_REG 0x5F
|
||||
#define PKT3_LOAD_CONTEXT_REG 0x61
|
||||
#define PKT3_SET_CONFIG_REG 0x68
|
||||
#define PKT3_SET_CONTEXT_REG 0x69
|
||||
#define PKT3_SET_SH_REG 0x76
|
||||
#define PKT3_SET_SH_REG_OFFSET 0x77
|
||||
#define PKT3_SET_UCONFIG_REG 0x79 /* new for CIK */
|
||||
#define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */
|
||||
#define PKT3_LOAD_CONST_RAM 0x80
|
||||
#define PKT3_WRITE_CONST_RAM 0x81
|
||||
#define PKT3_DUMP_CONST_RAM 0x83
|
||||
#define PKT3_INCREMENT_CE_COUNTER 0x84
|
||||
#define PKT3_INCREMENT_DE_COUNTER 0x85
|
||||
#define PKT3_WAIT_ON_CE_COUNTER 0x86
|
||||
#define PKT3_SET_SH_REG_INDEX 0x9B
|
||||
#define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* new for VI */
|
||||
|
||||
#define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
|
||||
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
|
||||
#define PKT_TYPE_C 0x3FFFFFFF
|
||||
#define PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
|
||||
#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
|
||||
#define PKT_COUNT_C 0xC000FFFF
|
||||
#define PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0)
|
||||
#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
|
||||
#define PKT0_BASE_INDEX_C 0xFFFF0000
|
||||
#define PKT3_IT_OPCODE_S(x) (((unsigned)(x) & 0xFF) << 8)
|
||||
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
|
||||
#define PKT3_IT_OPCODE_C 0xFFFF00FF
|
||||
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
|
||||
#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x) & 0x1) << 1)
|
||||
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
|
||||
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
|
||||
#define PKT_TYPE_S(x) (((unsigned)(x)&0x3) << 30)
|
||||
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
|
||||
#define PKT_TYPE_C 0x3FFFFFFF
|
||||
#define PKT_COUNT_S(x) (((unsigned)(x)&0x3FFF) << 16)
|
||||
#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
|
||||
#define PKT_COUNT_C 0xC000FFFF
|
||||
#define PKT0_BASE_INDEX_S(x) (((unsigned)(x)&0xFFFF) << 0)
|
||||
#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
|
||||
#define PKT0_BASE_INDEX_C 0xFFFF0000
|
||||
#define PKT3_IT_OPCODE_S(x) (((unsigned)(x)&0xFF) << 8)
|
||||
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
|
||||
#define PKT3_IT_OPCODE_C 0xFFFF00FF
|
||||
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
|
||||
#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x)&0x1) << 1)
|
||||
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
|
||||
#define PKT3(op, count, predicate) \
|
||||
(PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
|
||||
|
||||
#define PKT2_NOP_PAD PKT_TYPE_S(2)
|
||||
#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */
|
||||
#define PKT2_NOP_PAD PKT_TYPE_S(2)
|
||||
#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */
|
||||
|
||||
#define PKT3_CP_DMA 0x41
|
||||
#define PKT3_CP_DMA 0x41
|
||||
/* 1. header
|
||||
* 2. SRC_ADDR_LO [31:0] or DATA [31:0]
|
||||
* 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0]
|
||||
|
|
@ -256,7 +256,7 @@
|
|||
* 6. COMMAND [29:22] | BYTE_COUNT [20:0]
|
||||
*/
|
||||
|
||||
#define PKT3_DMA_DATA 0x50 /* new for CIK */
|
||||
#define PKT3_DMA_DATA 0x50 /* new for CIK */
|
||||
/* 1. header
|
||||
* 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
|
||||
* 2. SRC_ADDR_LO [31:0] or DATA [31:0]
|
||||
|
|
@ -267,69 +267,70 @@
|
|||
*/
|
||||
|
||||
/* SI async DMA packets */
|
||||
#define SI_DMA_PACKET(cmd, sub_cmd, n) ((((unsigned)(cmd) & 0xF) << 28) | \
|
||||
(((unsigned)(sub_cmd) & 0xFF) << 20) |\
|
||||
(((unsigned)(n) & 0xFFFFF) << 0))
|
||||
#define SI_DMA_PACKET(cmd, sub_cmd, n) \
|
||||
((((unsigned)(cmd)&0xF) << 28) | (((unsigned)(sub_cmd)&0xFF) << 20) | \
|
||||
(((unsigned)(n)&0xFFFFF) << 0))
|
||||
/* SI async DMA Packet types */
|
||||
#define SI_DMA_PACKET_WRITE 0x2
|
||||
#define SI_DMA_PACKET_COPY 0x3
|
||||
#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0
|
||||
#define SI_DMA_PACKET_WRITE 0x2
|
||||
#define SI_DMA_PACKET_COPY 0x3
|
||||
#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0
|
||||
/* The documentation says 0xffff8 is the maximum size in dwords, which is
|
||||
* 0x3fffe0 in bytes. */
|
||||
#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0
|
||||
#define SI_DMA_COPY_DWORD_ALIGNED 0x00
|
||||
#define SI_DMA_COPY_BYTE_ALIGNED 0x40
|
||||
#define SI_DMA_COPY_TILED 0x8
|
||||
#define SI_DMA_PACKET_INDIRECT_BUFFER 0x4
|
||||
#define SI_DMA_PACKET_SEMAPHORE 0x5
|
||||
#define SI_DMA_PACKET_FENCE 0x6
|
||||
#define SI_DMA_PACKET_TRAP 0x7
|
||||
#define SI_DMA_PACKET_SRBM_WRITE 0x9
|
||||
#define SI_DMA_PACKET_CONSTANT_FILL 0xd
|
||||
#define SI_DMA_PACKET_NOP 0xf
|
||||
#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0
|
||||
#define SI_DMA_COPY_DWORD_ALIGNED 0x00
|
||||
#define SI_DMA_COPY_BYTE_ALIGNED 0x40
|
||||
#define SI_DMA_COPY_TILED 0x8
|
||||
#define SI_DMA_PACKET_INDIRECT_BUFFER 0x4
|
||||
#define SI_DMA_PACKET_SEMAPHORE 0x5
|
||||
#define SI_DMA_PACKET_FENCE 0x6
|
||||
#define SI_DMA_PACKET_TRAP 0x7
|
||||
#define SI_DMA_PACKET_SRBM_WRITE 0x9
|
||||
#define SI_DMA_PACKET_CONSTANT_FILL 0xd
|
||||
#define SI_DMA_PACKET_NOP 0xf
|
||||
|
||||
/* CIK async DMA packets */
|
||||
#define CIK_SDMA_PACKET(op, sub_op, n) ((((unsigned)(n) & 0xFFFF) << 16) | \
|
||||
(((unsigned)(sub_op) & 0xFF) << 8) | \
|
||||
(((unsigned)(op) & 0xFF) << 0))
|
||||
#define CIK_SDMA_PACKET(op, sub_op, n) \
|
||||
((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) | \
|
||||
(((unsigned)(op)&0xFF) << 0))
|
||||
/* CIK async DMA packet types */
|
||||
#define CIK_SDMA_OPCODE_NOP 0x0
|
||||
#define CIK_SDMA_OPCODE_COPY 0x1
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR 0x0
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_TILED 0x1
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_SOA 0x3
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6
|
||||
#define CIK_SDMA_OPCODE_WRITE 0x2
|
||||
#define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0
|
||||
#define SDMA_WRTIE_SUB_OPCODE_TILED 0x1
|
||||
#define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4
|
||||
#define CIK_SDMA_PACKET_FENCE 0x5
|
||||
#define CIK_SDMA_PACKET_TRAP 0x6
|
||||
#define CIK_SDMA_PACKET_SEMAPHORE 0x7
|
||||
#define CIK_SDMA_PACKET_CONSTANT_FILL 0xb
|
||||
#define CIK_SDMA_OPCODE_TIMESTAMP 0xd
|
||||
#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP 0x0
|
||||
#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP 0x1
|
||||
#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP 0x2
|
||||
#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
|
||||
#define CIK_SDMA_OPCODE_NOP 0x0
|
||||
#define CIK_SDMA_OPCODE_COPY 0x1
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR 0x0
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_TILED 0x1
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_SOA 0x3
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5
|
||||
#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6
|
||||
#define CIK_SDMA_OPCODE_WRITE 0x2
|
||||
#define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0
|
||||
#define SDMA_WRTIE_SUB_OPCODE_TILED 0x1
|
||||
#define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4
|
||||
#define CIK_SDMA_PACKET_FENCE 0x5
|
||||
#define CIK_SDMA_PACKET_TRAP 0x6
|
||||
#define CIK_SDMA_PACKET_SEMAPHORE 0x7
|
||||
#define CIK_SDMA_PACKET_CONSTANT_FILL 0xb
|
||||
#define CIK_SDMA_OPCODE_TIMESTAMP 0xd
|
||||
#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP 0x0
|
||||
#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP 0x1
|
||||
#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP 0x2
|
||||
#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
|
||||
/* There is apparently an undocumented HW limitation that
|
||||
prevents the HW from copying the last 255 bytes of (1 << 22) - 1 */
|
||||
#define CIK_SDMA_COPY_MAX_SIZE 0x3fff00 /* almost 4 MB*/
|
||||
#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */
|
||||
#define CIK_SDMA_COPY_MAX_SIZE 0x3fff00 /* almost 4 MB*/
|
||||
#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */
|
||||
|
||||
enum amd_cmp_class_flags {
|
||||
S_NAN = 1 << 0, // Signaling NaN
|
||||
Q_NAN = 1 << 1, // Quiet NaN
|
||||
N_INFINITY = 1 << 2, // Negative infinity
|
||||
N_NORMAL = 1 << 3, // Negative normal
|
||||
N_SUBNORMAL = 1 << 4, // Negative subnormal
|
||||
N_ZERO = 1 << 5, // Negative zero
|
||||
P_ZERO = 1 << 6, // Positive zero
|
||||
P_SUBNORMAL = 1 << 7, // Positive subnormal
|
||||
P_NORMAL = 1 << 8, // Positive normal
|
||||
P_INFINITY = 1 << 9 // Positive infinity
|
||||
enum amd_cmp_class_flags
|
||||
{
|
||||
S_NAN = 1 << 0, // Signaling NaN
|
||||
Q_NAN = 1 << 1, // Quiet NaN
|
||||
N_INFINITY = 1 << 2, // Negative infinity
|
||||
N_NORMAL = 1 << 3, // Negative normal
|
||||
N_SUBNORMAL = 1 << 4, // Negative subnormal
|
||||
N_ZERO = 1 << 5, // Negative zero
|
||||
P_ZERO = 1 << 6, // Positive zero
|
||||
P_SUBNORMAL = 1 << 7, // Positive subnormal
|
||||
P_NORMAL = 1 << 8, // Positive normal
|
||||
P_INFINITY = 1 << 9 // Positive infinity
|
||||
};
|
||||
|
||||
#endif /* _SID_H */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue