nvc0: expose ARB_compute_variable_group_size

Only expose 512 threads/block on Fermi to not be limited by
32 GPRs/thread.

v4: - use 512 threads on Fermi, 1024 on Kepler+

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Samuel Pitoiset 2016-09-10 16:45:32 +02:00
parent 11e75fffeb
commit 56a0bed2c1

View file

@ -448,6 +448,12 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
RET(((uint64_t []) { 1024, 1024, 64 }));
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
RET((uint64_t []) { 1024 });
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
if (obj_class >= NVE4_COMPUTE_CLASS) {
RET((uint64_t []) { 1024 });
} else {
RET((uint64_t []) { 512 });
}
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
RET((uint64_t []) { 1ULL << 40 });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
@ -478,8 +484,6 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
RET((uint32_t []) { 64 });
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
RET((uint64_t []) { 0 });
default:
return 0;
}