Mirror of https://gitlab.freedesktop.org/mesa/mesa.git, synced 2025-12-22 04:50:11 +01:00
classic/i965: Remove driver
Reviewed-by: Emma Anholt <emma@anholt.net>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10153>
This commit is contained in:
parent 0cad451f00
commit cdde031ac2
124 changed files with 8 additions and 55418 deletions
@@ -825,7 +825,6 @@ fedora-release:
     -Wno-error=uninitialized
   CPP_ARGS: >
     -Wno-error=array-bounds
-  DRI_DRIVERS: "i965"
   DRI_LOADERS: >
     -D glx=dri
     -D gbm=enabled
@@ -1039,7 +1038,6 @@ debian-clang:
     -Wno-error=unused-variable
   DRI_LOADERS: >
     -D glvnd=true
-  DRI_DRIVERS: "auto"
   GALLIUM_DRIVERS: "iris,nouveau,kmsro,r300,r600,freedreno,swr,swrast,svga,v3d,vc4,virgl,etnaviv,panfrost,lima,zink,radeonsi,tegra,d3d12,crocus"
   VULKAN_DRIVERS: intel,amd,freedreno,broadcom,virtio-experimental
   CC: clang
@@ -1118,7 +1116,6 @@ debian-i386:
   CROSS: i386
   VULKAN_DRIVERS: intel,amd,swrast,virtio-experimental
   GALLIUM_DRIVERS: "iris,nouveau,r300,r600,radeonsi,swrast,virgl,zink,crocus"
-  DRI_DRIVERS: "i965"
   EXTRA_OPTION: >
     -D vulkan-layers=device-select,overlay

@@ -1155,7 +1152,6 @@ debian-mingw32-x86_64:
     -Wno-error=format
     -Wno-error=format-extra-args
   CPP_ARGS: $C_ARGS
-  DRI_DRIVERS: ""
   GALLIUM_DRIVERS: "swrast"
   EXTRA_OPTION: >
     -Dllvm=disabled
@@ -68,7 +68,6 @@ meson _build --native-file=native.file \
       -D cpp_args="$(echo -n $CPP_ARGS)" \
       -D libunwind=${UNWIND} \
       ${DRI_LOADERS} \
-      -D dri-drivers=${DRI_DRIVERS:-[]} \
       ${GALLIUM_ST} \
       -D gallium-drivers=${GALLIUM_DRIVERS:-[]} \
       -D vulkan-drivers=${VULKAN_DRIVERS:-[]} \
@@ -1,260 +0,0 @@
-#ifndef PREFER_CROCUS
-CHIPSET(0x29A2, i965, "BW", "Intel(R) 965G")
-CHIPSET(0x2992, i965, "BW", "Intel(R) 965Q")
-CHIPSET(0x2982, i965, "BW", "Intel(R) 965G")
-CHIPSET(0x2972, i965, "BW", "Intel(R) 946GZ")
-CHIPSET(0x2A02, i965, "CL", "Intel(R) 965GM")
-CHIPSET(0x2A12, i965, "CL", "Intel(R) 965GME/GLE")
-
-CHIPSET(0x2A42, g4x, "CTG", "Mobile Intel® GM45 Express Chipset")
-CHIPSET(0x2E02, g4x, "ELK", "Intel(R) Integrated Graphics Device")
-CHIPSET(0x2E12, g4x, "ELK", "Intel(R) Q45/Q43")
-CHIPSET(0x2E22, g4x, "ELK", "Intel(R) G45/G43")
-CHIPSET(0x2E32, g4x, "ELK", "Intel(R) G41")
-CHIPSET(0x2E42, g4x, "ELK", "Intel(R) B43")
-CHIPSET(0x2E92, g4x, "ELK", "Intel(R) B43")
-
-CHIPSET(0x0042, ilk, "ILK", "Intel(R) HD Graphics")
-CHIPSET(0x0046, ilk, "ILK", "Intel(R) HD Graphics")
-
-CHIPSET(0x0102, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
-CHIPSET(0x0112, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
-CHIPSET(0x0122, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
-CHIPSET(0x0106, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
-CHIPSET(0x0116, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
-CHIPSET(0x0126, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
-CHIPSET(0x010A, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
-
-CHIPSET(0x0152, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics 2500")
-CHIPSET(0x0162, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics 4000")
-CHIPSET(0x0156, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics 2500")
-CHIPSET(0x0166, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics 4000")
-CHIPSET(0x015a, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics")
-CHIPSET(0x016a, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics P4000")
-
-CHIPSET(0x0402, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0412, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
-CHIPSET(0x0422, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0406, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0416, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
-CHIPSET(0x0426, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x040A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x041A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics P4600/P4700")
-CHIPSET(0x042A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x040B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x041B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x042B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x040E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x041E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4400")
-CHIPSET(0x042E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0C0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0C1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0C2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0A02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0A22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0A06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4400")
-CHIPSET(0x0A26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics 5000")
-CHIPSET(0x0A0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0A2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0A0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0A2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0A0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0A1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4200")
-CHIPSET(0x0A2E, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Graphics 5100")
-CHIPSET(0x0D02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
-CHIPSET(0x0D22, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics 5200")
-CHIPSET(0x0D06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0D26, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics P5200")
-CHIPSET(0x0D0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0D2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0D0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0D2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x0D0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x0D1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x0D2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
-
-CHIPSET(0x0F31, byt, "BYT", "Intel(R) HD Graphics")
-CHIPSET(0x0F32, byt, "BYT", "Intel(R) HD Graphics")
-CHIPSET(0x0F33, byt, "BYT", "Intel(R) HD Graphics")
-CHIPSET(0x0157, byt, "BYT", "Intel(R) HD Graphics")
-CHIPSET(0x0155, byt, "BYT", "Intel(R) HD Graphics")
-
-CHIPSET(0x22B0, chv, "CHV", "Intel(R) HD Graphics")
-CHIPSET(0x22B1, chv, "BSW", "Intel(R) HD Graphics XXX") /* Overridden in brw_get_renderer_string */
-CHIPSET(0x22B2, chv, "CHV", "Intel(R) HD Graphics")
-CHIPSET(0x22B3, chv, "CHV", "Intel(R) HD Graphics")
-#endif
-
-#ifndef PREFER_IRIS
-CHIPSET(0x1602, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x1606, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x160A, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x160B, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x160D, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x160E, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
-CHIPSET(0x1612, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5600")
-CHIPSET(0x1616, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5500")
-CHIPSET(0x161A, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics P5700")
-CHIPSET(0x161B, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x161D, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics")
-CHIPSET(0x161E, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5300")
-CHIPSET(0x1622, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics 6200")
-CHIPSET(0x1626, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics 6000")
-CHIPSET(0x162A, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics P6300")
-CHIPSET(0x162B, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Graphics 6100")
-CHIPSET(0x162D, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics")
-CHIPSET(0x162E, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics")
-
-CHIPSET(0x1902, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
-CHIPSET(0x1906, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
-CHIPSET(0x190A, skl_gt1, "SKL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x190B, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
-CHIPSET(0x190E, skl_gt1, "SKL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x1912, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530")
-CHIPSET(0x1913, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
-CHIPSET(0x1915, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
-CHIPSET(0x1916, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520")
-CHIPSET(0x1917, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
-CHIPSET(0x191A, skl_gt2, "SKL GT2", "Intel(R) HD Graphics")
-CHIPSET(0x191B, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530")
-CHIPSET(0x191D, skl_gt2, "SKL GT2", "Intel(R) HD Graphics P530")
-CHIPSET(0x191E, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 515")
-CHIPSET(0x1921, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520")
-CHIPSET(0x1923, skl_gt3, "SKL GT3", "Intel(R) HD Graphics 535")
-CHIPSET(0x1926, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 540")
-CHIPSET(0x1927, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 550")
-CHIPSET(0x192A, skl_gt4, "SKL GT4", "Intel(R) HD Graphics")
-CHIPSET(0x192B, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 555")
-CHIPSET(0x192D, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics P555")
-CHIPSET(0x1932, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580")
-CHIPSET(0x193A, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580")
-CHIPSET(0x193B, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580")
-CHIPSET(0x193D, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580")
-
-CHIPSET(0x0A84, bxt, "BXT 3", "Intel(R) HD Graphics")
-CHIPSET(0x1A84, bxt, "BXT 3", "Intel(R) HD Graphics")
-CHIPSET(0x1A85, bxt_2x6, "BXT 2", "Intel(R) HD Graphics")
-CHIPSET(0x5A84, bxt, "APL 3", "Intel(R) HD Graphics 505")
-CHIPSET(0x5A85, bxt_2x6, "APL 2", "Intel(R) HD Graphics 500")
-
-CHIPSET(0x3184, glk, "GLK 3", "Intel(R) UHD Graphics 605")
-CHIPSET(0x3185, glk_2x6, "GLK 2", "Intel(R) UHD Graphics 600")
-
-CHIPSET(0x5902, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
-CHIPSET(0x5906, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
-CHIPSET(0x590A, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x5908, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x590B, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
-CHIPSET(0x590E, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x5913, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x5915, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x5917, kbl_gt2, "KBL GT2", "Intel(R) UHD Graphics 620")
-CHIPSET(0x5912, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 630")
-CHIPSET(0x5916, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 620")
-CHIPSET(0x591A, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630")
-CHIPSET(0x591B, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 630")
-CHIPSET(0x591D, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630")
-CHIPSET(0x591E, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 615")
-CHIPSET(0x5921, kbl_gt2, "KBL GT2F", "Intel(R) HD Graphics 620")
-CHIPSET(0x5923, kbl_gt3, "KBL GT3", "Intel(R) HD Graphics 635")
-CHIPSET(0x5926, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 640 (Kaby Lake GT3e)")
-CHIPSET(0x5927, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 650 (Kaby Lake GT3e)")
-CHIPSET(0x593B, kbl_gt4, "KBL GT4", "Intel(R) HD Graphics")
-
-CHIPSET(0x591C, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 615")
-CHIPSET(0x87C0, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 617")
-
-CHIPSET(0x87CA, cfl_gt2, "AML-CFL", "Intel(R) UHD Graphics")
-
-CHIPSET(0x3E90, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3E93, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3E99, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3E9C, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3E91, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x3E92, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x3E96, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x3E98, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x3E9A, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x3E9B, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x3E94, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x3EA9, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 620")
-CHIPSET(0x3EA5, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655")
-CHIPSET(0x3EA6, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 645")
-CHIPSET(0x3EA7, cfl_gt3, "CFL GT3", "Intel(R) HD Graphics")
-CHIPSET(0x3EA8, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655")
-
-CHIPSET(0x3EA1, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x3EA4, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x3EA0, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics 620")
-CHIPSET(0x3EA3, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x3EA2, cfl_gt3, "WHL GT3", "Intel(R) UHD Graphics")
-
-CHIPSET(0x9B21, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BA0, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BA2, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BA4, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BA5, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x9BA8, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610")
-CHIPSET(0x9BAA, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BAB, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9BAC, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x9B41, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BC0, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BC2, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BC4, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BC5, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x9BC6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x9BC8, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630")
-CHIPSET(0x9BCA, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BCB, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BCC, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
-CHIPSET(0x9BE6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-CHIPSET(0x9BF6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
-
-CHIPSET(0x8A50, icl_gt2, "ICL GT2", "Intel(R) HD Graphics")
-CHIPSET(0x8A51, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A52, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A53, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A54, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A56, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x8A57, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x8A58, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
-CHIPSET(0x8A59, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
-CHIPSET(0x8A5A, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A5B, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x8A5C, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
-CHIPSET(0x8A5D, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
-CHIPSET(0x8A71, icl_gt0_5, "ICL GT0.5", "Intel(R) HD Graphics")
-
-CHIPSET(0x4500, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4541, ehl_2x4, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4551, ehl_4x4, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4555, ehl_2x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4557, ehl_4x5, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4571, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E51, ehl_4x4, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E55, ehl_2x8, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E57, ehl_4x5, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E61, ehl_4x6, "JSL", "Intel(R) UHD Graphics")
-CHIPSET(0x4E71, ehl_4x8, "JSL", "Intel(R) UHD Graphics")
-#endif
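Aside (not part of the diff): a pci_ids header like the one deleted above contains no code of its own -- each consumer re-defines CHIPSET() to expand however it needs and then includes the header, the classic X-macro technique. A minimal self-contained sketch of the idea; the two-entry EXAMPLE_PCI_IDS table below is illustrative, not the real header:

#include <stdio.h>

/* Hypothetical stand-in for a pci_ids header: nothing but CHIPSET() rows. */
#define EXAMPLE_PCI_IDS \
   CHIPSET(0x0042, ilk, "ILK", "Intel(R) HD Graphics") \
   CHIPSET(0x0166, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics 4000")

/* Consumer 1: expand the rows into an array of device IDs. */
static const int chip_ids[] = {
#define CHIPSET(id, family, fam_str, name) id,
   EXAMPLE_PCI_IDS
#undef CHIPSET
};

/* Consumer 2: expand the same rows into a name lookup. */
static const char *
chip_name(int pci_id)
{
   switch (pci_id) {
#define CHIPSET(id, family, fam_str, name) case id: return name;
   EXAMPLE_PCI_IDS
#undef CHIPSET
   default: return "unknown";
   }
}

int
main(void)
{
   for (unsigned i = 0; i < sizeof(chip_ids) / sizeof(chip_ids[0]); i++)
      printf("0x%04x -> %s\n", chip_ids[i], chip_name(chip_ids[i]));
   return 0;
}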
meson.build (28 lines changed)
@@ -174,27 +174,9 @@ with_shared_glapi = with_shared_glapi and with_any_opengl
 system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'gnu/kfreebsd', 'dragonfly', 'linux', 'sunos'].contains(host_machine.system())
 
 dri_drivers = get_option('dri-drivers')
-if dri_drivers.contains('auto')
-  if system_has_kms_drm
-    # TODO: PPC, Sparc
-    if ['x86', 'x86_64'].contains(host_machine.cpu_family())
-      dri_drivers = ['i965']
-    elif ['arm', 'aarch64', 'mips', 'mips64'].contains(host_machine.cpu_family())
-      dri_drivers = []
-    else
-      error('Unknown architecture @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
-            host_machine.cpu_family()))
+if dri_drivers.length() != 0
+  error('Mesa\'s main branch no longer has any "classic" drivers, use the "amber" branch instead.')
 endif
-  elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
-    # only swrast would make sense here, but gallium swrast is a much better default
-    dri_drivers = []
-  else
-    error('Unknown OS @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
-          host_machine.system()))
-  endif
-endif
-
-with_dri_i965 = dri_drivers.contains('i965')
-
 with_dri = dri_drivers.length() != 0
@@ -205,7 +187,7 @@ if gallium_drivers.contains('auto')
   if ['x86', 'x86_64'].contains(host_machine.cpu_family())
     gallium_drivers = [
       'r300', 'r600', 'radeonsi', 'nouveau', 'virgl', 'svga', 'swrast',
-      'iris', 'crocus'
+      'iris', 'crocus', 'i915'
     ]
   elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
     gallium_drivers = [
@@ -293,7 +275,7 @@ with_broadcom_vk = _vulkan_drivers.contains('broadcom')
 with_any_vk = _vulkan_drivers.length() != 0
 
 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
-with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris or with_gallium_crocus
+with_any_intel = with_intel_vk or with_gallium_iris or with_gallium_crocus
 
 if with_swrast_vk and not with_gallium_softpipe
   error('swrast vulkan requires gallium swrast')
@@ -1493,8 +1475,6 @@ if cc.has_function('dl_iterate_phdr')
   pre_args += '-DHAVE_DL_ITERATE_PHDR'
 elif with_intel_vk
   error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
-elif with_dri_i965 and with_shader_cache
-  error('Intel i965 GL driver requires dl_iterate_phdr when built with shader caching.')
 endif
 
 # Determine whether or not the rt library is needed for time functions
@@ -54,9 +54,7 @@ option(
 option(
   'dri-drivers',
   type : 'array',
-  value : ['auto'],
-  choices : ['auto', 'i965'],
-  description : 'List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
+  description : 'DEPRECATED: List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
 option(
   'dri-drivers-path',
@@ -455,18 +453,6 @@ option(
   value : true,
   description : 'Enable direct rendering in GLX and EGL for DRI',
 )
-option(
-  'prefer-iris',
-  type : 'boolean',
-  value : true,
-  description : 'Prefer new Intel iris driver over older i965 driver'
-)
-option(
-  'prefer-crocus',
-  type : 'boolean',
-  value : false,
-  description : 'Prefer new crocus driver over older i965 driver for gen4-7'
-)
 option('egl-lib-suffix',
   type : 'string',
   value : '',
@@ -1247,7 +1247,7 @@ intel_get_device_info_from_pci_id(int pci_id,
 #undef CHIPSET
 #define CHIPSET(id, family, fam_str, name) \
    case id: *devinfo = intel_device_info_##family; break;
-#include "pci_ids/i965_pci_ids.h"
+#include "pci_ids/crocus_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
 
 #undef CHIPSET
@@ -1269,7 +1269,7 @@ intel_get_device_info_from_pci_id(int pci_id,
              sizeof(devinfo->name)); \
       strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
       break;
-#include "pci_ids/i965_pci_ids.h"
+#include "pci_ids/crocus_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
    default:
       strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));
@@ -14,13 +14,8 @@ main(int argc, char *argv[])
    } chipsets[] = {
 #undef CHIPSET
 #define CHIPSET(id, family, family_str, str_name) { .pci_id = id, .name = str_name, },
-#include "pci_ids/crocus_pci_ids.h"
-#include "pci_ids/i965_pci_ids.h"
 #include "pci_ids/iris_pci_ids.h"
-#undef CHIPSET
-#define CHIPSET(id, fam_str, str_name) { .pci_id = id, .name = str_name, },
-#include "pci_ids/i915_pci_ids.h"
-#undef CHIPSET
+#include "pci_ids/crocus_pci_ids.h"
    };
 
    for (uint32_t i = 0; i < ARRAY_SIZE(chipsets); i++) {
@@ -40,15 +40,6 @@ loader_c_args = [
   '-DUSE_DRICONF',
   '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
 ]
-
-if get_option('prefer-iris')
-  loader_c_args += ['-DPREFER_IRIS']
-endif
-
-if get_option('prefer-crocus')
-  loader_c_args += ['-DPREFER_CROCUS']
-endif
-
 libloader = static_library(
   'loader',
   ['loader_dri_helper.c', 'loader.c'],
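Aside (not part of the diff): the prefer-iris and prefer-crocus options removed above worked purely at compile time. The -DPREFER_* defines simply emptied out the matching #ifndef blocks in i965_pci_ids.h, so the i965 entries vanished from whichever chip-ID tables the loader built. A hedged sketch of the mechanism, with hypothetical IDs rather than the real table:

/* Sketch: which IDs the i965 list claims depends on the PREFER_* defines
 * passed by the build.  Compile with -DPREFER_CROCUS and the gen4-7 entry
 * drops out; with -DPREFER_IRIS the gen8+ entry drops out. */
static const int i965_example_ids[] = {
#ifndef PREFER_CROCUS
   0x0166,   /* IVB GT2: i965 claims it only when crocus isn't preferred */
#endif
#ifndef PREFER_IRIS
   0x1912,   /* SKL GT2: i965 claims it only when iris isn't preferred */
#endif
   -1        /* sentinel so the array is never empty */
};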
@@ -8,12 +8,6 @@
 # error "Only include from loader.c"
 #endif
 
-static const int i965_chip_ids[] = {
-#define CHIPSET(chip, family, family_str, name) chip,
-#include "pci_ids/i965_pci_ids.h"
-#undef CHIPSET
-};
-
 static const int crocus_chip_ids[] = {
 #define CHIPSET(chip, family, family_str, name) chip,
 #include "pci_ids/crocus_pci_ids.h"
@@ -53,7 +47,6 @@ static const struct {
    int num_chips_ids;
    bool (*predicate)(int fd);
 } driver_map[] = {
-   { 0x8086, "i965", i965_chip_ids, ARRAY_SIZE(i965_chip_ids) },
    { 0x8086, "crocus", crocus_chip_ids, ARRAY_SIZE(crocus_chip_ids) },
    { 0x8086, "iris", NULL, -1, is_kernel_i915 },
    { 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },

File diff suppressed because it is too large
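Aside (not part of the diff): driver_map is how the loader picks a DRI driver for a device -- scan for a matching PCI vendor ID, then either check the device ID against the entry's chip-ID list or, where the list is NULL, ask the predicate (e.g. is_kernel_i915). A minimal sketch of that lookup; the types and names here are illustrative, not the real loader API:

#include <stdbool.h>
#include <stddef.h>

struct driver_entry {
   int vendor_id;
   const char *driver;
   const int *chip_ids;        /* NULL means "use the predicate instead" */
   int num_chip_ids;
   bool (*predicate)(int fd);
};

/* First entry whose vendor matches and whose chip list (or predicate)
 * accepts the device wins; entry order is the priority order. */
static const char *
find_driver(const struct driver_entry *map, size_t n,
            int fd, int vendor_id, int chip_id)
{
   for (size_t i = 0; i < n; i++) {
      if (map[i].vendor_id != vendor_id)
         continue;
      if (map[i].chip_ids == NULL) {
         if (map[i].predicate && map[i].predicate(fd))
            return map[i].driver;
      } else {
         for (int j = 0; j < map[i].num_chip_ids; j++) {
            if (map[i].chip_ids[j] == chip_id)
               return map[i].driver;
         }
      }
   }
   return NULL;
}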
@@ -1,166 +0,0 @@
-#ifndef BRW_BATCH_H
-#define BRW_BATCH_H
-
-#include "main/mtypes.h"
-
-#include "brw_context.h"
-#include "brw_bufmgr.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* The kernel assumes batchbuffers are smaller than 256kB. */
-#define MAX_BATCH_SIZE (256 * 1024)
-
-/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
- * Address, which means that we can't put binding tables beyond 64kB. This
- * effectively limits the maximum statebuffer size to 64kB.
- */
-#define MAX_STATE_SIZE (64 * 1024)
-
-struct brw_batch;
-
-void brw_batch_init(struct brw_context *brw);
-void brw_batch_free(struct brw_batch *batch);
-void brw_batch_save_state(struct brw_context *brw);
-bool brw_batch_saved_state_is_empty(struct brw_context *brw);
-void brw_batch_reset_to_saved(struct brw_context *brw);
-void brw_batch_require_space(struct brw_context *brw, GLuint sz);
-int _brw_batch_flush_fence(struct brw_context *brw,
-                           int in_fence_fd, int *out_fence_fd,
-                           const char *file, int line);
-void brw_batch_maybe_noop(struct brw_context *brw);
-
-#define brw_batch_flush(brw) \
-   _brw_batch_flush_fence((brw), -1, NULL, __FILE__, __LINE__)
-
-#define brw_batch_flush_fence(brw, in_fence_fd, out_fence_fd) \
-   _brw_batch_flush_fence((brw), (in_fence_fd), (out_fence_fd), \
-                          __FILE__, __LINE__)
-
-/* Unlike bmBufferData, this currently requires the buffer be mapped.
- * Consider it a convenience function wrapping multple
- * brw_buffer_dword() calls.
- */
-void brw_batch_data(struct brw_context *brw,
-                    const void *data, GLuint bytes);
-
-static inline bool
-brw_batch_has_aperture_space(struct brw_context *brw, uint64_t extra_space)
-{
-   return brw->batch.aperture_space + extra_space <=
-          brw->screen->aperture_threshold;
-}
-
-bool brw_batch_references(struct brw_batch *batch, struct brw_bo *bo);
-
-#define RELOC_WRITE EXEC_OBJECT_WRITE
-#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
-/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
-#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
-
-void brw_use_pinned_bo(struct brw_batch *batch, struct brw_bo *bo,
-                       unsigned writeable_flag);
-
-uint64_t brw_batch_reloc(struct brw_batch *batch,
-                         uint32_t batch_offset,
-                         struct brw_bo *target,
-                         uint32_t target_offset,
-                         unsigned flags);
-uint64_t brw_state_reloc(struct brw_batch *batch,
-                         uint32_t batch_offset,
-                         struct brw_bo *target,
-                         uint32_t target_offset,
-                         unsigned flags);
-
-#define USED_BATCH(_batch) \
-   ((uintptr_t)((_batch).map_next - (_batch).batch.map))
-
-static inline uint32_t float_as_int(float f)
-{
-   union {
-      float f;
-      uint32_t d;
-   } fi;
-
-   fi.f = f;
-   return fi.d;
-}
-
-static inline void
-brw_batch_begin(struct brw_context *brw, int n)
-{
-   brw_batch_require_space(brw, n * 4);
-
-#ifdef DEBUG
-   brw->batch.emit = USED_BATCH(brw->batch);
-   brw->batch.total = n;
-#endif
-}
-
-static inline void
-brw_batch_advance(struct brw_context *brw)
-{
-#ifdef DEBUG
-   struct brw_batch *batch = &brw->batch;
-   unsigned int _n = USED_BATCH(*batch) - batch->emit;
-   assert(batch->total != 0);
-   if (_n != batch->total) {
-      fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
-              _n, batch->total);
-      abort();
-   }
-   batch->total = 0;
-#else
-   (void) brw;
-#endif
-}
-
-static inline bool
-brw_ptr_in_state_buffer(struct brw_batch *batch, void *p)
-{
-   return (char *) p >= (char *) batch->state.map &&
-          (char *) p < (char *) batch->state.map + batch->state.bo->size;
-}
-
-#define BEGIN_BATCH(n) do { \
-   brw_batch_begin(brw, (n)); \
-   uint32_t *__map = brw->batch.map_next; \
-   brw->batch.map_next += (n)
-
-#define BEGIN_BATCH_BLT(n) do { \
-   assert(brw->screen->devinfo.ver < 6); \
-   brw_batch_begin(brw, (n)); \
-   uint32_t *__map = brw->batch.map_next; \
-   brw->batch.map_next += (n)
-
-#define OUT_BATCH(d) *__map++ = (d)
-#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))
-
-#define OUT_RELOC(buf, flags, delta) do { \
-   uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
-   uint32_t reloc = \
-      brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \
-   OUT_BATCH(reloc); \
-} while (0)
-
-/* Handle 48-bit address relocations for Gfx8+ */
-#define OUT_RELOC64(buf, flags, delta) do { \
-   uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
-   uint64_t reloc64 = \
-      brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \
-   OUT_BATCH(reloc64); \
-   OUT_BATCH(reloc64 >> 32); \
-} while (0)
-
-#define ADVANCE_BATCH() \
-   assert(__map == brw->batch.map_next); \
-   brw_batch_advance(brw); \
-} while (0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
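Aside (not part of the diff): the macros in the header deleted above were the driver's packet-emission idiom. BEGIN_BATCH(n) reserves n dwords (and in DEBUG builds arms the count that brw_batch_advance() verifies), OUT_BATCH() stores one dword through the local __map cursor, and ADVANCE_BATCH() checks the cursor and closes the do/while that BEGIN_BATCH opened. A hedged sketch of the usage pattern; the opcode below is made up, not a real packet:

/* Sketch only: emit a hypothetical 3-dword command. */
static void
emit_example_packet(struct brw_context *brw, uint32_t payload, float f)
{
   BEGIN_BATCH(3);
   OUT_BATCH(0x7a000000 | (3 - 2));  /* invented opcode; length field is biased by 2 */
   OUT_BATCH(payload);
   OUT_BATCH_F(f);                   /* float stored bit-exactly via float_as_int() */
   ADVANCE_BATCH();
}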
@@ -1,307 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/**
- * \file brw_binding_tables.c
- *
- * State atoms which upload the "binding table" for each shader stage.
- *
- * Binding tables map a numeric "surface index" to the SURFACE_STATE structure
- * for a currently bound surface.  This allows SEND messages (such as sampler
- * or data port messages) to refer to a particular surface by number, rather
- * than by pointer.
- *
- * The binding table is stored as a (sparse) array of SURFACE_STATE entries;
- * surface indexes are simply indexes into the array.  The ordering of the
- * entries is entirely left up to software; see the SURF_INDEX_* macros in
- * brw_context.h to see our current layout.
- */
-
-#include "main/mtypes.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-#include "brw_batch.h"
-
-/**
- * Upload a shader stage's binding table as indirect state.
- *
- * This copies brw_stage_state::surf_offset[] into the indirect state section
- * of the batchbuffer (allocated by brw_state_batch()).
- */
-void
-brw_upload_binding_table(struct brw_context *brw,
-                         uint32_t packet_name,
-                         const struct brw_stage_prog_data *prog_data,
-                         struct brw_stage_state *stage_state)
-{
-   const struct intel_device_info *devinfo = &brw->screen->devinfo;
-
-   if (prog_data->binding_table.size_bytes == 0) {
-      /* There are no surfaces; skip making the binding table altogether. */
-      if (stage_state->bind_bo_offset == 0 && devinfo->ver < 9)
-         return;
-
-      stage_state->bind_bo_offset = 0;
-   } else {
-      /* Upload a new binding table. */
-      if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
-         brw_emit_buffer_surface_state(
-            brw, &stage_state->surf_offset[
-                    prog_data->binding_table.shader_time_start],
-            brw->shader_time.bo, 0, ISL_FORMAT_RAW,
-            brw->shader_time.bo->size, 1, RELOC_WRITE);
-      }
-      uint32_t *bind =
-         brw_state_batch(brw, prog_data->binding_table.size_bytes,
-                         32, &stage_state->bind_bo_offset);
-
-      /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
-      memcpy(bind, stage_state->surf_offset,
-             prog_data->binding_table.size_bytes);
-   }
-
-   brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
-
-   if (devinfo->ver >= 7) {
-      BEGIN_BATCH(2);
-      OUT_BATCH(packet_name << 16 | (2 - 2));
-      /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
-       * when hw-generated binding table is enabled.
-       */
-      OUT_BATCH(stage_state->bind_bo_offset);
-      ADVANCE_BATCH();
-   }
-}
-
-/**
- * State atoms which upload the binding table for a particular shader stage.
- * @{
- */
-
-/** Upload the VS binding table. */
-static void
-brw_vs_upload_binding_table(struct brw_context *brw)
-{
-   /* BRW_NEW_VS_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
-   brw_upload_binding_table(brw,
-                            _3DSTATE_BINDING_TABLE_POINTERS_VS,
-                            prog_data,
-                            &brw->vs.base);
-}
-
-const struct brw_tracked_state brw_vs_binding_table = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_VS_CONSTBUF |
-             BRW_NEW_VS_PROG_DATA |
-             BRW_NEW_SURFACES,
-   },
-   .emit = brw_vs_upload_binding_table,
-};
-
-/** Upload the PS binding table. */
-static void
-brw_upload_wm_binding_table(struct brw_context *brw)
-{
-   /* BRW_NEW_FS_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
-   brw_upload_binding_table(brw,
-                            _3DSTATE_BINDING_TABLE_POINTERS_PS,
-                            prog_data,
-                            &brw->wm.base);
-}
-
-const struct brw_tracked_state brw_wm_binding_table = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_FS_PROG_DATA |
-             BRW_NEW_SURFACES,
-   },
-   .emit = brw_upload_wm_binding_table,
-};
-
-/** Upload the TCS binding table (if tessellation stages are active). */
-static void
-brw_tcs_upload_binding_table(struct brw_context *brw)
-{
-   /* Skip if the tessellation stages are disabled. */
-   if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL)
-      return;
-
-   /* BRW_NEW_TCS_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
-   brw_upload_binding_table(brw,
-                            _3DSTATE_BINDING_TABLE_POINTERS_HS,
-                            prog_data,
-                            &brw->tcs.base);
-}
-
-const struct brw_tracked_state brw_tcs_binding_table = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_DEFAULT_TESS_LEVELS |
-             BRW_NEW_SURFACES |
-             BRW_NEW_TCS_CONSTBUF |
-             BRW_NEW_TCS_PROG_DATA,
-   },
-   .emit = brw_tcs_upload_binding_table,
-};
-
-/** Upload the TES binding table (if TES is active). */
-static void
-brw_tes_upload_binding_table(struct brw_context *brw)
-{
-   /* If there's no TES, skip changing anything. */
-   if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL)
-      return;
-
-   /* BRW_NEW_TES_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
-   brw_upload_binding_table(brw,
-                            _3DSTATE_BINDING_TABLE_POINTERS_DS,
-                            prog_data,
-                            &brw->tes.base);
-}
-
-const struct brw_tracked_state brw_tes_binding_table = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_SURFACES |
-             BRW_NEW_TES_CONSTBUF |
-             BRW_NEW_TES_PROG_DATA,
-   },
-   .emit = brw_tes_upload_binding_table,
-};
-
-/** Upload the GS binding table (if GS is active). */
-static void
-brw_gs_upload_binding_table(struct brw_context *brw)
-{
-   /* If there's no GS, skip changing anything. */
-   if (brw->programs[MESA_SHADER_GEOMETRY] == NULL)
-      return;
-
-   /* BRW_NEW_GS_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-   brw_upload_binding_table(brw,
-                            _3DSTATE_BINDING_TABLE_POINTERS_GS,
-                            prog_data,
-                            &brw->gs.base);
-}
-
-const struct brw_tracked_state brw_gs_binding_table = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_GS_CONSTBUF |
-             BRW_NEW_GS_PROG_DATA |
-             BRW_NEW_SURFACES,
-   },
-   .emit = brw_gs_upload_binding_table,
-};
-/** @} */
-
-/**
- * State atoms which emit 3DSTATE packets to update the binding table pointers.
- * @{
- */
-
-/**
- * (Gfx4-5) Upload the binding table pointers for all shader stages.
- *
- * The binding table pointers are relative to the surface state base address,
- * which points at the batchbuffer containing the streamed batch state.
- */
-static void
-gfx4_upload_binding_table_pointers(struct brw_context *brw)
-{
-   BEGIN_BATCH(6);
-   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
-   OUT_BATCH(brw->vs.base.bind_bo_offset);
-   OUT_BATCH(0); /* gs */
-   OUT_BATCH(0); /* clip */
-   OUT_BATCH(0); /* sf */
-   OUT_BATCH(brw->wm.base.bind_bo_offset);
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state brw_binding_table_pointers = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_BINDING_TABLE_POINTERS |
-             BRW_NEW_STATE_BASE_ADDRESS,
-   },
-   .emit = gfx4_upload_binding_table_pointers,
-};
-
-/**
- * (Sandybridge Only) Upload the binding table pointers for all shader stages.
- *
- * The binding table pointers are relative to the surface state base address,
- * which points at the batchbuffer containing the streamed batch state.
- */
-static void
-gfx6_upload_binding_table_pointers(struct brw_context *brw)
-{
-   BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
-             GFX6_BINDING_TABLE_MODIFY_VS |
-             GFX6_BINDING_TABLE_MODIFY_GS |
-             GFX6_BINDING_TABLE_MODIFY_PS |
-             (4 - 2));
-   OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */
-   if (brw->ff_gs.prog_active)
-      OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */
-   else
-      OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */
-   OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */
-   ADVANCE_BATCH();
-}
-
-const struct brw_tracked_state gfx6_binding_table_pointers = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH |
-             BRW_NEW_BLORP |
-             BRW_NEW_BINDING_TABLE_POINTERS |
-             BRW_NEW_STATE_BASE_ADDRESS,
-   },
-   .emit = gfx6_upload_binding_table_pointers,
-};
-
-/** @} */
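Aside (not part of the diff): brw_tracked_state, used throughout the file deleted above, is the driver's state-atom pattern -- each atom pairs the dirty bits it cares about with an emit callback, and the state-upload loop runs every atom whose mask intersects the dirty state accumulated since the last draw. A minimal sketch of that dispatch, with simplified types rather than the real driver structures:

#include <stdint.h>

struct tracked_state {
   uint64_t dirty_bits;        /* state changes this atom responds to */
   void (*emit)(void *ctx);    /* re-emits the packets for that state */
};

/* Run each atom whose interest mask overlaps the accumulated dirty bits. */
static void
upload_dirty_state(void *ctx, uint64_t dirty,
                   const struct tracked_state *atoms, int n)
{
   for (int i = 0; i < n; i++) {
      if (atoms[i].dirty_bits & dirty)
         atoms[i].emit(ctx);
   }
}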
@ -1,790 +0,0 @@
|
|||
/*
|
||||
* Copyright 2003 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "main/mtypes.h"
|
||||
#include "main/blit.h"
|
||||
#include "main/context.h"
|
||||
#include "main/enums.h"
|
||||
#include "main/fbobject.h"
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_defines.h"
|
||||
#include "brw_blit.h"
|
||||
#include "brw_buffers.h"
|
||||
#include "brw_fbo.h"
|
||||
#include "brw_batch.h"
|
||||
#include "brw_mipmap_tree.h"
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_BLIT
|
||||
|
||||
static void
|
||||
brw_miptree_set_alpha_to_one(struct brw_context *brw,
|
||||
struct brw_mipmap_tree *mt,
|
||||
int x, int y, int width, int height);
|
||||
|
||||
static GLuint translate_raster_op(enum gl_logicop_mode logicop)
|
||||
{
|
||||
return logicop | (logicop << 4);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
br13_for_cpp(int cpp)
|
||||
{
|
||||
switch (cpp) {
|
||||
case 16:
|
||||
return BR13_32323232;
|
||||
case 8:
|
||||
return BR13_16161616;
|
||||
case 4:
|
||||
return BR13_8888;
|
||||
case 2:
|
||||
return BR13_565;
|
||||
case 1:
|
||||
return BR13_8;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Emits the packet for switching the blitter from X to Y tiled or back.
|
||||
*
|
||||
* This has to be called in a single BEGIN_BATCH_BLT_TILED() /
|
||||
* ADVANCE_BATCH_TILED(). This is because BCS_SWCTRL is saved and restored as
|
||||
* part of the power context, not a render context, and if the batchbuffer was
|
||||
* to get flushed between setting and blitting, or blitting and restoring, our
|
||||
* tiling state would leak into other unsuspecting applications (like the X
|
||||
* server).
|
||||
*/
|
||||
static uint32_t *
|
||||
set_blitter_tiling(struct brw_context *brw,
|
||||
bool dst_y_tiled, bool src_y_tiled,
|
||||
uint32_t *__map)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &brw->screen->devinfo;
|
||||
const unsigned n_dwords = devinfo->ver >= 8 ? 5 : 4;
|
||||
assert(devinfo->ver >= 6);
|
||||
|
||||
/* Idle the blitter before we update how tiling is interpreted. */
|
||||
OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
if (n_dwords == 5)
|
||||
OUT_BATCH(0);
|
||||
|
||||
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
|
||||
OUT_BATCH(BCS_SWCTRL);
|
||||
OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |
|
||||
(dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |
|
||||
(src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));
|
||||
return __map;
|
||||
}
|
||||
#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map)
|
||||
|
||||
#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \
|
||||
unsigned set_tiling_batch_size = 0; \
|
||||
if (dst_y_tiled || src_y_tiled) { \
|
||||
if (devinfo->ver >= 8) \
|
||||
set_tiling_batch_size = 16; \
|
||||
else \
|
||||
set_tiling_batch_size = 14; \
|
||||
} \
|
||||
BEGIN_BATCH_BLT(n + set_tiling_batch_size); \
|
||||
if (dst_y_tiled || src_y_tiled) \
|
||||
SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled)
|
||||
|
||||
#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \
|
||||
if (dst_y_tiled || src_y_tiled) \
|
||||
SET_BLITTER_TILING(brw, false, false); \
|
||||
ADVANCE_BATCH()
|
||||
|
||||
bool
|
||||
brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst)
|
||||
{
|
||||
/* The BLT doesn't handle sRGB conversion */
|
||||
assert(src == _mesa_get_srgb_format_linear(src));
|
||||
assert(dst == _mesa_get_srgb_format_linear(dst));
|
||||
|
||||
/* No swizzle or format conversions possible, except... */
|
||||
if (src == dst)
|
||||
return true;
|
||||
|
||||
/* ...we can either discard the alpha channel when going from A->X,
|
||||
* or we can fill the alpha channel with 0xff when going from X->A
|
||||
*/
|
||||
if (src == MESA_FORMAT_B8G8R8A8_UNORM || src == MESA_FORMAT_B8G8R8X8_UNORM)
|
||||
return (dst == MESA_FORMAT_B8G8R8A8_UNORM ||
|
||||
dst == MESA_FORMAT_B8G8R8X8_UNORM);
|
||||
|
||||
if (src == MESA_FORMAT_R8G8B8A8_UNORM || src == MESA_FORMAT_R8G8B8X8_UNORM)
|
||||
return (dst == MESA_FORMAT_R8G8B8A8_UNORM ||
|
||||
dst == MESA_FORMAT_R8G8B8X8_UNORM);
|
||||
|
||||
/* We can also discard alpha when going from A2->X2 for 2 bit alpha,
|
||||
* however we can't fill the alpha channel with two 1 bits when going
|
||||
* from X2->A2, because brw_miptree_set_alpha_to_one() is not yet
|
||||
* ready for this / can only handle 8 bit alpha.
|
||||
*/
|
||||
if (src == MESA_FORMAT_B10G10R10A2_UNORM)
|
||||
return (dst == MESA_FORMAT_B10G10R10A2_UNORM ||
|
||||
dst == MESA_FORMAT_B10G10R10X2_UNORM);
|
||||
|
||||
if (src == MESA_FORMAT_R10G10B10A2_UNORM)
|
||||
return (dst == MESA_FORMAT_R10G10B10A2_UNORM ||
|
||||
dst == MESA_FORMAT_R10G10B10X2_UNORM);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
get_blit_intratile_offset_el(const struct brw_context *brw,
|
||||
struct brw_mipmap_tree *mt,
|
||||
uint32_t total_x_offset_el,
|
||||
uint32_t total_y_offset_el,
|
||||
uint64_t *tile_offset_B,
|
||||
uint32_t *x_offset_el,
|
||||
uint32_t *y_offset_el)
|
||||
{
|
||||
ASSERTED uint32_t z_offset_el, array_offset;
|
||||
isl_tiling_get_intratile_offset_el(mt->surf.tiling, mt->surf.dim,
|
||||
mt->surf.msaa_layout,
|
||||
mt->cpp * 8, mt->surf.samples,
|
||||
mt->surf.row_pitch_B,
|
||||
mt->surf.array_pitch_el_rows,
|
||||
total_x_offset_el, total_y_offset_el, 0, 0,
|
||||
tile_offset_B,
|
||||
x_offset_el, y_offset_el,
|
||||
&z_offset_el, &array_offset);
|
||||
assert(z_offset_el == 0);
|
||||
assert(array_offset == 0);
|
||||
|
||||
if (mt->surf.tiling == ISL_TILING_LINEAR) {
|
||||
/* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress:
|
||||
*
|
||||
* "Base address of the destination surface: X=0, Y=0. Lower 32bits
|
||||
* of the 48bit addressing. When Src Tiling is enabled (Bit_15
|
||||
* enabled), this address must be 4KB-aligned. When Tiling is not
|
||||
* enabled, this address should be CL (64byte) aligned."
|
||||
*
|
||||
* The offsets we get from ISL in the tiled case are already aligned.
|
||||
* In the linear case, we need to do some of our own aligning.
|
||||
*/
|
||||
uint32_t delta = *tile_offset_B & 63;
|
||||
assert(delta % mt->cpp == 0);
|
||||
*tile_offset_B -= delta;
|
||||
*x_offset_el += delta / mt->cpp;
|
||||
} else {
|
||||
assert(*tile_offset_B % 4096 == 0);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
alignment_valid(struct brw_context *brw, unsigned offset,
|
||||
enum isl_tiling tiling)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &brw->screen->devinfo;
|
||||
|
||||
/* Tiled buffers must be page-aligned (4K). */
|
||||
if (tiling != ISL_TILING_LINEAR)
|
||||
return (offset & 4095) == 0;
|
||||
|
||||
/* On Gfx8+, linear buffers must be cacheline-aligned. */
|
||||
if (devinfo->ver >= 8)
|
||||
return (offset & 63) == 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
xy_blit_cmd(enum isl_tiling src_tiling, enum isl_tiling dst_tiling,
|
||||
uint32_t cpp)
|
||||
{
|
||||
uint32_t CMD = 0;
|
||||
|
||||
assert(cpp <= 4);
|
||||
switch (cpp) {
|
||||
case 1:
|
||||
case 2:
|
||||
CMD = XY_SRC_COPY_BLT_CMD;
|
||||
break;
|
||||
case 4:
|
||||
CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
||||
if (dst_tiling != ISL_TILING_LINEAR)
|
||||
CMD |= XY_DST_TILED;
|
||||
|
||||
if (src_tiling != ISL_TILING_LINEAR)
|
||||
CMD |= XY_SRC_TILED;
|
||||
|
||||
return CMD;
|
||||
}
|
||||
|
||||
/* Copy BitBlt
|
||||
*/
|
||||
static bool
|
||||
emit_copy_blit(struct brw_context *brw,
|
||||
GLuint cpp,
|
||||
int32_t src_pitch,
|
||||
struct brw_bo *src_buffer,
|
||||
GLuint src_offset,
|
||||
enum isl_tiling src_tiling,
|
||||
int32_t dst_pitch,
|
||||
struct brw_bo *dst_buffer,
|
||||
GLuint dst_offset,
|
||||
enum isl_tiling dst_tiling,
|
||||
GLshort src_x, GLshort src_y,
|
||||
GLshort dst_x, GLshort dst_y,
|
||||
GLshort w, GLshort h,
|
||||
enum gl_logicop_mode logic_op)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &brw->screen->devinfo;
|
||||
GLuint CMD, BR13;
|
||||
int dst_y2 = dst_y + h;
|
||||
int dst_x2 = dst_x + w;
|
||||
bool dst_y_tiled = dst_tiling == ISL_TILING_Y0;
|
||||
bool src_y_tiled = src_tiling == ISL_TILING_Y0;
|
||||
uint32_t src_tile_w, src_tile_h;
|
||||
uint32_t dst_tile_w, dst_tile_h;
|
||||
|
||||
if ((dst_y_tiled || src_y_tiled) && devinfo->ver < 6)
|
||||
return false;
|
||||
|
||||
const unsigned bo_sizes = dst_buffer->size + src_buffer->size;
|
||||
|
||||
/* do space check before going any further */
|
||||
if (!brw_batch_has_aperture_space(brw, bo_sizes))
|
||||
brw_batch_flush(brw);
|
||||
|
||||
if (!brw_batch_has_aperture_space(brw, bo_sizes))
|
||||
return false;
|
||||
|
||||
unsigned length = devinfo->ver >= 8 ? 10 : 8;
|
||||
|
||||
brw_batch_require_space(brw, length * 4);
|
||||
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
|
||||
__func__,
|
||||
src_buffer, src_pitch, src_offset, src_x, src_y,
|
||||
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
|
||||
|
||||
isl_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h);
|
||||
isl_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h);
|
||||
|
||||
/* For Tiled surfaces, the pitch has to be a multiple of the Tile width
|
||||
* (X direction width of the Tile). This is ensured while allocating the
|
||||
* buffer object.
|
||||
*/
|
||||
assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
|
||||
assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
|
||||
|
||||
/* For big formats (such as floating point), do the copy using 16 or
|
||||
* 32bpp and multiply the coordinates.
|
||||
*/
|
||||
if (cpp > 4) {
|
||||
if (cpp % 4 == 2) {
|
||||
dst_x *= cpp / 2;
|
||||
dst_x2 *= cpp / 2;
|
||||
src_x *= cpp / 2;
|
||||
cpp = 2;
|
||||
} else {
|
||||
assert(cpp % 4 == 0);
|
||||
dst_x *= cpp / 4;
|
||||
dst_x2 *= cpp / 4;
|
||||
src_x *= cpp / 4;
|
||||
cpp = 4;
|
||||
}
|
||||
}
|
||||
|
||||
if (!alignment_valid(brw, dst_offset, dst_tiling))
|
||||
return false;
|
||||
if (!alignment_valid(brw, src_offset, src_tiling))
|
||||
return false;
|
||||
|
||||
/* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
|
||||
* the low bits. Offsets must be naturally aligned.
|
||||
*/
|
||||
if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
|
||||
dst_pitch % 4 != 0 || dst_offset % cpp != 0)
|
||||
return false;
|
||||
|
||||
assert(cpp <= 4);
|
||||
BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
|
||||
|
||||
CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp);
|
||||
|
||||
/* For tiled source and destination, pitch value should be specified
    * as a number of Dwords.
    */
   if (dst_tiling != ISL_TILING_LINEAR)
      dst_pitch /= 4;

   if (src_tiling != ISL_TILING_LINEAR)
      src_pitch /= 4;

   if (dst_y2 <= dst_y || dst_x2 <= dst_x)
      return true;

   assert(dst_x < dst_x2);
   assert(dst_y < dst_y2);

   BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
   OUT_BATCH(CMD | (length - 2));
   OUT_BATCH(BR13 | (uint16_t)dst_pitch);
   OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X));
   OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X));
   if (devinfo->ver >= 8) {
      OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
   } else {
      OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
   }
   OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X));
   OUT_BATCH((uint16_t)src_pitch);
   if (devinfo->ver >= 8) {
      OUT_RELOC64(src_buffer, 0, src_offset);
   } else {
      OUT_RELOC(src_buffer, 0, src_offset);
   }

   ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);

   brw_emit_mi_flush(brw);

   return true;
}

static bool
emit_miptree_blit(struct brw_context *brw,
                  struct brw_mipmap_tree *src_mt,
                  uint32_t src_x, uint32_t src_y,
                  struct brw_mipmap_tree *dst_mt,
                  uint32_t dst_x, uint32_t dst_y,
                  uint32_t width, uint32_t height,
                  bool reverse, enum gl_logicop_mode logicop)
{
   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
    * Data Size Limitations):
    *
    *    The BLT engine is capable of transferring very large quantities of
    *    graphics data. Any graphics data read from and written to the
    *    destination is permitted to represent a number of pixels that
    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
    *    at the destination. The maximum number of pixels that may be
    *    represented per scan line's worth of graphics data depends on the
    *    color depth.
    *
    * The blitter's pitch is a signed 16-bit integer, but measured in bytes
    * for linear surfaces and DWords for tiled surfaces.  So the maximum
    * pitch is 32k linear and 128k tiled.
    */
   if (brw_miptree_blt_pitch(src_mt) >= 32768 ||
       brw_miptree_blt_pitch(dst_mt) >= 32768) {
      perf_debug("Falling back due to >= 32k/128k pitch\n");
      return false;
   }

   /* We need to split the blit into chunks that each fit within the blitter's
    * restrictions.  We can't use a chunk size of 32768 because we need to
    * ensure that src_tile_x + chunk_size fits.  We choose 16384 because it's
    * a nice round power of two, big enough that performance won't suffer, and
    * small enough to guarantee everything fits.
    */
   const uint32_t max_chunk_size = 16384;

   for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
      for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
         const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
         const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);

         uint64_t src_offset;
         uint32_t src_tile_x, src_tile_y;
         get_blit_intratile_offset_el(brw, src_mt,
                                      src_x + chunk_x, src_y + chunk_y,
                                      &src_offset, &src_tile_x, &src_tile_y);

         uint64_t dst_offset;
         uint32_t dst_tile_x, dst_tile_y;
         get_blit_intratile_offset_el(brw, dst_mt,
                                      dst_x + chunk_x, dst_y + chunk_y,
                                      &dst_offset, &dst_tile_x, &dst_tile_y);

         if (!emit_copy_blit(brw,
                             src_mt->cpp,
                             reverse ? -src_mt->surf.row_pitch_B :
                                        src_mt->surf.row_pitch_B,
                             src_mt->bo, src_mt->offset + src_offset,
                             src_mt->surf.tiling,
                             dst_mt->surf.row_pitch_B,
                             dst_mt->bo, dst_mt->offset + dst_offset,
                             dst_mt->surf.tiling,
                             src_tile_x, src_tile_y,
                             dst_tile_x, dst_tile_y,
                             chunk_w, chunk_h,
                             logicop)) {
            /* If this is ever going to fail, it will fail on the first chunk */
            assert(chunk_x == 0 && chunk_y == 0);
            return false;
         }
      }
   }

   return true;
}
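
As a quick illustration of the chunking arithmetic above (a standalone sketch, not part of the removed driver; the 20000x5000 blit size is invented for the example), the loop visits each 16384-aligned origin and clamps the last chunk to the surface edge:

#include <stdint.h>
#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   const uint32_t max_chunk_size = 16384;
   const uint32_t width = 20000, height = 5000;

   /* Same loop shape as emit_miptree_blit() above. */
   for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
      for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
         const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
         const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
         printf("chunk (%u,%u) %ux%u\n", chunk_x, chunk_y, chunk_w, chunk_h);
      }
   }
   return 0;
}

This prints two chunks, (0,0) 16384x5000 and (16384,0) 3616x5000, each of which satisfies the blitter's coordinate limits on its own.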

/**
 * Implements a rectangular block transfer (blit) of pixels between two
 * miptrees.
 *
 * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
 * but limited, pitches and sizes allowed.
 *
 * The src/dst coordinates are relative to the given level/slice of the
 * miptree.
 *
 * If @src_flip or @dst_flip is set, then the rectangle within that miptree
 * will be inverted (including scanline order) when copying.  This is common
 * in GL when copying between window system and user-created
 * renderbuffers/textures.
 */
bool
brw_miptree_blit(struct brw_context *brw,
                 struct brw_mipmap_tree *src_mt,
                 int src_level, int src_slice,
                 uint32_t src_x, uint32_t src_y, bool src_flip,
                 struct brw_mipmap_tree *dst_mt,
                 int dst_level, int dst_slice,
                 uint32_t dst_x, uint32_t dst_y, bool dst_flip,
                 uint32_t width, uint32_t height,
                 enum gl_logicop_mode logicop)
{
   /* The blitter doesn't understand multisampling at all. */
   if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1)
      return false;

   /* No sRGB decode or encode is done by the hardware blitter, which is
    * consistent with what we want in many callers (glCopyTexSubImage(),
    * texture validation, etc.).
    */
   mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
   mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);

   /* The blitter doesn't support doing any format conversions.  We do also
    * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
    * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
    * channel to 1.0 at the end.  Also trivially ARGB2101010 to XRGB2101010,
    * but not XRGB2101010 to ARGB2101010 yet.
    */
   if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) {
      perf_debug("%s: Can't use hardware blitter from %s to %s, "
                 "falling back.\n", __func__,
                 _mesa_get_format_name(src_format),
                 _mesa_get_format_name(dst_format));
      return false;
   }

   /* The blitter has no idea about HiZ or fast color clears, so we need to
    * resolve the miptrees before we do anything.
    */
   brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false);
   brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true);

   if (src_flip) {
      const unsigned h0 = src_mt->surf.phys_level0_sa.height;
      src_y = minify(h0, src_level - src_mt->first_level) - src_y - height;
   }

   if (dst_flip) {
      const unsigned h0 = dst_mt->surf.phys_level0_sa.height;
      dst_y = minify(h0, dst_level - dst_mt->first_level) - dst_y - height;
   }

   uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
   brw_miptree_get_image_offset(src_mt, src_level, src_slice,
                                &src_image_x, &src_image_y);
   brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
                                &dst_image_x, &dst_image_y);
   src_x += src_image_x;
   src_y += src_image_y;
   dst_x += dst_image_x;
   dst_y += dst_image_y;

   if (!emit_miptree_blit(brw, src_mt, src_x, src_y,
                          dst_mt, dst_x, dst_y, width, height,
                          src_flip != dst_flip, logicop)) {
      return false;
   }

   /* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */
   if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0 &&
       _mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) {
      brw_miptree_set_alpha_to_one(brw, dst_mt, dst_x, dst_y, width, height);
   }

   return true;
}

bool
brw_miptree_copy(struct brw_context *brw,
                 struct brw_mipmap_tree *src_mt,
                 int src_level, int src_slice,
                 uint32_t src_x, uint32_t src_y,
                 struct brw_mipmap_tree *dst_mt,
                 int dst_level, int dst_slice,
                 uint32_t dst_x, uint32_t dst_y,
                 uint32_t src_width, uint32_t src_height)
{
   /* The blitter doesn't understand multisampling at all. */
   if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1)
      return false;

   if (src_mt->format == MESA_FORMAT_S_UINT8)
      return false;

   /* The blitter has no idea about HiZ or fast color clears, so we need to
    * resolve the miptrees before we do anything.
    */
   brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false);
   brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true);

   uint32_t src_image_x, src_image_y;
   brw_miptree_get_image_offset(src_mt, src_level, src_slice,
                                &src_image_x, &src_image_y);

   if (_mesa_is_format_compressed(src_mt->format)) {
      GLuint bw, bh;
      _mesa_get_format_block_size(src_mt->format, &bw, &bh);

      /* Compressed textures need not have dimensions that are a multiple of
       * the block size.  Rectangles in compressed textures do need to be a
       * multiple of the block size.  The one exception is that the right and
       * bottom edges may be at the right or bottom edge of the miplevel even
       * if it's not aligned.
       */
      assert(src_x % bw == 0);
      assert(src_y % bh == 0);

      assert(src_width % bw == 0 ||
             src_x + src_width ==
             minify(src_mt->surf.logical_level0_px.width, src_level));
      assert(src_height % bh == 0 ||
             src_y + src_height ==
             minify(src_mt->surf.logical_level0_px.height, src_level));

      src_x /= (int)bw;
      src_y /= (int)bh;
      src_width = DIV_ROUND_UP(src_width, (int)bw);
      src_height = DIV_ROUND_UP(src_height, (int)bh);
   }
   src_x += src_image_x;
   src_y += src_image_y;

   uint32_t dst_image_x, dst_image_y;
   brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
                                &dst_image_x, &dst_image_y);

   if (_mesa_is_format_compressed(dst_mt->format)) {
      GLuint bw, bh;
      _mesa_get_format_block_size(dst_mt->format, &bw, &bh);

      assert(dst_x % bw == 0);
      assert(dst_y % bh == 0);

      dst_x /= (int)bw;
      dst_y /= (int)bh;
   }
   dst_x += dst_image_x;
   dst_y += dst_image_y;

   return emit_miptree_blit(brw, src_mt, src_x, src_y,
                            dst_mt, dst_x, dst_y,
                            src_width, src_height, false, COLOR_LOGICOP_COPY);
}
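
The compressed-format path above rescales pixel coordinates into block coordinates before blitting. A minimal sketch of that arithmetic, assuming a hypothetical 4x4-block format (DXT1-style); the rectangle values are invented:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   const uint32_t bw = 4, bh = 4;                 /* assumed block size */
   uint32_t x = 8, y = 4, width = 13, height = 6; /* pixel-space rect */

   /* The origin must be block-aligned; the extent may be unaligned only
    * at the right/bottom miplevel edge, so it rounds up to whole blocks. */
   x /= bw;
   y /= bh;
   width = DIV_ROUND_UP(width, bw);
   height = DIV_ROUND_UP(height, bh);

   printf("block rect (%u,%u) %ux%u\n", x, y, width, height); /* (2,1) 4x2 */
   return 0;
}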

bool
brw_emit_immediate_color_expand_blit(struct brw_context *brw,
                                     GLuint cpp,
                                     GLubyte *src_bits, GLuint src_size,
                                     GLuint fg_color,
                                     GLshort dst_pitch,
                                     struct brw_bo *dst_buffer,
                                     GLuint dst_offset,
                                     enum isl_tiling dst_tiling,
                                     GLshort x, GLshort y,
                                     GLshort w, GLshort h,
                                     enum gl_logicop_mode logic_op)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   if (dst_tiling != ISL_TILING_LINEAR) {
      if (dst_offset & 4095)
         return false;
      if (dst_tiling == ISL_TILING_Y0)
         return false;
   }

   assert((unsigned) logic_op <= 0x0f);
   assert(dst_pitch > 0);

   if (w < 0 || h < 0)
      return true;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __func__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   unsigned xy_setup_blt_length = devinfo->ver >= 8 ? 10 : 8;
   brw_batch_require_space(brw, (xy_setup_blt_length * 4) +
                                (3 * 4) + dwords * 4);

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
   if (dst_tiling != ISL_TILING_LINEAR) {
      opcode |= XY_DST_TILED;
      dst_pitch /= 4;
   }

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   br13 |= br13_for_cpp(cpp);

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
   if (dst_tiling != ISL_TILING_LINEAR)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
   OUT_BATCH(opcode | (xy_setup_blt_length - 2));
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   if (devinfo->ver >= 8) {
      OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
   } else {
      OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
   }
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */
   if (devinfo->ver >= 8)
      OUT_BATCH(0);

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X));
   OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X));
   ADVANCE_BATCH();

   brw_batch_data(brw, src_bits, dwords * 4);

   brw_emit_mi_flush(brw);

   return true;
}

/**
 * Used to initialize the alpha value of an ARGB8888 miptree after copying
 * into it from an XRGB8888 source.
 *
 * This is very common with glCopyTexImage2D().  Note that the coordinates are
 * relative to the start of the miptree, not relative to a slice within the
 * miptree.
 */
static void
brw_miptree_set_alpha_to_one(struct brw_context *brw,
                             struct brw_mipmap_tree *mt,
                             int x, int y, int width, int height)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   uint32_t BR13, CMD;
   int pitch, cpp;

   pitch = mt->surf.row_pitch_B;
   cpp = mt->cpp;

   DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
       __func__, mt->bo, pitch, x, y, width, height);

   /* Note: Currently only handles an 8 bit alpha channel.  Extension to a
    * < 8 bit alpha channel would likely be possible via ROP code 0xfa
    * instead of 0xf0 and writing a suitable bit-mask instead of 0xffffffff.
    */
   BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
   CMD = XY_COLOR_BLT_CMD;
   CMD |= XY_BLT_WRITE_ALPHA;

   if (mt->surf.tiling != ISL_TILING_LINEAR) {
      CMD |= XY_DST_TILED;
      pitch /= 4;
   }
   BR13 |= pitch;

   /* do space check before going any further */
   if (!brw_batch_has_aperture_space(brw, mt->bo->size))
      brw_batch_flush(brw);

   unsigned length = devinfo->ver >= 8 ? 7 : 6;
   const bool dst_y_tiled = mt->surf.tiling == ISL_TILING_Y0;

   /* We need to split the blit into chunks that each fit within the blitter's
    * restrictions.  We can't use a chunk size of 32768 because we need to
    * ensure that src_tile_x + chunk_size fits.  We choose 16384 because it's
    * a nice round power of two, big enough that performance won't suffer, and
    * small enough to guarantee everything fits.
    */
   const uint32_t max_chunk_size = 16384;

   for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
      for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
         const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
         const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);

         uint64_t offset_B;
         uint32_t tile_x, tile_y;
         get_blit_intratile_offset_el(brw, mt,
                                      x + chunk_x, y + chunk_y,
                                      &offset_B, &tile_x, &tile_y);

         BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false);
         OUT_BATCH(CMD | (length - 2));
         OUT_BATCH(BR13);
         OUT_BATCH(SET_FIELD(y + chunk_y, BLT_Y) |
                   SET_FIELD(x + chunk_x, BLT_X));
         OUT_BATCH(SET_FIELD(y + chunk_y + chunk_h, BLT_Y) |
                   SET_FIELD(x + chunk_x + chunk_w, BLT_X));
         if (devinfo->ver >= 8) {
            OUT_RELOC64(mt->bo, RELOC_WRITE, mt->offset + offset_B);
         } else {
            OUT_RELOC(mt->bo, RELOC_WRITE, mt->offset + offset_B);
         }
         OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
         ADVANCE_BATCH_TILED(dst_y_tiled, false);
      }
   }

   brw_emit_mi_flush(brw);
}
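
The ROP codes named in the comment above follow the standard BLT raster-op encoding: bit (pat<<2 | src<<1 | dst) of the 8-bit code selects the result bit, so 0xf0 is "pattern" and 0xfa is "pattern OR destination". A small software model of that encoding (an illustrative sketch, not driver code):

#include <assert.h>
#include <stdint.h>

/* Evaluate an 8-bit BLT raster op bit by bit. */
static uint32_t
apply_rop(uint8_t rop, uint32_t pat, uint32_t src, uint32_t dst)
{
   uint32_t out = 0;
   for (unsigned bit = 0; bit < 32; bit++) {
      const unsigned p = (pat >> bit) & 1;
      const unsigned s = (src >> bit) & 1;
      const unsigned d = (dst >> bit) & 1;
      out |= (uint32_t)((rop >> ((p << 2) | (s << 1) | d)) & 1) << bit;
   }
   return out;
}

int main(void)
{
   assert(apply_rop(0xf0, 0xffffffffu, 0, 0x12345678u) == 0xffffffffu);
   assert(apply_rop(0xfa, 0xff000000u, 0, 0x00000001u) == 0xff000001u);
   return 0;
}

Since the command enables only XY_BLT_WRITE_ALPHA, ROP 0xf0 with the solid 0xffffffff color as the "pattern" writes alpha = 0xff while the RGB channels stay untouched.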

@@ -1,65 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef BRW_BLIT_H
#define BRW_BLIT_H

#include "brw_context.h"

bool brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst);

bool brw_miptree_blit(struct brw_context *brw,
                      struct brw_mipmap_tree *src_mt,
                      int src_level, int src_slice,
                      uint32_t src_x, uint32_t src_y, bool src_flip,
                      struct brw_mipmap_tree *dst_mt,
                      int dst_level, int dst_slice,
                      uint32_t dst_x, uint32_t dst_y, bool dst_flip,
                      uint32_t width, uint32_t height,
                      enum gl_logicop_mode logicop);

bool brw_miptree_copy(struct brw_context *brw,
                      struct brw_mipmap_tree *src_mt,
                      int src_level, int src_slice,
                      uint32_t src_x, uint32_t src_y,
                      struct brw_mipmap_tree *dst_mt,
                      int dst_level, int dst_slice,
                      uint32_t dst_x, uint32_t dst_y,
                      uint32_t src_width, uint32_t src_height);

bool
brw_emit_immediate_color_expand_blit(struct brw_context *brw,
                                     GLuint cpp,
                                     GLubyte *src_bits, GLuint src_size,
                                     GLuint fg_color,
                                     GLshort dst_pitch,
                                     struct brw_bo *dst_buffer,
                                     GLuint dst_offset,
                                     enum isl_tiling dst_tiling,
                                     GLshort x, GLshort y,
                                     GLshort w, GLshort h,
                                     enum gl_logicop_mode logic_op);

#endif

File diff suppressed because it is too large

@@ -1,137 +0,0 @@
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_BLORP_H
#define BRW_BLORP_H

#include "blorp/blorp.h"
#include "brw_mipmap_tree.h"
#include "program/prog_instruction.h"

#ifdef __cplusplus
extern "C" {
#endif

void brw_blorp_init(struct brw_context *brw);

void
brw_blorp_blit_miptrees(struct brw_context *brw,
                        struct brw_mipmap_tree *src_mt,
                        unsigned src_level, unsigned src_layer,
                        mesa_format src_format, int src_swizzle,
                        struct brw_mipmap_tree *dst_mt,
                        unsigned dst_level, unsigned dst_layer,
                        mesa_format dst_format,
                        float src_x0, float src_y0,
                        float src_x1, float src_y1,
                        float dst_x0, float dst_y0,
                        float dst_x1, float dst_y1,
                        GLenum filter, bool mirror_x, bool mirror_y,
                        bool decode_srgb, bool encode_srgb);

void
brw_blorp_copy_miptrees(struct brw_context *brw,
                        struct brw_mipmap_tree *src_mt,
                        unsigned src_level, unsigned src_logical_layer,
                        struct brw_mipmap_tree *dst_mt,
                        unsigned dst_level, unsigned dst_logical_layer,
                        unsigned src_x, unsigned src_y,
                        unsigned dst_x, unsigned dst_y,
                        unsigned src_width, unsigned src_height);

void
brw_blorp_copy_buffers(struct brw_context *brw,
                       struct brw_bo *src_bo,
                       unsigned src_offset,
                       struct brw_bo *dst_bo,
                       unsigned dst_offset,
                       unsigned size);

bool
brw_blorp_upload_miptree(struct brw_context *brw,
                         struct brw_mipmap_tree *dst_mt,
                         mesa_format dst_format,
                         uint32_t level, uint32_t x, uint32_t y, uint32_t z,
                         uint32_t width, uint32_t height, uint32_t depth,
                         GLenum target, GLenum format, GLenum type,
                         const void *pixels,
                         const struct gl_pixelstore_attrib *packing);

bool
brw_blorp_download_miptree(struct brw_context *brw,
                           struct brw_mipmap_tree *src_mt,
                           mesa_format src_format, uint32_t src_swizzle,
                           uint32_t level, uint32_t x, uint32_t y, uint32_t z,
                           uint32_t width, uint32_t height, uint32_t depth,
                           GLenum target, GLenum format, GLenum type,
                           bool y_flip, const void *pixels,
                           const struct gl_pixelstore_attrib *packing);

void
brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
                      GLbitfield mask, bool partial_clear, bool encode_srgb);
void
brw_blorp_clear_depth_stencil(struct brw_context *brw,
                              struct gl_framebuffer *fb,
                              GLbitfield mask, bool partial_clear);

void
brw_blorp_resolve_color(struct brw_context *brw,
                        struct brw_mipmap_tree *mt,
                        unsigned level, unsigned layer,
                        enum isl_aux_op resolve_op);

void
brw_blorp_mcs_partial_resolve(struct brw_context *brw,
                              struct brw_mipmap_tree *mt,
                              uint32_t start_layer, uint32_t num_layers);

void
brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt,
             unsigned int level, unsigned int start_layer,
             unsigned int num_layers, enum isl_aux_op op);

void gfx4_blorp_exec(struct blorp_batch *batch,
                     const struct blorp_params *params);
void gfx45_blorp_exec(struct blorp_batch *batch,
                      const struct blorp_params *params);
void gfx5_blorp_exec(struct blorp_batch *batch,
                     const struct blorp_params *params);
void gfx6_blorp_exec(struct blorp_batch *batch,
                     const struct blorp_params *params);
void gfx7_blorp_exec(struct blorp_batch *batch,
                     const struct blorp_params *params);
void gfx75_blorp_exec(struct blorp_batch *batch,
                      const struct blorp_params *params);
void gfx8_blorp_exec(struct blorp_batch *batch,
                     const struct blorp_params *params);
void gfx9_blorp_exec(struct blorp_batch *batch,
                     const struct blorp_params *params);
void gfx11_blorp_exec(struct blorp_batch *batch,
                      const struct blorp_params *params);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* BRW_BLORP_H */

@@ -1,710 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * @file brw_buffer_objects.c
 *
 * This provides core GL buffer object functionality.
 */

#include "main/mtypes.h"
#include "main/macros.h"
#include "main/streaming-load-memcpy.h"
#include "main/bufferobj.h"
#include "x86/common_x86_asm.h"
#include "util/u_memory.h"

#include "brw_context.h"
#include "brw_blorp.h"
#include "brw_buffer_objects.h"
#include "brw_batch.h"

static void
mark_buffer_gpu_usage(struct brw_buffer_object *intel_obj,
                      uint32_t offset, uint32_t size)
{
   intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset);
   intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size);
}

static void
mark_buffer_inactive(struct brw_buffer_object *intel_obj)
{
   intel_obj->gpu_active_start = ~0;
   intel_obj->gpu_active_end = 0;
}

static void
mark_buffer_valid_data(struct brw_buffer_object *intel_obj,
                       uint32_t offset, uint32_t size)
{
   intel_obj->valid_data_start = MIN2(intel_obj->valid_data_start, offset);
   intel_obj->valid_data_end = MAX2(intel_obj->valid_data_end, offset + size);
}

static void
mark_buffer_invalid(struct brw_buffer_object *intel_obj)
{
   intel_obj->valid_data_start = ~0;
   intel_obj->valid_data_end = 0;
}

/** Allocates a new brw_bo to store the data for the buffer object. */
static void
alloc_buffer_object(struct brw_context *brw,
                    struct brw_buffer_object *intel_obj)
{
   const struct gl_context *ctx = &brw->ctx;

   uint64_t size = intel_obj->Base.Size;
   if (ctx->Const.RobustAccess) {
      /* Pad out buffer objects with an extra 2kB (half a page).
       *
       * When pushing UBOs, we need to safeguard against 3DSTATE_CONSTANT_*
       * reading out of bounds memory.  The application might bind a UBO
       * that's smaller than what the program expects.  Ideally, we'd bind
       * an extra push buffer containing zeros, but we have a limited number
       * of those, so it's not always viable.  Our only safe option is to
       * pad all buffer objects by the maximum push data length, so that it
       * will never read past the end of a BO.
       *
       * This is unfortunate, but it should result in at most 1 extra page,
       * which probably isn't too terrible.
       */
      size += 64 * 32; /* max read length of 64 256-bit units */
   }
   intel_obj->buffer =
      brw_bo_alloc(brw->bufmgr, "bufferobj", size, BRW_MEMZONE_OTHER);

   /* the buffer might be bound as a uniform buffer, need to update it
    */
   if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
   if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
   if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
      brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
   if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
      brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;

   mark_buffer_inactive(intel_obj);
   mark_buffer_invalid(intel_obj);
}
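
The "2kB" in the comment matches the padding arithmetic: 64 units of 256 bits is 64 * 32 = 2048 bytes. A one-line sanity check (illustrative only):

#include <assert.h>
int main(void) { assert(64 * (256 / 8) == 2048); return 0; }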

static void
release_buffer(struct brw_buffer_object *intel_obj)
{
   brw_bo_unreference(intel_obj->buffer);
   intel_obj->buffer = NULL;
}

/**
 * The NewBufferObject() driver hook.
 *
 * Allocates a new brw_buffer_object structure and initializes it.
 *
 * There is some duplication between mesa's bufferobjects and our
 * bufmgr buffers.  Both have an integer handle and a hashtable to
 * look up an opaque structure.  It would be nice if the handles and
 * internal structure were somehow shared.
 */
static struct gl_buffer_object *
brw_new_buffer_object(struct gl_context * ctx, GLuint name)
{
   struct brw_buffer_object *obj = CALLOC_STRUCT(brw_buffer_object);
   if (!obj) {
      _mesa_error_no_memory(__func__);
      return NULL;
   }

   _mesa_initialize_buffer_object(ctx, &obj->Base, name);

   obj->buffer = NULL;

   return &obj->Base;
}

/**
 * The DeleteBuffer() driver hook.
 *
 * Deletes a single OpenGL buffer object.  Used by glDeleteBuffers().
 */
static void
brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj)
{
   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);

   assert(intel_obj);

   /* Buffer objects are automatically unmapped when deleting according
    * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy
    * (though it does if you call glDeleteBuffers).
    */
   _mesa_buffer_unmap_all_mappings(ctx, obj);

   brw_bo_unreference(intel_obj->buffer);
   _mesa_delete_buffer_object(ctx, obj);
}

/**
 * The BufferData() driver hook.
 *
 * Implements glBufferData(), which recreates a buffer object's data store
 * and populates it with the given data, if present.
 *
 * Any data that was previously stored in the buffer object is lost.
 *
 * \return true for success, false if out of memory
 */
static GLboolean
brw_buffer_data(struct gl_context *ctx,
                GLenum target,
                GLsizeiptrARB size,
                const GLvoid *data,
                GLenum usage,
                GLbitfield storageFlags,
                struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);

   /* Part of the ABI, but this function doesn't use it.
    */
   (void) target;

   intel_obj->Base.Size = size;
   intel_obj->Base.Usage = usage;
   intel_obj->Base.StorageFlags = storageFlags;

   assert(!obj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */
   assert(!obj->Mappings[MAP_INTERNAL].Pointer);

   if (intel_obj->buffer != NULL)
      release_buffer(intel_obj);

   if (size != 0) {
      alloc_buffer_object(brw, intel_obj);
      if (!intel_obj->buffer)
         return false;

      if (data != NULL) {
         brw_bo_subdata(intel_obj->buffer, 0, size, data);
         mark_buffer_valid_data(intel_obj, 0, size);
      }
   }

   return true;
}

static GLboolean
brw_buffer_data_mem(struct gl_context *ctx,
                    GLenum target,
                    GLsizeiptrARB size,
                    struct gl_memory_object *memObj,
                    GLuint64 offset,
                    GLenum usage,
                    struct gl_buffer_object *bufObj)
{
   struct brw_buffer_object *intel_obj = brw_buffer_object(bufObj);
   struct brw_memory_object *intel_memObj = brw_memory_object(memObj);

   /* Part of the ABI, but this function doesn't use it.
    */
   (void) target;

   intel_obj->Base.Size = size;
   intel_obj->Base.Usage = usage;
   intel_obj->Base.StorageFlags = 0;

   assert(!bufObj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */
   assert(!bufObj->Mappings[MAP_INTERNAL].Pointer);

   if (intel_obj->buffer != NULL)
      release_buffer(intel_obj);

   if (size != 0) {
      intel_obj->buffer = intel_memObj->bo;
      mark_buffer_valid_data(intel_obj, offset, size);
   }

   return true;
}

/**
 * The BufferSubData() driver hook.
 *
 * Implements glBufferSubData(), which replaces a portion of the data in a
 * buffer object.
 *
 * If the data range specified by (size + offset) extends beyond the end of
 * the buffer or if data is NULL, no copy is performed.
 */
static void
brw_buffer_subdata(struct gl_context *ctx,
                   GLintptrARB offset,
                   GLsizeiptrARB size,
                   const GLvoid *data,
                   struct gl_buffer_object *obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
   bool busy;

   if (size == 0)
      return;

   assert(intel_obj);

   /* See if we can write the data into the user's BO without
    * synchronization.  This avoids GPU stalls in unfortunately common user
    * patterns (uploading sequentially into a BO, with draw calls in between
    * each upload).
    *
    * Once we've hit this path, we mark this GL BO as preferring stalling to
    * blits, so that we can hopefully hit this path again in the future
    * (otherwise, an app that might occasionally stall but mostly not will
    * end up with blitting all the time, at the cost of bandwidth).
    */
   if (offset + size <= intel_obj->gpu_active_start ||
       intel_obj->gpu_active_end <= offset ||
       offset + size <= intel_obj->valid_data_start ||
       intel_obj->valid_data_end <= offset) {
      void *map = brw_bo_map(brw, intel_obj->buffer, MAP_WRITE | MAP_ASYNC);
      memcpy(map + offset, data, size);
      brw_bo_unmap(intel_obj->buffer);

      if (intel_obj->gpu_active_end > intel_obj->gpu_active_start)
         intel_obj->prefer_stall_to_blit = true;

      mark_buffer_valid_data(intel_obj, offset, size);
      return;
   }

   busy =
      brw_bo_busy(intel_obj->buffer) ||
      brw_batch_references(&brw->batch, intel_obj->buffer);

   if (busy) {
      if (size == intel_obj->Base.Size ||
          (intel_obj->valid_data_start >= offset &&
           intel_obj->valid_data_end <= offset + size)) {
         /* Replace the current busy bo so the subdata doesn't stall. */
         brw_bo_unreference(intel_obj->buffer);
         alloc_buffer_object(brw, intel_obj);
      } else if (!intel_obj->prefer_stall_to_blit) {
         perf_debug("Using a blit copy to avoid stalling on "
                    "glBufferSubData(%ld, %ld) (%ldkb) to a busy "
                    "(%d-%d) / valid (%d-%d) buffer object.\n",
                    (long)offset, (long)offset + size, (long)(size/1024),
                    intel_obj->gpu_active_start,
                    intel_obj->gpu_active_end,
                    intel_obj->valid_data_start,
                    intel_obj->valid_data_end);
         struct brw_bo *temp_bo =
            brw_bo_alloc(brw->bufmgr, "subdata temp", size, BRW_MEMZONE_OTHER);

         brw_bo_subdata(temp_bo, 0, size, data);

         brw_blorp_copy_buffers(brw,
                                temp_bo, 0,
                                intel_obj->buffer, offset,
                                size);
         brw_emit_mi_flush(brw);

         brw_bo_unreference(temp_bo);
         mark_buffer_valid_data(intel_obj, offset, size);
         return;
      } else {
         perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy "
                    "(%d-%d) buffer object.  Use glMapBufferRange() to "
                    "avoid this.\n",
                    (long)offset, (long)offset + size, (long)(size/1024),
                    intel_obj->gpu_active_start,
                    intel_obj->gpu_active_end);
         brw_batch_flush(brw);
      }
   }

   brw_bo_subdata(intel_obj->buffer, offset, size, data);
   mark_buffer_inactive(intel_obj);
   mark_buffer_valid_data(intel_obj, offset, size);
}
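
The fast path at the top of brw_buffer_subdata() reduces to an interval-disjointness test. A standalone sketch (not driver code) showing the test, and why the "inactive" sentinel of start = ~0, end = 0 lets every write through:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* A write [w_start, w_end) needs no synchronization if it cannot overlap
 * the range [a_start, a_end) that the GPU may be touching. */
static bool
write_is_safe(uint32_t w_start, uint32_t w_end,
              uint32_t a_start, uint32_t a_end)
{
   return w_end <= a_start || a_end <= w_start;
}

int main(void)
{
   /* Inactive sentinel: start = ~0, end = 0, so any write passes. */
   assert(write_is_safe(0, 4096, UINT32_MAX, 0));
   /* GPU active over [0, 8192): an overlapping write is unsafe... */
   assert(!write_is_safe(4096, 5120, 0, 8192));
   /* ...but a write beyond the active range is still safe. */
   assert(write_is_safe(8192, 12288, 0, 8192));
   return 0;
}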

/* Typedef for memcpy function (used in brw_get_buffer_subdata below). */
typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n);

/**
 * The GetBufferSubData() driver hook.
 *
 * Implements glGetBufferSubData(), which copies a subrange of a buffer
 * object into user memory.
 */
static void
brw_get_buffer_subdata(struct gl_context *ctx,
                       GLintptrARB offset,
                       GLsizeiptrARB size,
                       GLvoid *data,
                       struct gl_buffer_object *obj)
{
   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
   struct brw_context *brw = brw_context(ctx);

   assert(intel_obj);
   if (brw_batch_references(&brw->batch, intel_obj->buffer)) {
      brw_batch_flush(brw);
   }

   unsigned int map_flags = MAP_READ;
   mem_copy_fn memcpy_fn = memcpy;
#ifdef USE_SSE41
   if (!intel_obj->buffer->cache_coherent && cpu_has_sse4_1) {
      /* Rather than acquire a new WB mmapping of the buffer object and pull
       * it into the CPU cache, keep using the WC mmap that we have for
       * writes, and use the magic movntd instructions instead.
       */
      map_flags |= MAP_COHERENT;
      memcpy_fn = (mem_copy_fn) _mesa_streaming_load_memcpy;
   }
#endif

   void *map = brw_bo_map(brw, intel_obj->buffer, map_flags);
   if (unlikely(!map)) {
      _mesa_error_no_memory(__func__);
      return;
   }
   memcpy_fn(data, map + offset, size);
   brw_bo_unmap(intel_obj->buffer);

   mark_buffer_inactive(intel_obj);
}
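
The streaming-load path above exists because ordinary loads from write-combined memory are extremely slow; movntdqa streams each line through a small internal buffer instead of the cache hierarchy. A minimal sketch of the idea with the SSE4.1 intrinsic (illustrative; the driver's actual helper is Mesa's _mesa_streaming_load_memcpy, and this toy version requires 16-byte alignment and a multiple-of-16 size):

#include <smmintrin.h> /* SSE4.1: _mm_stream_load_si128; build with -msse4.1 */
#include <stdint.h>
#include <string.h>

static void
streaming_memcpy16(void *dst, void *src, size_t n)
{
   __m128i *d = (__m128i *)dst;
   __m128i *s = (__m128i *)src; /* the intrinsic takes a non-const pointer */
   for (size_t i = 0; i < n / 16; i++)
      _mm_store_si128(&d[i], _mm_stream_load_si128(&s[i]));
}

int main(void)
{
   _Alignas(16) uint8_t src[64], dst[64];
   for (int i = 0; i < 64; i++)
      src[i] = (uint8_t)i;
   streaming_memcpy16(dst, src, sizeof(src));
   return memcmp(dst, src, sizeof(dst)); /* 0 on success */
}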

/**
 * The MapBufferRange() driver hook.
 *
 * This implements both glMapBufferRange() and glMapBuffer().
 *
 * The goal of this extension is to allow apps to accumulate their rendering
 * at the same time as they accumulate their buffer object.  Without it,
 * you'd end up blocking on execution of rendering every time you mapped
 * the buffer to put new data in.
 *
 * We support it in 3 ways: If unsynchronized, then don't bother
 * flushing the batchbuffer before mapping the buffer, which can save blocking
 * in many cases.  If we would still block, and they allow the whole buffer
 * to be invalidated, then just allocate a new buffer to replace the old one.
 * If not, and we'd block, and they allow the subrange of the buffer to be
 * invalidated, then we can make a new little BO, let them write into that,
 * and blit it into the real BO at unmap time.
 */
static void *
brw_map_buffer_range(struct gl_context *ctx,
                     GLintptr offset, GLsizeiptr length,
                     GLbitfield access, struct gl_buffer_object *obj,
                     gl_map_buffer_index index)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);

   assert(intel_obj);

   STATIC_ASSERT(GL_MAP_UNSYNCHRONIZED_BIT == MAP_ASYNC);
   STATIC_ASSERT(GL_MAP_WRITE_BIT == MAP_WRITE);
   STATIC_ASSERT(GL_MAP_READ_BIT == MAP_READ);
   STATIC_ASSERT(GL_MAP_PERSISTENT_BIT == MAP_PERSISTENT);
   STATIC_ASSERT(GL_MAP_COHERENT_BIT == MAP_COHERENT);
   assert((access & MAP_INTERNAL_MASK) == 0);

   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
    * internally uses our functions directly.
    */
   obj->Mappings[index].Offset = offset;
   obj->Mappings[index].Length = length;
   obj->Mappings[index].AccessFlags = access;

   if (intel_obj->buffer == NULL) {
      obj->Mappings[index].Pointer = NULL;
      return NULL;
   }

   /* If the access is synchronized (like a normal buffer mapping), then get
    * things flushed out so the later mapping syncs appropriately through GEM.
    * If the user doesn't care about existing buffer contents and mapping would
    * cause us to block, then throw out the old buffer.
    *
    * If they set INVALIDATE_BUFFER, we can pitch the current contents to
    * achieve the required synchronization.
    */
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
      if (brw_batch_references(&brw->batch, intel_obj->buffer)) {
         if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
            brw_bo_unreference(intel_obj->buffer);
            alloc_buffer_object(brw, intel_obj);
         } else {
            perf_debug("Stalling on the GPU for mapping a busy buffer "
                       "object\n");
            brw_batch_flush(brw);
         }
      } else if (brw_bo_busy(intel_obj->buffer) &&
                 (access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
         brw_bo_unreference(intel_obj->buffer);
         alloc_buffer_object(brw, intel_obj);
      }
   }

   if (access & MAP_WRITE)
      mark_buffer_valid_data(intel_obj, offset, length);

   /* If the user is mapping a range of an active buffer object but
    * doesn't require the current contents of that range, make a new
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    *
    * That is, unless they're looking for a persistent mapping -- we would
    * need to do blits in the MemoryBarrier call, and it's easier to just do a
    * GPU stall and do a mapping.
    */
   if (!(access & (GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_PERSISTENT_BIT)) &&
       (access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       brw_bo_busy(intel_obj->buffer)) {
      /* Ensure that the base alignment of the allocation meets the alignment
       * guarantees the driver has advertised to the application.
       */
      const unsigned alignment = ctx->Const.MinMapBufferAlignment;

      intel_obj->map_extra[index] = (uintptr_t) offset % alignment;
      intel_obj->range_map_bo[index] =
         brw_bo_alloc(brw->bufmgr, "BO blit temp",
                      length + intel_obj->map_extra[index],
                      BRW_MEMZONE_OTHER);
      void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access);
      obj->Mappings[index].Pointer = map + intel_obj->map_extra[index];
      return obj->Mappings[index].Pointer;
   }

   void *map = brw_bo_map(brw, intel_obj->buffer, access);
   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
      mark_buffer_inactive(intel_obj);
   }

   obj->Mappings[index].Pointer = map + offset;
   return obj->Mappings[index].Pointer;
}
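
The three strategies described in the comment block before brw_map_buffer_range() can be condensed into a small decision function. This is a simplified sketch with invented flag and enum names, not the driver's actual control flow:

#include <stdio.h>

enum map_strategy {
   MAP_DIRECT,       /* map the real BO (unsynchronized or idle) */
   MAP_REPLACE_BO,   /* orphan: allocate fresh storage and map that */
   MAP_TEMP_BO_BLIT, /* map a scratch BO, blit back at unmap/flush time */
   MAP_STALL         /* flush the batch, wait, then map */
};

/* Flag bits mirroring the GL_MAP_* access bits (names are illustrative). */
#define F_UNSYNCHRONIZED    (1u << 0)
#define F_INVALIDATE_BUFFER (1u << 1)
#define F_INVALIDATE_RANGE  (1u << 2)
#define F_PERSISTENT        (1u << 3)

static enum map_strategy
pick_map_strategy(unsigned access, int bo_busy)
{
   if (access & F_UNSYNCHRONIZED)
      return MAP_DIRECT;               /* caller accepts the race */
   if (!bo_busy)
      return MAP_DIRECT;               /* nothing to wait for */
   if (access & F_INVALIDATE_BUFFER)
      return MAP_REPLACE_BO;           /* whole contents disposable */
   if ((access & F_INVALIDATE_RANGE) && !(access & F_PERSISTENT))
      return MAP_TEMP_BO_BLIT;         /* only the mapped range disposable */
   return MAP_STALL;
}

int main(void)
{
   /* A busy BO mapped with INVALIDATE_RANGE takes the temp-BO path. */
   printf("%d\n", pick_map_strategy(F_INVALIDATE_RANGE, 1) == MAP_TEMP_BO_BLIT);
   return 0;
}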

/**
 * The FlushMappedBufferRange() driver hook.
 *
 * Implements glFlushMappedBufferRange(), which signifies that modifications
 * have been made to a range of a mapped buffer, and it should be flushed.
 *
 * This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT.
 *
 * Ideally we'd use a BO to avoid taking up cache space for the temporary
 * data, but FlushMappedBufferRange may be followed by further writes to
 * the pointer, so we would have to re-map after emitting our blit, which
 * would defeat the point.
 */
static void
brw_flush_mapped_buffer_range(struct gl_context *ctx,
                              GLintptr offset, GLsizeiptr length,
                              struct gl_buffer_object *obj,
                              gl_map_buffer_index index)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);

   assert(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT);

   /* If we gave a direct mapping of the buffer instead of using a temporary,
    * then there's nothing to do.
    */
   if (intel_obj->range_map_bo[index] == NULL)
      return;

   if (length == 0)
      return;

   /* Note that we're not unmapping our buffer while executing the blit.  We
    * need to have a mapping still at the end of this call, since the user
    * gets to make further modifications and glFlushMappedBufferRange() calls.
    * This is safe, because:
    *
    * - On LLC platforms, we're using a CPU mapping that's coherent with the
    *   GPU (except for the render caches), so the kernel doesn't need to do
    *   any flushing work for us except for what happens at batch exec time
    *   anyway.
    *
    * - On non-LLC platforms, we're using a GTT mapping that writes directly
    *   to system memory (except for the chipset cache that gets flushed at
    *   batch exec time).
    *
    * In both cases we don't need to stall for the previous blit to complete
    * so we can re-map (and we definitely don't want to, since that would be
    * slow): If the user edits a part of their buffer that's previously been
    * blitted, then our lack of synchronization is fine, because either
    * they'll get some too-new data in the first blit and not do another blit
    * of that area (but in that case the results are undefined), or they'll do
    * another blit of that area and the complete newer data will land the
    * second time.
    */
   brw_blorp_copy_buffers(brw,
                          intel_obj->range_map_bo[index],
                          intel_obj->map_extra[index] + offset,
                          intel_obj->buffer,
                          obj->Mappings[index].Offset + offset,
                          length);
   mark_buffer_gpu_usage(intel_obj,
                         obj->Mappings[index].Offset + offset,
                         length);
   brw_emit_mi_flush(brw);
}

/**
 * The UnmapBuffer() driver hook.
 *
 * Implements glUnmapBuffer().
 */
static GLboolean
brw_unmap_buffer(struct gl_context *ctx,
                 struct gl_buffer_object *obj,
                 gl_map_buffer_index index)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);

   assert(intel_obj);
   assert(obj->Mappings[index].Pointer);
   if (intel_obj->range_map_bo[index] != NULL) {
      brw_bo_unmap(intel_obj->range_map_bo[index]);

      if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) {
         brw_blorp_copy_buffers(brw,
                                intel_obj->range_map_bo[index],
                                intel_obj->map_extra[index],
                                intel_obj->buffer, obj->Mappings[index].Offset,
                                obj->Mappings[index].Length);
         mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset,
                               obj->Mappings[index].Length);
         brw_emit_mi_flush(brw);
      }

      /* Since we've emitted some blits to buffers that will (likely) be used
       * in rendering operations in other cache domains in this batch, emit a
       * flush.  Once again, we wish for a domain tracker in libdrm to cover
       * usage inside of a batchbuffer.
       */

      brw_bo_unreference(intel_obj->range_map_bo[index]);
      intel_obj->range_map_bo[index] = NULL;
   } else if (intel_obj->buffer != NULL) {
      brw_bo_unmap(intel_obj->buffer);
   }
   obj->Mappings[index].Pointer = NULL;
   obj->Mappings[index].Offset = 0;
   obj->Mappings[index].Length = 0;

   return true;
}

/**
 * Gets a pointer to the object's BO, and marks the given range as being used
 * on the GPU.
 *
 * Anywhere that uses buffer objects in the pipeline should be using this to
 * mark the range of the buffer that is being accessed by the pipeline.
 */
struct brw_bo *
brw_bufferobj_buffer(struct brw_context *brw,
                     struct brw_buffer_object *intel_obj,
                     uint32_t offset, uint32_t size, bool write)
{
   /* This is needed so that things like transform feedback and texture buffer
    * objects that need a BO but don't want to check that they exist for
    * draw-time validation can just always get a BO from a GL buffer object.
    */
   if (intel_obj->buffer == NULL)
      alloc_buffer_object(brw, intel_obj);

   mark_buffer_gpu_usage(intel_obj, offset, size);

   /* If writing, (conservatively) mark this section as having valid data. */
   if (write)
      mark_buffer_valid_data(intel_obj, offset, size);

   return intel_obj->buffer;
}

/**
 * The CopyBufferSubData() driver hook.
 *
 * Implements glCopyBufferSubData(), which copies a portion of one buffer
 * object's data to another.  Independent source and destination offsets
 * are allowed.
 */
static void
brw_copy_buffer_subdata(struct gl_context *ctx,
                        struct gl_buffer_object *src,
                        struct gl_buffer_object *dst,
                        GLintptr read_offset, GLintptr write_offset,
                        GLsizeiptr size)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_buffer_object *intel_src = brw_buffer_object(src);
   struct brw_buffer_object *intel_dst = brw_buffer_object(dst);
   struct brw_bo *src_bo, *dst_bo;

   if (size == 0)
      return;

   dst_bo = brw_bufferobj_buffer(brw, intel_dst, write_offset, size, true);
   src_bo = brw_bufferobj_buffer(brw, intel_src, read_offset, size, false);

   brw_blorp_copy_buffers(brw,
                          src_bo, read_offset,
                          dst_bo, write_offset, size);

   /* Since we've emitted some blits to buffers that will (likely) be used
    * in rendering operations in other cache domains in this batch, emit a
    * flush.  Once again, we wish for a domain tracker in libdrm to cover
    * usage inside of a batchbuffer.
    */
   brw_emit_mi_flush(brw);
}

void
brw_init_buffer_object_functions(struct dd_function_table *functions)
{
   functions->NewBufferObject = brw_new_buffer_object;
   functions->DeleteBuffer = brw_delete_buffer;
   functions->BufferData = brw_buffer_data;
   functions->BufferDataMem = brw_buffer_data_mem;
   functions->BufferSubData = brw_buffer_subdata;
   functions->GetBufferSubData = brw_get_buffer_subdata;
   functions->MapBufferRange = brw_map_buffer_range;
   functions->FlushMappedBufferRange = brw_flush_mapped_buffer_range;
   functions->UnmapBuffer = brw_unmap_buffer;
   functions->CopyBufferSubData = brw_copy_buffer_subdata;
}

@@ -1,141 +0,0 @@
/*
 * Copyright 2005 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef BRW_BUFFEROBJ_H
#define BRW_BUFFEROBJ_H

#include "main/mtypes.h"

struct brw_context;
struct gl_buffer_object;

/**
 * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
 */
struct brw_buffer_object
{
   struct gl_buffer_object Base;
   struct brw_bo *buffer; /* the low-level buffer manager's buffer handle */

   struct brw_bo *range_map_bo[MAP_COUNT];

   /**
    * Alignment offset from the range_map_bo temporary mapping to the returned
    * obj->Pointer (caused by GL_ARB_map_buffer_alignment).
    */
   unsigned map_extra[MAP_COUNT];

   /** @{
    * Tracking for what range of the BO may currently be in use by the GPU.
    *
    * Users often want to either glBufferSubData() or glMapBufferRange() a
    * buffer object where some subset of it is busy on the GPU, without either
    * stalling or doing an extra blit (since our blits are extra expensive,
    * given that we have to reupload most of the 3D state when switching
    * rings).  We wish they'd just use glMapBufferRange() with the
    * UNSYNC|INVALIDATE_RANGE flag or the INVALIDATE_BUFFER flag, but lots
    * don't.
    *
    * To work around apps, we track what range of the BO we might have used on
    * the GPU as vertex data, transform feedback output, buffer textures, etc.,
    * and just do glBufferSubData() with an unsynchronized map when they're
    * outside of that range.
    *
    * If gpu_active_start > gpu_active_end, then the GPU is not currently
    * accessing the BO (and we can map it without synchronization).
    */
   uint32_t gpu_active_start;
   uint32_t gpu_active_end;

   /** @{
    * Tracking for what range of the BO may contain valid data.
    *
    * Users may create a large buffer object and only fill part of it
    * with valid data.  This is a conservative estimate of what part
    * of the buffer contains valid data that we have to preserve.
    */
   uint32_t valid_data_start;
   uint32_t valid_data_end;
   /** @} */

   /**
    * If we've avoided stalls/blits using the active tracking, flag the buffer
    * for (occasional) stalling in the future to avoid getting stuck in a
    * cycle of blitting on buffer wraparound.
    */
   bool prefer_stall_to_blit;
   /** @} */
};
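
A standalone sketch (not driver code) of the tracking invariant documented above: the "inactive" reset sets start = ~0 and end = 0, and usage marking only ever shrinks start or grows end, so start > end is a reliable "GPU idle" test:

#include <assert.h>
#include <stdint.h>

struct active_range { uint32_t start, end; };

static void
range_reset(struct active_range *r)
{
   r->start = UINT32_MAX;
   r->end = 0;
}

static void
range_add(struct active_range *r, uint32_t offset, uint32_t size)
{
   if (offset < r->start)
      r->start = offset;
   if (offset + size > r->end)
      r->end = offset + size;
}

int main(void)
{
   struct active_range r;
   range_reset(&r);
   assert(r.start > r.end);  /* idle: nothing tracked */
   range_add(&r, 4096, 1024);
   assert(r.start <= r.end); /* [4096, 5120) now tracked as busy */
   return 0;
}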

/* Get the bm buffer associated with a GL bufferobject:
 */
struct brw_bo *brw_bufferobj_buffer(struct brw_context *brw,
                                    struct brw_buffer_object *obj,
                                    uint32_t offset,
                                    uint32_t size,
                                    bool write);

void brw_upload_data(struct brw_uploader *upload,
                     const void *data,
                     uint32_t size,
                     uint32_t alignment,
                     struct brw_bo **out_bo,
                     uint32_t *out_offset);

void *brw_upload_space(struct brw_uploader *upload,
                       uint32_t size,
                       uint32_t alignment,
                       struct brw_bo **out_bo,
                       uint32_t *out_offset);

void brw_upload_finish(struct brw_uploader *upload);
void brw_upload_init(struct brw_uploader *upload,
                     struct brw_bufmgr *bufmgr,
                     unsigned default_size);

/* Hook the bufferobject implementation into mesa:
 */
void brw_init_buffer_object_functions(struct dd_function_table *functions);

static inline struct brw_buffer_object *
brw_buffer_object(struct gl_buffer_object *obj)
{
   return (struct brw_buffer_object *) obj;
}

struct brw_memory_object {
   struct gl_memory_object Base;
   struct brw_bo *bo;
};

static inline struct brw_memory_object *
brw_memory_object(struct gl_memory_object *obj)
{
   return (struct brw_memory_object *)obj;
}

#endif

@@ -1,74 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "brw_context.h"
#include "brw_buffers.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"

#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"

static void
brw_drawbuffer(struct gl_context *ctx)
{
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
      struct brw_context *const brw = brw_context(ctx);

      /* If we might be front-buffer rendering on this buffer for the first
       * time, invalidate our DRI drawable so we'll ask for new buffers
       * (including the fake front) before we start rendering again.
       */
      if (brw->driContext->driDrawablePriv)
         dri2InvalidateDrawable(brw->driContext->driDrawablePriv);
      brw_prepare_render(brw);
   }
}

static void
brw_readbuffer(struct gl_context * ctx, GLenum mode)
{
   if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) {
      struct brw_context *const brw = brw_context(ctx);

      /* If we might be front-buffer reading on this buffer for the first
       * time, invalidate our DRI drawable so we'll ask for new buffers
       * (including the fake front) before we start reading again.
       */
      if (brw->driContext->driDrawablePriv)
         dri2InvalidateDrawable(brw->driContext->driReadablePriv);
      brw_prepare_render(brw);
   }
}

void
brw_init_buffer_functions(struct dd_function_table *functions)
{
   functions->DrawBuffer = brw_drawbuffer;
   functions->ReadBuffer = brw_readbuffer;
}

@@ -1,35 +0,0 @@
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef BRW_BUFFERS_H
#define BRW_BUFFERS_H

#include "dri_util.h"
#include "drm-uapi/drm.h"
#include "brw_context.h"

extern void brw_init_buffer_functions(struct dd_function_table *functions);

#endif /* BRW_BUFFERS_H */
|
||||
File diff suppressed because it is too large
@@ -1,404 +0,0 @@
/*
 * Copyright © 2008-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 */

/**
 * @file brw_bufmgr.h
 *
 * Public definitions of Intel-specific bufmgr functions.
 */

#ifndef BRW_BUFMGR_H
#define BRW_BUFMGR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#include "c11/threads.h"
#include "util/u_atomic.h"
#include "util/list.h"

#if defined(__cplusplus)
extern "C" {
#endif

struct intel_device_info;
struct brw_context;

/**
 * Memory zones.  When allocating a buffer, you can request that it is
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (BRW_MEMZONE_OTHER).  Some buffers are
 * accessed via an offset from a base address.  STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base.  Each memory zone corresponds
 * to a particular base address.
 *
 * Currently, i965 partitions the address space into two regions:
 *
 * - Low 4GB
 * - Full 48-bit address space
 *
 * Eventually, we hope to carve out 4GB of VMA for each base address.
 */
enum brw_memory_zone {
   BRW_MEMZONE_LOW_4G,
   BRW_MEMZONE_OTHER,

   /* Shaders - Instruction State Base Address */
   BRW_MEMZONE_SHADER = BRW_MEMZONE_LOW_4G,

   /* Scratch - General State Base Address */
   BRW_MEMZONE_SCRATCH = BRW_MEMZONE_LOW_4G,

   /* Surface State Base Address */
   BRW_MEMZONE_SURFACE = BRW_MEMZONE_LOW_4G,

   /* Dynamic State Base Address */
   BRW_MEMZONE_DYNAMIC = BRW_MEMZONE_LOW_4G,
};

#define BRW_MEMZONE_COUNT (BRW_MEMZONE_OTHER + 1)

struct brw_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct brw_bufmgr *bufmgr;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Offset of the buffer inside the Graphics Translation Table.
    *
    * This is effectively our GPU address for the buffer and we use it
    * as our base for all state pointers into the buffer. However, since the
    * kernel may be forced to move it around during the course of the
    * buffer's lifetime, we can only know where the buffer was on the last
    * execbuf. We presume, and are usually right, that the buffer will not
    * move and so we use that last offset for the next batch and by doing
    * so we can avoid having the kernel perform a relocation fixup pass as
    * our pointers inside the batch will be using the correct base offset.
    *
    * Since we do use it as a base address for the next batch of pointers,
    * the kernel treats our offset as a request, and if possible will
    * arrange the buffer to be placed at that address (trying to balance
    * the cost of buffer migration versus the cost of performing
    * relocations). Furthermore, we can force the kernel to place the buffer,
    * or report a failure if we specified a conflicting offset, at our chosen
    * offset by specifying EXEC_OBJECT_PINNED.
    *
    * Note the GTT may be either per context, or shared globally across the
    * system. On a shared system, our buffers have to contend for address
    * space with both aperture mappings and framebuffers and so are more
    * likely to be moved. On a full ppGTT system, each batch exists in its
    * own GTT, and so each buffer may have its own offset within each
    * context.
    */
   uint64_t gtt_offset;

   /**
    * The validation list index for this buffer, or -1 when not in a batch.
    * Note that a single buffer may be in multiple batches (contexts), and
    * this is a global field, which refers to the last batch using the BO.
    * It should not be considered authoritative, but can be used to avoid a
    * linear walk of the validation list in the common case by guessing that
    * exec_bos[bo->index] == bo and confirming whether that's the case.
    */
   unsigned index;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   int refcount;
   const char *name;

   uint64_t kflags;

   /**
    * Kernel-assigned global name for this object
    *
    * List contains both flink named and prime fd'd objects
    */
   unsigned int global_name;

   /**
    * Current tiling mode
    */
   uint32_t tiling_mode;
   uint32_t swizzle_mode;
   uint32_t stride;

   time_t free_time;

   /** Mapped address for the buffer, saved across map/unmap cycles */
   void *map_cpu;
   /** GTT virtual address for the buffer, saved across map/unmap cycles */
   void *map_gtt;
   /** WC CPU address for the buffer, saved across map/unmap cycles */
   void *map_wc;

   /** BO cache list */
   struct list_head head;

   /**
    * List of GEM handle exports of this buffer (bo_export).
    *
    * Hold bufmgr->lock when using this list.
    */
   struct list_head exports;

   /**
    * Boolean of whether this buffer can be re-used
    */
   bool reusable;

   /**
    * Boolean of whether this buffer has been shared with an external client.
    */
   bool external;

   /**
    * Boolean of whether this buffer is cache coherent
    */
   bool cache_coherent;
};
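/* Illustrative only, not from the original header: a sketch of the lookup
 * that the 'index' field above enables, assuming a batch with an exec_bos
 * array of exec_count entries as described in the comment.  Guess first,
 * then fall back to a linear walk, since the field is not authoritative.
 */
static inline int
example_find_exec_index(struct brw_bo **exec_bos, int exec_count,
                        struct brw_bo *bo)
{
   if (bo->index < (unsigned) exec_count && exec_bos[bo->index] == bo)
      return bo->index;            /* common case: the guess was right */
   for (int i = 0; i < exec_count; i++)
      if (exec_bos[i] == bo)
         return i;                 /* guess was stale; walk the list */
   return -1;                      /* not in this batch */
}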
#define BO_ALLOC_BUSY   (1<<0)
#define BO_ALLOC_ZEROED (1<<1)

/**
 * Allocate a buffer object.
 *
 * Buffer objects are not necessarily initially mapped into CPU virtual
 * address space or graphics device aperture. They must be mapped
 * using brw_bo_map() to be used by the CPU.
 */
struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
                            uint64_t size, enum brw_memory_zone memzone);
/**
 * Allocate a tiled buffer object.
 *
 * Alignment for tiled objects is set automatically; the 'flags'
 * argument provides a hint about how the object will be used initially.
 *
 * Valid tiling formats are:
 *  I915_TILING_NONE
 *  I915_TILING_X
 *  I915_TILING_Y
 */
struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr,
                                  const char *name,
                                  uint64_t size,
                                  enum brw_memory_zone memzone,
                                  uint32_t tiling_mode,
                                  uint32_t pitch,
                                  unsigned flags);

/**
 * Allocate a tiled buffer object.
 *
 * Alignment for tiled objects is set automatically; the 'flags'
 * argument provides a hint about how the object will be used initially.
 *
 * Valid tiling formats are:
 *  I915_TILING_NONE
 *  I915_TILING_X
 *  I915_TILING_Y
 *
 * Note the tiling format may be rejected; callers should check the
 * 'tiling_mode' field on return, as well as the pitch value, which
 * may have been rounded up to accommodate tiling restrictions.
 */
struct brw_bo *brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr,
                                     const char *name,
                                     int x, int y, int cpp,
                                     enum brw_memory_zone memzone,
                                     uint32_t tiling_mode,
                                     uint32_t *pitch,
                                     unsigned flags);

/** Takes a reference on a buffer object */
static inline void
brw_bo_reference(struct brw_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}

/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 */
void brw_bo_unreference(struct brw_bo *bo);

/* Must match MapBufferRange interface (for convenience) */
#define MAP_READ        GL_MAP_READ_BIT
#define MAP_WRITE       GL_MAP_WRITE_BIT
#define MAP_ASYNC       GL_MAP_UNSYNCHRONIZED_BIT
#define MAP_PERSISTENT  GL_MAP_PERSISTENT_BIT
#define MAP_COHERENT    GL_MAP_COHERENT_BIT
/* internal */
#define MAP_INTERNAL_MASK (0xffu << 24)
#define MAP_RAW           (0x01 << 24)

/**
 * Maps the buffer into userspace.
 *
 * This function will block waiting for any existing execution on the
 * buffer to complete, first. The resulting mapping is returned.
 */
MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags);

/**
 * Reduces the refcount on the userspace mapping of the buffer
 * object.
 */
static inline int brw_bo_unmap(UNUSED struct brw_bo *bo) { return 0; }
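/* Illustrative only, not part of the original header: a minimal sketch of
 * the allocate/map/fill/release cycle using the declarations above.  'brw'
 * is assumed to be a live context; memcpy needs <string.h>.
 */
static inline struct brw_bo *
example_upload_bytes(struct brw_context *brw, struct brw_bufmgr *bufmgr,
                     const void *data, uint64_t size)
{
   struct brw_bo *bo = brw_bo_alloc(bufmgr, "upload", size, BRW_MEMZONE_OTHER);
   if (bo == NULL)
      return NULL;

   void *map = brw_bo_map(brw, bo, MAP_WRITE);  /* blocks while the BO is busy */
   if (map == NULL) {
      brw_bo_unreference(bo);
      return NULL;
   }
   memcpy(map, data, size);
   brw_bo_unmap(bo);                            /* no-op; kept for symmetry */
   return bo;                                   /* caller owns one reference */
}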
/** Write data into an object. */
int brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
                   uint64_t size, const void *data);

/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc.  It is merely a way for the driver to implement
 * glFinish.
 */
void brw_bo_wait_rendering(struct brw_bo *bo);

/**
 * Unref a buffer manager instance.
 */
void brw_bufmgr_unref(struct brw_bufmgr *bufmgr);

/**
 * Get the current tiling (and resulting swizzling) mode for the bo.
 *
 * \param buf Buffer to get tiling mode for
 * \param tiling_mode returned tiling mode
 * \param swizzle_mode returned swizzling mode
 */
int brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
                      uint32_t *swizzle_mode);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param buf Buffer to create a name for
 * \param name Returned name
 */
int brw_bo_flink(struct brw_bo *bo, uint32_t *name);

/**
 * Returns 1 if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
int brw_bo_busy(struct brw_bo *bo);

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer to mark
 * \param madv The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int brw_bo_madvise(struct brw_bo *bo, int madv);
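/* Illustrative only: the purgeable BO-cache pattern the madvise comment
 * describes.  'cached' is a hypothetical buffer that was parked on a free
 * list with brw_bo_madvise(cached, I915_MADV_DONTNEED); the I915_MADV_*
 * values come from drm-uapi/i915_drm.h.
 */
static inline struct brw_bo *
example_reclaim_cached_bo(struct brw_bufmgr *bufmgr, struct brw_bo *cached,
                          uint64_t size)
{
   if (brw_bo_madvise(cached, I915_MADV_WILLNEED))
      return cached;                           /* pages survived; reuse */

   /* The kernel purged the backing pages; the handle is useless now. */
   brw_bo_unreference(cached);
   return brw_bo_alloc(bufmgr, "replacement", size, BRW_MEMZONE_OTHER);
}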
struct brw_bufmgr *brw_bufmgr_get_for_fd(struct intel_device_info *devinfo,
                                         int fd, bool bo_reuse);

struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
                                           const char *name,
                                           unsigned int handle);

int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);

uint32_t brw_create_hw_context(struct brw_bufmgr *bufmgr);

int brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
                                uint32_t ctx_id,
                                int priority);

void brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id);

int brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr);

int brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd);
struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr,
                                            int prime_fd);
struct brw_bo *brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr,
                                                  int prime_fd,
                                                  uint32_t tiling_mode,
                                                  uint32_t stride);

uint32_t brw_bo_export_gem_handle(struct brw_bo *bo);

/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo Buffer to export
 * \param drm_fd File descriptor where the new handle is created
 * \param out_handle Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non-zero error code
 * otherwise.
 */
int brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd,
                                        uint32_t *out_handle);

int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset,
                 uint64_t *result);

bool brw_using_softpin(struct brw_bufmgr *bufmgr);

/** @{ */

#if defined(__cplusplus)
}
#endif
#endif /* BRW_BUFMGR_H */
@@ -1,302 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * Copyright 2009, 2012 Intel Corporation.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/mtypes.h"
#include "main/condrender.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"

#include "brw_batch.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"

#include "brw_context.h"
#include "brw_blorp.h"
#include "brw_defines.h"

#define FILE_DEBUG_FLAG DEBUG_BLIT

static const char *buffer_names[] = {
   [BUFFER_FRONT_LEFT] = "front",
   [BUFFER_BACK_LEFT] = "back",
   [BUFFER_FRONT_RIGHT] = "front right",
   [BUFFER_BACK_RIGHT] = "back right",
   [BUFFER_DEPTH] = "depth",
   [BUFFER_STENCIL] = "stencil",
   [BUFFER_ACCUM] = "accum",
   [BUFFER_COLOR0] = "color0",
   [BUFFER_COLOR1] = "color1",
   [BUFFER_COLOR2] = "color2",
   [BUFFER_COLOR3] = "color3",
   [BUFFER_COLOR4] = "color4",
   [BUFFER_COLOR5] = "color5",
   [BUFFER_COLOR6] = "color6",
   [BUFFER_COLOR7] = "color7",
};

static void
debug_mask(const char *name, GLbitfield mask)
{
   GLuint i;

   if (INTEL_DEBUG(DEBUG_BLIT)) {
      DBG("%s clear:", name);
      for (i = 0; i < BUFFER_COUNT; i++) {
         if (mask & (1 << i))
            DBG(" %s", buffer_names[i]);
      }
      DBG("\n");
   }
}

/**
 * Returns true if the scissor is a noop (cuts out nothing).
 */
static bool
noop_scissor(struct gl_framebuffer *fb)
{
   return fb->_Xmin <= 0 &&
          fb->_Ymin <= 0 &&
          fb->_Xmax >= fb->Width &&
          fb->_Ymax >= fb->Height;
}

/**
 * Implements fast depth clears on gfx6+.
 *
 * Fast clears basically work by setting a flag in each of the subspans
 * represented in the HiZ buffer that says "When you need the depth values for
 * this subspan, it's the hardware's current clear value."  Then later rendering
 * can just use the static clear value instead of referencing memory.
 *
 * The tricky part of the implementation is that you have to have the clear
 * value that was used on the depth buffer in place for all further rendering,
 * at least until a resolve to the real depth buffer happens.
 */
static bool
brw_fast_clear_depth(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct brw_renderbuffer *depth_irb =
      brw_get_renderbuffer(fb, BUFFER_DEPTH);
   struct brw_mipmap_tree *mt = depth_irb->mt;
   struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
      return false;

   if (devinfo->ver < 6)
      return false;

   if (!brw_renderbuffer_has_hiz(depth_irb))
      return false;

   /* We only handle full buffer clears -- otherwise you'd have to track whether
    * a previous clear had happened at a different clear value and resolve it
    * first.
    */
   if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(fb)) {
      perf_debug("Failed to fast clear %dx%d depth because of scissors.  "
                 "Possible 5%% performance win if avoided.\n",
                 mt->surf.logical_level0_px.width,
                 mt->surf.logical_level0_px.height);
      return false;
   }

   switch (mt->format) {
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *   "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *    enabled (the legacy method of clearing must be performed):
       *
       *    - If the depth buffer format is D32_FLOAT_S8X24_UINT or
       *      D24_UNORM_S8_UINT.
       */
      return false;

   case MESA_FORMAT_Z_UNORM16:
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *   "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
       *    enabled (the legacy method of clearing must be performed):
       *
       *    - [DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
       *      width of the map (LOD0) is not multiple of 16, fast clear
       *      optimization must be disabled.
       */
      if (devinfo->ver == 6 &&
          (minify(mt->surf.phys_level0_sa.width,
                  depth_irb->mt_level - mt->first_level) % 16) != 0)
         return false;
      break;

   default:
      break;
   }

   /* Quantize the clear value to what can be stored in the actual depth
    * buffer.  This makes the following check more accurate because it now
    * checks if the actual depth bits will match.  It also prevents us from
    * getting a too-accurate depth value during depth testing or when sampling
    * with HiZ enabled.
    */
   float clear_value =
      mt->format == MESA_FORMAT_Z_FLOAT32 ? ctx->Depth.Clear :
      _mesa_lroundeven(ctx->Depth.Clear * fb->_DepthMax) / (float)(fb->_DepthMax);

   const uint32_t num_layers = depth_att->Layered ? depth_irb->layer_count : 1;

   /* If we're clearing to a new clear value, then we need to resolve any clear
    * flags out of the HiZ buffer into the real depth buffer.
    */
   if (mt->fast_clear_color.f32[0] != clear_value) {
      for (uint32_t level = mt->first_level; level <= mt->last_level; level++) {
         if (!brw_miptree_level_has_hiz(mt, level))
            continue;

         const unsigned level_layers = brw_get_num_logical_layers(mt, level);

         for (uint32_t layer = 0; layer < level_layers; layer++) {
            if (level == depth_irb->mt_level &&
                layer >= depth_irb->mt_layer &&
                layer < depth_irb->mt_layer + num_layers) {
               /* We're going to clear this layer anyway.  Leave it alone. */
               continue;
            }

            enum isl_aux_state aux_state =
               brw_miptree_get_aux_state(mt, level, layer);

            if (aux_state != ISL_AUX_STATE_CLEAR &&
                aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
               /* This slice doesn't have any fast-cleared bits. */
               continue;
            }

            /* If we got here, then the level may have fast-clear bits that
             * use the old clear value.  We need to do a depth resolve to get
             * rid of their use of the clear value before we can change it.
             * Fortunately, few applications ever change their depth clear
             * value so this shouldn't happen often.
             */
            brw_hiz_exec(brw, mt, level, layer, 1, ISL_AUX_OP_FULL_RESOLVE);
            brw_miptree_set_aux_state(brw, mt, level, layer, 1,
                                      ISL_AUX_STATE_RESOLVED);
         }
      }

      const union isl_color_value clear_color = { .f32 = {clear_value, } };
      brw_miptree_set_clear_color(brw, mt, clear_color);
   }

   for (unsigned a = 0; a < num_layers; a++) {
      enum isl_aux_state aux_state =
         brw_miptree_get_aux_state(mt, depth_irb->mt_level,
                                   depth_irb->mt_layer + a);

      if (aux_state != ISL_AUX_STATE_CLEAR) {
         brw_hiz_exec(brw, mt, depth_irb->mt_level,
                      depth_irb->mt_layer + a, 1,
                      ISL_AUX_OP_FAST_CLEAR);
      }
   }

   brw_miptree_set_aux_state(brw, mt, depth_irb->mt_level,
                             depth_irb->mt_layer, num_layers,
                             ISL_AUX_STATE_CLEAR);
   return true;
}
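/* Illustrative only: the quantization rule used above, pulled out as a
 * helper.  A requested clear of, say, 0.3f is snapped to the nearest value
 * a 24-bit buffer can hold (fb->_DepthMax == 0xffffff), so the HiZ clear
 * comparison sees the value the buffer actually stores rather than the
 * exact value the application requested.
 */
static float
example_quantize_depth_clear(float clear, GLuint depth_max)
{
   return _mesa_lroundeven(clear * depth_max) / (float) depth_max;
}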
/**
 * Called by ctx->Driver.Clear.
 */
static void
brw_clear(struct gl_context *ctx, GLbitfield mask)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   bool partial_clear = ctx->Scissor.EnableFlags && !noop_scissor(fb);

   if (!_mesa_check_conditional_render(ctx))
      return;

   if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
      brw->front_buffer_dirty = true;
   }

   brw_prepare_render(brw);
   brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask);

   if (mask & BUFFER_BIT_DEPTH) {
      if (brw_fast_clear_depth(ctx)) {
         DBG("fast clear: depth\n");
         mask &= ~BUFFER_BIT_DEPTH;
      }
   }

   if (mask & BUFFER_BITS_COLOR) {
      brw_blorp_clear_color(brw, fb, mask, partial_clear,
                            ctx->Color.sRGBEnabled);
      debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
      mask &= ~BUFFER_BITS_COLOR;
   }

   if (devinfo->ver >= 6 && (mask & BUFFER_BITS_DEPTH_STENCIL)) {
      brw_blorp_clear_depth_stencil(brw, fb, mask, partial_clear);
      debug_mask("blorp depth/stencil", mask & BUFFER_BITS_DEPTH_STENCIL);
      mask &= ~BUFFER_BITS_DEPTH_STENCIL;
   }

   GLbitfield tri_mask = mask & (BUFFER_BIT_STENCIL |
                                 BUFFER_BIT_DEPTH);

   if (tri_mask) {
      debug_mask("tri", tri_mask);
      mask &= ~tri_mask;
      _mesa_meta_glsl_Clear(&brw->ctx, tri_mask);
   }

   /* Any strange buffers get passed off to swrast.  The only thing that
    * should be left at this point is the accumulation buffer.
    */
   assert((mask & ~BUFFER_BIT_ACCUM) == 0);
   if (mask) {
      debug_mask("swrast", mask);
      _swrast_Clear(ctx, mask);
   }
}

void
brw_init_clear_functions(struct dd_function_table *functions)
{
   functions->Clear = brw_clear;
}
@@ -1,210 +0,0 @@
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "main/macros.h"
#include "main/enums.h"

#include "brw_batch.h"

#include "brw_defines.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_state.h"
#include "compiler/brw_eu.h"

#include "util/ralloc.h"

static void
compile_clip_prog(struct brw_context *brw, struct brw_clip_prog_key *key)
{
   const unsigned *program;
   void *mem_ctx;
   unsigned program_size;

   mem_ctx = ralloc_context(NULL);

   struct brw_clip_prog_data prog_data;
   program = brw_compile_clip(brw->screen->compiler, mem_ctx, key, &prog_data,
                              &brw->vue_map_geom_out, &program_size);

   brw_upload_cache(&brw->cache,
                    BRW_CACHE_CLIP_PROG,
                    key, sizeof(*key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->clip.prog_offset, &brw->clip.prog_data);
   ralloc_free(mem_ctx);
}

/* Calculate interpolants for triangle and line rasterization.
 */
void
brw_upload_clip_prog(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct brw_clip_prog_key key;

   if (!brw_state_dirty(brw,
                        _NEW_BUFFERS |
                        _NEW_LIGHT |
                        _NEW_POLYGON |
                        _NEW_TRANSFORM,
                        BRW_NEW_BLORP |
                        BRW_NEW_FS_PROG_DATA |
                        BRW_NEW_REDUCED_PRIMITIVE |
                        BRW_NEW_VUE_MAP_GEOM_OUT))
      return;

   memset(&key, 0, sizeof(key));

   /* Populate the key: */

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);
   if (wm_prog_data) {
      key.contains_flat_varying = wm_prog_data->contains_flat_varying;
      key.contains_noperspective_varying =
         wm_prog_data->contains_noperspective_varying;

      STATIC_ASSERT(sizeof(key.interp_mode) ==
                    sizeof(wm_prog_data->interp_mode));
      memcpy(key.interp_mode, wm_prog_data->interp_mode,
             sizeof(key.interp_mode));
   }

   /* BRW_NEW_REDUCED_PRIMITIVE */
   key.primitive = brw->reduced_primitive;
   /* BRW_NEW_VUE_MAP_GEOM_OUT */
   key.attrs = brw->vue_map_geom_out.slots_valid;

   /* _NEW_LIGHT */
   key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
   /* _NEW_TRANSFORM (also part of VUE map) */
   if (ctx->Transform.ClipPlanesEnabled)
      key.nr_userclip = util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;

   if (devinfo->ver == 5)
      key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP;
   else
      key.clip_mode = BRW_CLIP_MODE_NORMAL;

   /* _NEW_POLYGON */
   if (key.primitive == GL_TRIANGLES) {
      if (ctx->Polygon.CullFlag &&
          ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
         key.clip_mode = BRW_CLIP_MODE_REJECT_ALL;
      else {
         GLuint fill_front = BRW_CLIP_FILL_MODE_CULL;
         GLuint fill_back = BRW_CLIP_FILL_MODE_CULL;
         GLuint offset_front = 0;
         GLuint offset_back = 0;

         if (!ctx->Polygon.CullFlag ||
             ctx->Polygon.CullFaceMode != GL_FRONT) {
            switch (ctx->Polygon.FrontMode) {
            case GL_FILL:
               fill_front = BRW_CLIP_FILL_MODE_FILL;
               offset_front = 0;
               break;
            case GL_LINE:
               fill_front = BRW_CLIP_FILL_MODE_LINE;
               offset_front = ctx->Polygon.OffsetLine;
               break;
            case GL_POINT:
               fill_front = BRW_CLIP_FILL_MODE_POINT;
               offset_front = ctx->Polygon.OffsetPoint;
               break;
            }
         }

         if (!ctx->Polygon.CullFlag ||
             ctx->Polygon.CullFaceMode != GL_BACK) {
            switch (ctx->Polygon.BackMode) {
            case GL_FILL:
               fill_back = BRW_CLIP_FILL_MODE_FILL;
               offset_back = 0;
               break;
            case GL_LINE:
               fill_back = BRW_CLIP_FILL_MODE_LINE;
               offset_back = ctx->Polygon.OffsetLine;
               break;
            case GL_POINT:
               fill_back = BRW_CLIP_FILL_MODE_POINT;
               offset_back = ctx->Polygon.OffsetPoint;
               break;
            }
         }

         if (ctx->Polygon.BackMode != GL_FILL ||
             ctx->Polygon.FrontMode != GL_FILL) {
            key.do_unfilled = 1;

            /* Most cases the fixed function units will handle.  Cases where
             * one or more polygon faces are unfilled will require help:
             */
            key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED;

            if (offset_back || offset_front) {
               /* _NEW_POLYGON, _NEW_BUFFERS */
               key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2;
               key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
               key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD;
            }

            if (!brw->polygon_front_bit) {
               key.fill_ccw = fill_front;
               key.fill_cw = fill_back;
               key.offset_ccw = offset_front;
               key.offset_cw = offset_back;
               if (ctx->Light.Model.TwoSide &&
                   key.fill_cw != BRW_CLIP_FILL_MODE_CULL)
                  key.copy_bfc_cw = 1;
            } else {
               key.fill_cw = fill_front;
               key.fill_ccw = fill_back;
               key.offset_cw = offset_front;
               key.offset_ccw = offset_back;
               if (ctx->Light.Model.TwoSide &&
                   key.fill_ccw != BRW_CLIP_FILL_MODE_CULL)
                  key.copy_bfc_ccw = 1;
            }
         }
      }
   }

   if (!brw_search_cache(&brw->cache, BRW_CACHE_CLIP_PROG, &key, sizeof(key),
                         &brw->clip.prog_offset, &brw->clip.prog_data, true)) {
      compile_clip_prog(brw, &key);
   }
}
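/* Illustrative only: nr_userclip above is "highest enabled plane index + 1",
 * not a count of set bits, so the clip kernel considers every plane up to
 * the highest enabled one.  E.g. with only planes 0 and 3 enabled:
 */
static inline GLuint
example_nr_userclip(void)
{
   GLbitfield enabled = (1 << 0) | (1 << 3);      /* mask 0x9 */
   return util_logbase2(enabled) + 1;             /* == 4, not 2 */
}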
@@ -1,151 +0,0 @@
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <sys/errno.h>

#include "main/condrender.h"
#include "main/mtypes.h"
#include "main/state.h"
#include "brw_context.h"
#include "brw_draw.h"
#include "brw_state.h"
#include "brw_batch.h"
#include "brw_buffer_objects.h"
#include "brw_defines.h"

static void
brw_dispatch_compute_common(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   bool fail_next;

   if (!_mesa_check_conditional_render(ctx))
      return;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   brw_validate_textures(brw);

   brw_predraw_resolve_inputs(brw, false, NULL);

   /* Flush the batch if the batch/state buffers are nearly full.  We can
    * grow them if needed, but this is not free, so we'd like to avoid it.
    */
   brw_batch_require_space(brw, 600);
   brw_require_statebuffer_space(brw, 2500);
   brw_batch_save_state(brw);
   fail_next = brw_batch_saved_state_is_empty(brw);

 retry:
   brw->batch.no_wrap = true;
   brw_upload_compute_state(brw);

   brw->vtbl.emit_compute_walker(brw);

   brw->batch.no_wrap = false;

   if (!brw_batch_has_aperture_space(brw, 0)) {
      if (!fail_next) {
         brw_batch_reset_to_saved(brw);
         brw_batch_flush(brw);
         fail_next = true;
         goto retry;
      } else {
         int ret = brw_batch_flush(brw);
         WARN_ONCE(ret == -ENOSPC,
                   "i965: Single compute shader dispatch "
                   "exceeded available aperture space\n");
      }
   }

   /* Now that we know we haven't run out of aperture space, we can safely
    * reset the dirty bits.
    */
   brw_compute_state_finished(brw);

   if (brw->always_flush_batch)
      brw_batch_flush(brw);

   brw_program_cache_check_size(brw);

   /* Note: since compute shaders can't write to framebuffers, there's no need
    * to call brw_postdraw_set_buffers_need_resolve().
    */
}

static void
brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
{
   struct brw_context *brw = brw_context(ctx);

   brw->compute.num_work_groups_bo = NULL;
   brw->compute.num_work_groups = num_groups;
   brw->compute.group_size = NULL;
   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;

   brw_dispatch_compute_common(ctx);
}

static void
brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
{
   struct brw_context *brw = brw_context(ctx);
   static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
   struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer;
   struct brw_bo *bo =
      brw_bufferobj_buffer(brw,
                           brw_buffer_object(indirect_buffer),
                           indirect, 3 * sizeof(GLuint), false);

   brw->compute.num_work_groups_bo = bo;
   brw->compute.num_work_groups_offset = indirect;
   brw->compute.num_work_groups = indirect_group_counts;
   brw->compute.group_size = NULL;
   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;

   brw_dispatch_compute_common(ctx);
}

static void
brw_dispatch_compute_group_size(struct gl_context *ctx,
                                const GLuint *num_groups,
                                const GLuint *group_size)
{
   struct brw_context *brw = brw_context(ctx);

   brw->compute.num_work_groups_bo = NULL;
   brw->compute.num_work_groups = num_groups;
   brw->compute.group_size = group_size;
   ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;

   brw_dispatch_compute_common(ctx);
}

void
brw_init_compute_functions(struct dd_function_table *functions)
{
   functions->DispatchCompute = brw_dispatch_compute;
   functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
   functions->DispatchComputeGroupSize = brw_dispatch_compute_group_size;
}
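/* Illustrative only: the GL entry points that land in the hooks installed
 * above.  The indirect variant reads its three GLuint counts from the bound
 * GL_DISPATCH_INDIRECT_BUFFER on the GPU, which is why the code records the
 * BO and offset rather than a CPU-side array.  'params_bo' is hypothetical.
 */
static void
example_dispatch(GLuint params_bo)
{
   glDispatchCompute(64, 1, 1);                   /* direct counts */

   glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, params_bo);
   glDispatchComputeIndirect(0);                  /* counts read by the GPU */
}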
@@ -1,193 +0,0 @@
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *   Neil Roberts <neil@linux.intel.com>
 */

/** @file brw_conditional_render.c
 *
 * Support for conditional rendering based on query objects
 * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gfx7+.
 */

#include "main/condrender.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_batch.h"

static void
set_predicate_enable(struct brw_context *brw,
                     bool value)
{
   if (value)
      brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
   else
      brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER;
}

static void
set_predicate_for_overflow_query(struct brw_context *brw,
                                 struct brw_query_object *query,
                                 int stream_start, int count)
{
   if (!can_do_mi_math_and_lrr(brw->screen)) {
      brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY;
      return;
   }

   brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;

   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
    * command when loading the values into the predicate source registers for
    * conditional rendering.
    */
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);

   hsw_overflow_result_to_gpr0(brw, query, count);
   brw_load_register_reg64(brw, MI_PREDICATE_SRC0, HSW_CS_GPR(0));
   brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
}

static void
set_predicate_for_occlusion_query(struct brw_context *brw,
                                  struct brw_query_object *query)
{
   if (!brw->predicate.supported) {
      brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY;
      return;
   }

   brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;

   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
    * command when loading the values into the predicate source registers for
    * conditional rendering.
    */
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);

   brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo, 0 /* offset */);
   brw_load_register_mem64(brw, MI_PREDICATE_SRC1, query->bo, 8 /* offset */);
}

static void
set_predicate_for_result(struct brw_context *brw,
                         struct brw_query_object *query,
                         bool inverted)
{
   int load_op;

   assert(query->bo != NULL);

   switch (query->Base.Target) {
   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
      set_predicate_for_overflow_query(brw, query, 0, 1);
      break;
   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
      set_predicate_for_overflow_query(brw, query, 0, MAX_VERTEX_STREAMS);
      break;
   default:
      set_predicate_for_occlusion_query(brw, query);
   }

   if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) {
      if (inverted)
         load_op = MI_PREDICATE_LOADOP_LOAD;
      else
         load_op = MI_PREDICATE_LOADOP_LOADINV;

      BEGIN_BATCH(1);
      OUT_BATCH(GFX7_MI_PREDICATE |
                load_op |
                MI_PREDICATE_COMBINEOP_SET |
                MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
      ADVANCE_BATCH();
   }
}

static void
brw_begin_conditional_render(struct gl_context *ctx,
                             struct gl_query_object *q,
                             GLenum mode)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_query_object *query = (struct brw_query_object *) q;
   bool inverted;

   switch (mode) {
   case GL_QUERY_WAIT:
   case GL_QUERY_NO_WAIT:
   case GL_QUERY_BY_REGION_WAIT:
   case GL_QUERY_BY_REGION_NO_WAIT:
      inverted = false;
      break;
   case GL_QUERY_WAIT_INVERTED:
   case GL_QUERY_NO_WAIT_INVERTED:
   case GL_QUERY_BY_REGION_WAIT_INVERTED:
   case GL_QUERY_BY_REGION_NO_WAIT_INVERTED:
      inverted = true;
      break;
   default:
      unreachable("Unexpected conditional render mode");
   }

   /* If there are already samples from a BLT operation or if the query object
    * is ready then we can avoid looking at the values in the buffer and just
    * decide whether to draw using the CPU without stalling.
    */
   if (query->Base.Result || query->Base.Ready)
      set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted);
   else
      set_predicate_for_result(brw, query, inverted);
}

static void
brw_end_conditional_render(struct gl_context *ctx,
                           struct gl_query_object *q)
{
   struct brw_context *brw = brw_context(ctx);

   /* When there is no longer a conditional render in progress it should
    * always render.
    */
   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
}

void
brw_init_conditional_render_functions(struct dd_function_table *functions)
{
   functions->BeginConditionalRender = brw_begin_conditional_render;
   functions->EndConditionalRender = brw_end_conditional_render;
}

bool
brw_check_conditional_render(struct brw_context *brw)
{
   if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) {
      perf_debug("Conditional rendering is implemented in software and may "
                 "stall.\n");
      return _mesa_check_conditional_render(&brw->ctx);
   }

   return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
}
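/* Illustrative only: the application-side pattern these hooks implement.
 * With a *_NO_WAIT mode the predicate path above lets the GPU evaluate the
 * query result itself instead of stalling the CPU.  'query' is assumed to
 * be a previously generated query object.
 */
static void
example_conditional_draw(GLuint query)
{
   glBeginQuery(GL_ANY_SAMPLES_PASSED, query);
   /* ... draw a cheap proxy for the expensive object ... */
   glEndQuery(GL_ANY_SAMPLES_PASSED);

   glBeginConditionalRender(query, GL_QUERY_BY_REGION_NO_WAIT);
   /* ... expensive draw, skipped when no proxy samples passed ... */
   glEndConditionalRender();
}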
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,139 +0,0 @@
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2014 Intel Corporation All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand <jason.ekstrand@intel.com>
 */

#include "brw_blorp.h"
#include "brw_fbo.h"
#include "brw_tex.h"
#include "brw_blit.h"
#include "brw_mipmap_tree.h"
#include "main/formats.h"
#include "main/teximage.h"
#include "drivers/common/meta.h"

static void
copy_miptrees(struct brw_context *brw,
              struct brw_mipmap_tree *src_mt,
              int src_x, int src_y, int src_z, unsigned src_level,
              struct brw_mipmap_tree *dst_mt,
              int dst_x, int dst_y, int dst_z, unsigned dst_level,
              int src_width, int src_height)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->ver <= 5) {
      /* On gfx4-5, try BLT first.
       *
       * Gfx4-5 have a single ring for both 3D and BLT operations, so there's
       * no inter-ring synchronization issues like on Gfx6+.  It is apparently
       * faster than using the 3D pipeline.  Original Gfx4 also has to rebase
       * and copy miptree slices in order to render to unaligned locations.
       */
      if (brw_miptree_copy(brw, src_mt, src_level, src_z, src_x, src_y,
                           dst_mt, dst_level, dst_z, dst_x, dst_y,
                           src_width, src_height))
         return;
   }

   brw_blorp_copy_miptrees(brw,
                           src_mt, src_level, src_z,
                           dst_mt, dst_level, dst_z,
                           src_x, src_y, dst_x, dst_y,
                           src_width, src_height);
}

static void
brw_copy_image_sub_data(struct gl_context *ctx,
                        struct gl_texture_image *src_image,
                        struct gl_renderbuffer *src_renderbuffer,
                        int src_x, int src_y, int src_z,
                        struct gl_texture_image *dst_image,
                        struct gl_renderbuffer *dst_renderbuffer,
                        int dst_x, int dst_y, int dst_z,
                        int src_width, int src_height)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_mipmap_tree *src_mt, *dst_mt;
   unsigned src_level, dst_level;

   if (src_image) {
      src_mt = brw_texture_image(src_image)->mt;
      src_level = src_image->Level + src_image->TexObject->Attrib.MinLevel;

      /* Cube maps actually have different images per face */
      if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
         src_z = src_image->Face;

      src_z += src_image->TexObject->Attrib.MinLayer;
   } else {
      assert(src_renderbuffer);
      src_mt = brw_renderbuffer(src_renderbuffer)->mt;
      src_image = src_renderbuffer->TexImage;
      src_level = 0;
   }

   if (dst_image) {
      dst_mt = brw_texture_image(dst_image)->mt;

      dst_level = dst_image->Level + dst_image->TexObject->Attrib.MinLevel;

      /* Cube maps actually have different images per face */
      if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
         dst_z = dst_image->Face;

      dst_z += dst_image->TexObject->Attrib.MinLayer;
   } else {
      assert(dst_renderbuffer);
      dst_mt = brw_renderbuffer(dst_renderbuffer)->mt;
      dst_image = dst_renderbuffer->TexImage;
      dst_level = 0;
   }

   copy_miptrees(brw, src_mt, src_x, src_y, src_z, src_level,
                 dst_mt, dst_x, dst_y, dst_z, dst_level,
                 src_width, src_height);

   /* CopyImage only works for equal formats, texture view equivalence
    * classes, and a couple special cases for compressed textures.
    *
    * Notably, GL_DEPTH_STENCIL does not appear in any equivalence
    * classes, so we know the formats must be the same, and thus both
    * will either have stencil, or not.  They can't be mismatched.
    */
   assert((src_mt->stencil_mt != NULL) == (dst_mt->stencil_mt != NULL));

   if (dst_mt->stencil_mt) {
      copy_miptrees(brw, src_mt->stencil_mt, src_x, src_y, src_z, src_level,
                    dst_mt->stencil_mt, dst_x, dst_y, dst_z, dst_level,
                    src_width, src_height);
   }
}

void
brw_init_copy_image_functions(struct dd_function_table *functions)
{
   functions->CopyImageSubData = brw_copy_image_sub_data;
}
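/* Illustrative only: the GL entry point routed to brw_copy_image_sub_data()
 * above.  Formats must be identical or view-compatible; here one 256x256
 * level is copied between two hypothetical RGBA8 2D textures.
 */
static void
example_copy_level(GLuint src_tex, GLuint dst_tex)
{
   glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, 0, 0, 0,
                      dst_tex, GL_TEXTURE_2D, 0, 0, 0, 0,
                      256, 256, 1);
}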
@@ -1,220 +0,0 @@
/*
|
||||
* Copyright (c) 2014 - 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "brw_context.h"
|
||||
#include "brw_cs.h"
|
||||
#include "brw_wm.h"
|
||||
#include "brw_mipmap_tree.h"
|
||||
#include "brw_state.h"
|
||||
#include "brw_batch.h"
|
||||
#include "compiler/brw_nir.h"
|
||||
#include "brw_program.h"
|
||||
#include "compiler/glsl/ir_uniform.h"
|
||||
|
||||
static void
|
||||
assign_cs_binding_table_offsets(const struct intel_device_info *devinfo,
|
||||
const struct gl_program *prog,
|
||||
struct brw_cs_prog_data *prog_data)
|
||||
{
|
||||
uint32_t next_binding_table_offset = 0;
|
||||
|
||||
/* May not be used if the gl_NumWorkGroups variable is not accessed. */
|
||||
prog_data->binding_table.work_groups_start = next_binding_table_offset;
|
||||
next_binding_table_offset++;
|
||||
|
||||
brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
|
||||
next_binding_table_offset);
|
||||
}
|
||||
|
||||
static bool
brw_codegen_cs_prog(struct brw_context *brw,
                    struct brw_program *cp,
                    struct brw_cs_prog_key *key)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const GLuint *program;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_cs_prog_data prog_data;
   bool start_busy = false;
   double start_time = 0;
   nir_shader *nir = nir_shader_clone(mem_ctx, cp->program.nir);

   memset(&prog_data, 0, sizeof(prog_data));

   if (cp->program.info.shared_size > 64 * 1024) {
      cp->program.sh.data->LinkStatus = LINKING_FAILURE;
      const char *error_str =
         "Compute shader used more than 64KB of shared variables";
      ralloc_strcat(&cp->program.sh.data->InfoLog, error_str);
      _mesa_problem(NULL, "Failed to link compute shader: %s\n", error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data);

   brw_nir_setup_glsl_uniforms(mem_ctx, nir,
                               &cp->program, &prog_data.base, true);

   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    brw_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   brw_nir_lower_cs_intrinsics(nir);

   struct brw_compile_cs_params params = {
      .nir = nir,
      .key = key,
      .prog_data = &prog_data,
      .log_data = brw,
   };

   if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
      params.shader_time = true;
      params.shader_time_index =
         brw_get_shader_time_index(brw, &cp->program, ST_CS, true);
   }

   program = brw_compile_cs(brw->screen->compiler, mem_ctx, &params);
   if (program == NULL) {
      cp->program.sh.data->LinkStatus = LINKING_FAILURE;
      ralloc_strcat(&cp->program.sh.data->InfoLog, params.error_str);
      _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
                    params.error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      if (cp->compiled_once) {
         brw_debug_recompile(brw, MESA_SHADER_COMPUTE, cp->program.Id,
                             &key->base);
      }
      cp->compiled_once = true;

      if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
         perf_debug("CS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
   }

   brw_alloc_stage_scratch(brw, &brw->cs.base, prog_data.base.total_scratch);

   /* The param and pull_param arrays will be freed by the shader cache. */
   ralloc_steal(NULL, prog_data.base.param);
   ralloc_steal(NULL, prog_data.base.pull_param);
   brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
                    key, sizeof(*key),
                    program, prog_data.base.program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->cs.base.prog_offset, &brw->cs.base.prog_data);
   ralloc_free(mem_ctx);

   return true;
}


void
brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_COMPUTE_PROGRAM */
   const struct brw_program *cp =
      (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];

   memset(key, 0, sizeof(*key));

   /* _NEW_TEXTURE */
   brw_populate_base_prog_key(ctx, cp, &key->base);
}


void
brw_upload_cs_prog(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct brw_cs_prog_key key;
   struct brw_program *cp =
      (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];

   if (!cp)
      return;

   if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
      return;

   brw->cs.base.sampler_count =
      util_last_bit(ctx->ComputeProgram._Current->SamplersUsed);

   brw_cs_populate_key(brw, &key);

   if (brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG, &key, sizeof(key),
                        &brw->cs.base.prog_offset, &brw->cs.base.prog_data,
                        true))
      return;

   if (brw_disk_cache_upload_program(brw, MESA_SHADER_COMPUTE))
      return;

   cp = (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
   cp->id = key.base.program_string_id;

   ASSERTED bool success = brw_codegen_cs_prog(brw, cp, &key);
   assert(success);
}

void
brw_cs_populate_default_key(const struct brw_compiler *compiler,
                            struct brw_cs_prog_key *key,
                            struct gl_program *prog)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   memset(key, 0, sizeof(*key));
   brw_populate_default_base_prog_key(devinfo, brw_program(prog), &key->base);
}

bool
brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_cs_prog_key key;

   struct brw_program *bcp = brw_program(prog);

   brw_cs_populate_default_key(brw->screen->compiler, &key, prog);

   uint32_t old_prog_offset = brw->cs.base.prog_offset;
   struct brw_stage_prog_data *old_prog_data = brw->cs.base.prog_data;

   bool success = brw_codegen_cs_prog(brw, bcp, &key);

   brw->cs.base.prog_offset = old_prog_offset;
   brw->cs.base.prog_data = old_prog_data;

   return success;
}
@@ -1,46 +0,0 @@
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */


#ifndef BRW_CS_H
#define BRW_CS_H

#ifdef __cplusplus
extern "C" {
#endif

void
brw_upload_cs_prog(struct brw_context *brw);

void
brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key);
void
brw_cs_populate_default_key(const struct brw_compiler *compiler,
                            struct brw_cs_prog_key *key,
                            struct gl_program *prog);

#ifdef __cplusplus
}
#endif

#endif /* BRW_CS_H */
@@ -1,356 +0,0 @@
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

/** @file brw_curbe.c
 *
 * Push constant handling for gfx4/5.
 *
 * Push constants are constant values (such as GLSL uniforms) that are
 * pre-loaded into a shader stage's register space at thread spawn time.  On
 * gfx4 and gfx5, we create a blob in memory containing all the push constants
 * for all the stages in order.  At CMD_CONST_BUFFER time that blob is loaded
 * into URB space as a constant URB entry (CURBE) so that it can be accessed
 * quickly at thread setup time.  Each individual fixed function unit's state
 * (brw_vs_state.c for example) tells the hardware which subset of the CURBE
 * it wants in its register space, and we calculate those areas here under the
 * BRW_NEW_PUSH_CONSTANT_ALLOCATION state flag.  The brw_urb.c allocation will
 * control how many CURBEs can be loaded into the hardware at once before a
 * pipeline stall occurs at CMD_CONST_BUFFER time.
 *
 * On gfx6+, constant handling becomes a much simpler set of per-unit state.
 * See gfx6_upload_vec4_push_constants() in gfx6_vs_state.c for that code.
 */
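/* A worked example of the sizing math used below (illustrative only, not
 * part of the original file): constants are packed in 512-bit CURBE units,
 * i.e. 16 floats per unit, so a stage needing nr_params floats occupies
 * (nr_params + 15) / 16 units.  E.g. a fragment program with 20 float
 * uniforms needs (20 + 15) / 16 = 2 units, and 6 user clip planes plus the
 * 6 fixed planes need (12 * 4 + 15) / 16 = 3 units.
 */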


#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_statevars.h"
#include "util/bitscan.h"
#include "brw_batch.h"
#include "brw_buffer_objects.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_util.h"
#include "util/u_math.h"


/**
 * Partition the CURBE between the various users of constant values.
 *
 * If the users all fit within the previous allocation, we avoid changing
 * the layout because that means reuploading all unit state and uploading new
 * constant buffers.
 */
static void calculate_curbe_offsets( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FS_PROG_DATA */
   const GLuint nr_fp_regs = (brw->wm.base.prog_data->nr_params + 15) / 16;

   /* BRW_NEW_VS_PROG_DATA */
   const GLuint nr_vp_regs = (brw->vs.base.prog_data->nr_params + 15) / 16;
   GLuint nr_clip_regs = 0;
   GLuint total_regs;

   /* _NEW_TRANSFORM */
   if (ctx->Transform.ClipPlanesEnabled) {
      GLuint nr_planes = 6 + util_bitcount(ctx->Transform.ClipPlanesEnabled);
      nr_clip_regs = (nr_planes * 4 + 15) / 16;
   }


   total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;

   /* The CURBE allocation size is limited to 32 512-bit units (128 EU
    * registers, or 1024 floats).  See CS_URB_STATE in the gfx4 or gfx5
    * (volume 1, part 1) PRMs.
    *
    * Note that in brw_fs.cpp we're only loading up to 16 EU registers of
    * values as push constants before spilling to pull constants, and in
    * brw_vec4.cpp we're loading up to 32 registers of push constants.  An EU
    * register is 1/2 of one of these URB entry units, so that leaves us 16 EU
    * regs for clip.
    */
   assert(total_regs <= 32);

   /* Lazy resize:
    */
   if (nr_fp_regs > brw->curbe.wm_size ||
       nr_vp_regs > brw->curbe.vs_size ||
       nr_clip_regs != brw->curbe.clip_size ||
       (total_regs < brw->curbe.total_size / 4 &&
        brw->curbe.total_size > 16)) {

      GLuint reg = 0;

      /* Calculate a new layout:
       */
      reg = 0;
      brw->curbe.wm_start = reg;
      brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
      brw->curbe.clip_start = reg;
      brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
      brw->curbe.vs_start = reg;
      brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
      brw->curbe.total_size = reg;

      if (0)
         fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
                 brw->curbe.wm_start,
                 brw->curbe.wm_size,
                 brw->curbe.clip_start,
                 brw->curbe.clip_size,
                 brw->curbe.vs_start,
                 brw->curbe.vs_size );

      brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
   }
}


const struct brw_tracked_state brw_curbe_offsets = {
   .dirty = {
      .mesa = _NEW_TRANSFORM,
      .brw  = BRW_NEW_CONTEXT |
              BRW_NEW_BLORP |
              BRW_NEW_FS_PROG_DATA |
              BRW_NEW_VS_PROG_DATA,
   },
   .emit = calculate_curbe_offsets
};




/** Uploads the CS_URB_STATE packet.
 *
 * Just like brw_vs_state.c and brw_wm_state.c define a URB entry size and
 * number of entries for their stages, constant buffers do so using this state
 * packet.  Having multiple CURBEs in the URB at the same time allows the
 * hardware to avoid a pipeline stall between primitives using different
 * constant buffer contents.
 */
void brw_upload_cs_urb_state(struct brw_context *brw)
{
   BEGIN_BATCH(2);
   OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));

   /* BRW_NEW_URB_FENCE */
   if (brw->urb.csize == 0) {
      OUT_BATCH(0);
   } else {
      /* BRW_NEW_URB_FENCE */
      assert(brw->urb.nr_cs_entries);
      OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
   }
   ADVANCE_BATCH();
}

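/* Illustrative decode of the second CS_URB_STATE dword emitted above
 * (assumed values, not part of the original file): with csize == 2 512-bit
 * units per entry and nr_cs_entries == 4, the code packs
 * ((2 - 1) << 4) | 4 == 0x14, i.e. an entry-size field of size minus one in
 * bits 6:4 and the number of URB entries in the low bits.
 */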

static const GLfloat fixed_plane[6][4] = {
   { 0,    0,   -1, 1 },
   { 0,    0,    1, 1 },
   { 0,   -1,    0, 1 },
   { 0,    1,    0, 1 },
   {-1,    0,    0, 1 },
   { 1,    0,    0, 1 }
};

/**
 * Gathers together all the uniform values into a block of memory to be
 * uploaded into the CURBE, then emits the state packet telling the hardware
 * the new location.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   gl_constant_value *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];

   if (sz == 0) {
      goto emit;
   }

   buf = brw_upload_space(&brw->upload, bufsz, 64,
                          &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);

   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      _mesa_load_state_parameters(ctx, fp->Parameters);

      /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
      GLuint offset = brw->curbe.wm_start * 16;

      /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      brw_populate_constant_data(brw, fp, &brw->wm.base, &buf[offset],
                                 brw->wm.base.prog_data->param,
                                 brw->wm.base.prog_data->nr_params);
   }

   /* clipper constants */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLbitfield mask;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
         buf[offset + i * 4 + 0].f = fixed_plane[i][0];
         buf[offset + i * 4 + 1].f = fixed_plane[i][1];
         buf[offset + i * 4 + 2].f = fixed_plane[i][2];
         buf[offset + i * 4 + 3].f = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      clip_planes = brw_select_clip_planes(ctx);
      mask = ctx->Transform.ClipPlanesEnabled;
      while (mask) {
         const int j = u_bit_scan(&mask);
         buf[offset + i * 4 + 0].f = clip_planes[j][0];
         buf[offset + i * 4 + 1].f = clip_planes[j][1];
         buf[offset + i * 4 + 2].f = clip_planes[j][2];
         buf[offset + i * 4 + 3].f = clip_planes[j][3];
         i++;
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      _mesa_load_state_parameters(ctx, vp->Parameters);

      GLuint offset = brw->curbe.vs_start * 16;

      /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      brw_populate_constant_data(brw, vp, &brw->vs.base, &buf[offset],
                                 brw->vs.base.prog_data->param,
                                 brw->vs.base.prog_data->nr_params);
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4)
         fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                 buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
   }

   /* Because this provokes an action (i.e. copying the constants into the
    * URB), it shouldn't be short-circuited if identical to the previous
    * time - because e.g. the URB destination may have changed, or the URB
    * contents may differ from last time.
    *
    * Note that the data referred to is actually copied internally, not
    * just used in place according to the passed pointer.
    *
    * It appears that the CS unit takes care of using each available URB
    * entry (Const URB Entry == CURBE) in turn, and issuing flushes as
    * necessary when double-buffering of CURBEs isn't possible.
    */

 emit:
   /* BRW_NEW_URB_FENCE: From the gfx4 PRM, volume 1, section 3.9.8
    * (CONSTANT_BUFFER (CURBE Load)):
    *
    *     "Modifying the CS URB allocation via URB_FENCE invalidates any
    *      previous CURBE entries. Therefore software must subsequently
    *      [re]issue a CONSTANT_BUFFER command before CURBE data can be used
    *      in the pipeline."
    */
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo, 0,
                (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();

   /* Work around a Broadwater/Crestline depth interpolator bug.  The
    * following sequence will cause GPU hangs:
    *
    * 1. Change state so that all depth related fields in CC_STATE are
    *    disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
    * 2. Emit a CONSTANT_BUFFER packet.
    * 3. Draw via 3DPRIMITIVE.
    *
    * The recommended workaround is to emit a non-pipelined state change after
    * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
    *
    * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),
    * and always emit it when "PS Use Source Depth" is set.  We could be more
    * precise, but the additional complexity is probably not worth it.
    *
    * BRW_NEW_FRAGMENT_PROGRAM
    */
   if (devinfo->verx10 == 40 &&
       BITSET_TEST(fp->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}

const struct brw_tracked_state brw_constant_buffer = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw  = BRW_NEW_BATCH |
              BRW_NEW_BLORP |
              BRW_NEW_PUSH_CONSTANT_ALLOCATION |
              BRW_NEW_FRAGMENT_PROGRAM |
              BRW_NEW_FS_PROG_DATA |
              BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
              BRW_NEW_URB_FENCE |
              BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_upload_constant_buffer,
};
File diff suppressed because it is too large
@@ -1,417 +0,0 @@
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/glsl/ir_uniform.h"
#include "compiler/glsl/shader_cache.h"
#include "main/mtypes.h"
#include "util/blob.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"

#include "compiler/brw_eu.h"
#include "dev/intel_debug.h"

#include "brw_context.h"
#include "brw_program.h"
#include "brw_cs.h"
#include "brw_gs.h"
#include "brw_state.h"
#include "brw_vs.h"
#include "brw_wm.h"

static bool
debug_enabled_for_stage(gl_shader_stage stage)
{
   static const uint64_t stage_debug_flags[] = {
      DEBUG_VS, DEBUG_TCS, DEBUG_TES, DEBUG_GS, DEBUG_WM, DEBUG_CS,
   };
   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_debug_flags));
   return INTEL_DEBUG(stage_debug_flags[stage]);
}

static void
intel_shader_sha1(struct gl_program *prog, gl_shader_stage stage,
                  void *key, unsigned char *out_sha1)
{
   char sha1_buf[41];
   unsigned char sha1[20];
   char manifest[256];
   int offset = 0;

   _mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
   offset += snprintf(manifest, sizeof(manifest), "program: %s\n", sha1_buf);

   _mesa_sha1_compute(key, brw_prog_key_size(stage), sha1);
   _mesa_sha1_format(sha1_buf, sha1);
   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
                      "%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage),
                      sha1_buf);

   _mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
}

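/* For reference (a sketch, not part of the original file): the manifest
 * hashed above is a small text blob of the form
 *
 *    program: <40-char sha1 of the linked program>
 *    VS_key: <40-char sha1 of the stage's program key>
 *
 * so the final cache key changes whenever either the GLSL source or the
 * state-derived program key changes.  The "VS" abbreviation comes from
 * _mesa_shader_stage_to_abbrev() and varies per stage.
 */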

static bool
read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
                       gl_shader_stage stage, const uint8_t **program,
                       struct brw_stage_prog_data *prog_data)
{
   return
      brw_read_blob_program_data(binary, prog, stage, program, prog_data) &&
      (binary->current == binary->end);
}

static bool
read_and_upload(struct brw_context *brw, struct disk_cache *cache,
                struct gl_program *prog, gl_shader_stage stage)
{
   unsigned char binary_sha1[20];

   union brw_any_prog_key prog_key;

   switch (stage) {
   case MESA_SHADER_VERTEX:
      brw_vs_populate_key(brw, &prog_key.vs);
      break;
   case MESA_SHADER_TESS_CTRL:
      brw_tcs_populate_key(brw, &prog_key.tcs);
      break;
   case MESA_SHADER_TESS_EVAL:
      brw_tes_populate_key(brw, &prog_key.tes);
      break;
   case MESA_SHADER_GEOMETRY:
      brw_gs_populate_key(brw, &prog_key.gs);
      break;
   case MESA_SHADER_FRAGMENT:
      brw_wm_populate_key(brw, &prog_key.wm);
      break;
   case MESA_SHADER_COMPUTE:
      brw_cs_populate_key(brw, &prog_key.cs);
      break;
   default:
      unreachable("Unsupported stage!");
   }

   /* We don't care what instance of the program it is for the disk cache hash
    * lookup, so set the id to 0 for the sha1 hashing. program_string_id will
    * be set below.
    */
   prog_key.base.program_string_id = 0;

   intel_shader_sha1(prog, stage, &prog_key, binary_sha1);

   size_t buffer_size;
   uint8_t *buffer = disk_cache_get(cache, binary_sha1, &buffer_size);
   if (buffer == NULL) {
      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
         char sha1_buf[41];
         _mesa_sha1_format(sha1_buf, binary_sha1);
         fprintf(stderr, "No cached %s binary found for: %s\n",
                 _mesa_shader_stage_to_abbrev(stage), sha1_buf);
      }
      return false;
   }

   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
      char sha1_buf[41];
      _mesa_sha1_format(sha1_buf, binary_sha1);
      fprintf(stderr, "attempting to populate bo cache with binary: %s\n",
              sha1_buf);
   }

   struct blob_reader binary;
   blob_reader_init(&binary, buffer, buffer_size);

   const uint8_t *program;
   struct brw_stage_prog_data *prog_data =
      ralloc_size(NULL, sizeof(union brw_any_prog_data));
   if (!read_blob_program_data(&binary, prog, stage, &program, prog_data)) {
      /* Something very bad has gone wrong; discard the item from the cache
       * and rebuild from source.
       */
      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
         fprintf(stderr, "Error reading program from cache (invalid i965 "
                 "cache item)\n");
      }

      disk_cache_remove(cache, binary_sha1);
      ralloc_free(prog_data);
      free(buffer);
      return false;
   }

   enum brw_cache_id cache_id;
   struct brw_stage_state *stage_state;

   switch (stage) {
   case MESA_SHADER_VERTEX:
      cache_id = BRW_CACHE_VS_PROG;
      stage_state = &brw->vs.base;
      break;
   case MESA_SHADER_TESS_CTRL:
      cache_id = BRW_CACHE_TCS_PROG;
      stage_state = &brw->tcs.base;
      break;
   case MESA_SHADER_TESS_EVAL:
      cache_id = BRW_CACHE_TES_PROG;
      stage_state = &brw->tes.base;
      break;
   case MESA_SHADER_GEOMETRY:
      cache_id = BRW_CACHE_GS_PROG;
      stage_state = &brw->gs.base;
      break;
   case MESA_SHADER_FRAGMENT:
      cache_id = BRW_CACHE_FS_PROG;
      stage_state = &brw->wm.base;
      break;
   case MESA_SHADER_COMPUTE:
      cache_id = BRW_CACHE_CS_PROG;
      stage_state = &brw->cs.base;
      break;
   default:
      unreachable("Unsupported stage!");
   }

   prog_key.base.program_string_id = brw_program(prog)->id;

   brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch);

   if (unlikely(debug_enabled_for_stage(stage))) {
      fprintf(stderr, "NIR for %s program %d loaded from disk shader cache:\n",
              _mesa_shader_stage_to_abbrev(stage), brw_program(prog)->id);
      brw_program_deserialize_driver_blob(&brw->ctx, prog, stage);
      nir_shader *nir = prog->nir;
      nir_print_shader(nir, stderr);
      fprintf(stderr, "Native code for %s %s shader %s from disk cache:\n",
              nir->info.label ? nir->info.label : "unnamed",
              _mesa_shader_stage_to_string(nir->info.stage), nir->info.name);
      brw_disassemble_with_labels(&brw->screen->devinfo, program, 0,
                                  prog_data->program_size, stderr);
   }

   brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
                    program, prog_data->program_size, prog_data,
                    brw_prog_data_size(stage), &stage_state->prog_offset,
                    &stage_state->prog_data);

   prog->program_written_to_cache = true;

   ralloc_free(prog_data);
   free(buffer);

   return true;
}

bool
brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage)
{
   struct disk_cache *cache = brw->ctx.Cache;
   if (cache == NULL)
      return false;

   struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
   if (prog == NULL)
      return false;

   if (prog->sh.data->spirv)
      return false;

   if (brw->ctx._Shader->Flags & GLSL_CACHE_FALLBACK)
      goto fail;

   if (!read_and_upload(brw, cache, prog, stage))
      goto fail;

   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
      fprintf(stderr, "read gen program from cache\n");
   }

   return true;

fail:
   prog->program_written_to_cache = false;
   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
      fprintf(stderr, "falling back to nir %s.\n",
              _mesa_shader_stage_to_abbrev(prog->info.stage));
   }

   brw_program_deserialize_driver_blob(&brw->ctx, prog, stage);

   return false;
}

static void
write_program_data(struct brw_context *brw, struct gl_program *prog,
                   void *key, struct brw_stage_prog_data *prog_data,
                   uint32_t prog_offset, struct disk_cache *cache,
                   gl_shader_stage stage)
{
   struct blob binary;
   blob_init(&binary);

   const void *program_map = brw->cache.map + prog_offset;
   /* TODO: Improve perf for non-LLC. It would be best to save it at program
    * generation time when the program is in normal memory accessible with
    * cache to the CPU. Another easier change would be to use
    * _mesa_streaming_load_memcpy to read from the program mapped memory. */
   brw_write_blob_program_data(&binary, stage, program_map, prog_data);

   unsigned char sha1[20];
   char buf[41];
   intel_shader_sha1(prog, stage, key, sha1);
   _mesa_sha1_format(buf, sha1);
   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
      fprintf(stderr, "putting binary in cache: %s\n", buf);
   }

   disk_cache_put(cache, sha1, binary.data, binary.size, NULL);

   prog->program_written_to_cache = true;
   blob_finish(&binary);
}

void
brw_disk_cache_write_render_programs(struct brw_context *brw)
{
   struct disk_cache *cache = brw->ctx.Cache;
   if (cache == NULL)
      return;

   struct gl_program *prog;
   gl_shader_stage stage;
   for (stage = MESA_SHADER_VERTEX; stage <= MESA_SHADER_FRAGMENT; stage++) {
      prog = brw->ctx._Shader->CurrentProgram[stage];
      if (prog && prog->sh.data->spirv)
         return;
   }

   prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
   if (prog && !prog->program_written_to_cache) {
      struct brw_vs_prog_key vs_key;
      brw_vs_populate_key(brw, &vs_key);
      vs_key.base.program_string_id = 0;

      write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data,
                         brw->vs.base.prog_offset, cache,
                         MESA_SHADER_VERTEX);
   }

   prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
   if (prog && !prog->program_written_to_cache) {
      struct brw_tcs_prog_key tcs_key;
      brw_tcs_populate_key(brw, &tcs_key);
      tcs_key.base.program_string_id = 0;

      write_program_data(brw, prog, &tcs_key, brw->tcs.base.prog_data,
                         brw->tcs.base.prog_offset, cache,
                         MESA_SHADER_TESS_CTRL);
   }

   prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
   if (prog && !prog->program_written_to_cache) {
      struct brw_tes_prog_key tes_key;
      brw_tes_populate_key(brw, &tes_key);
      tes_key.base.program_string_id = 0;

      write_program_data(brw, prog, &tes_key, brw->tes.base.prog_data,
                         brw->tes.base.prog_offset, cache,
                         MESA_SHADER_TESS_EVAL);
   }

   prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
   if (prog && !prog->program_written_to_cache) {
      struct brw_gs_prog_key gs_key;
      brw_gs_populate_key(brw, &gs_key);
      gs_key.base.program_string_id = 0;

      write_program_data(brw, prog, &gs_key, brw->gs.base.prog_data,
                         brw->gs.base.prog_offset, cache,
                         MESA_SHADER_GEOMETRY);
   }

   prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
   if (prog && !prog->program_written_to_cache) {
      struct brw_wm_prog_key wm_key;
      brw_wm_populate_key(brw, &wm_key);
      wm_key.base.program_string_id = 0;

      write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data,
                         brw->wm.base.prog_offset, cache,
                         MESA_SHADER_FRAGMENT);
   }
}

void
brw_disk_cache_write_compute_program(struct brw_context *brw)
{
   struct disk_cache *cache = brw->ctx.Cache;
   if (cache == NULL)
      return;

   struct gl_program *prog =
      brw->ctx._Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog && prog->sh.data->spirv)
      return;

   if (prog && !prog->program_written_to_cache) {
      struct brw_cs_prog_key cs_key;
      brw_cs_populate_key(brw, &cs_key);
      cs_key.base.program_string_id = 0;

      write_program_data(brw, prog, &cs_key, brw->cs.base.prog_data,
                         brw->cs.base.prog_offset, cache,
                         MESA_SHADER_COMPUTE);
   }
}

void
brw_disk_cache_init(struct brw_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
   if (INTEL_DEBUG(DEBUG_DISK_CACHE_DISABLE_MASK))
      return;

   /* array length: print length + null char + 1 extra to verify it is unused */
   char renderer[11];
   ASSERTED int len = snprintf(renderer, sizeof(renderer), "i965_%04x",
                               screen->deviceID);
   assert(len == sizeof(renderer) - 2);

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(brw_disk_cache_init);
   assert(note && build_id_length(note) == 20 /* sha1 */);

   const uint8_t *id_sha1 = build_id_data(note);
   assert(id_sha1);

   char timestamp[41];
   _mesa_sha1_format(timestamp, id_sha1);

   const uint64_t driver_flags =
      brw_get_compiler_config_value(screen->compiler);
   screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}
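/* Worked example for the renderer string above (assumed device, not part
 * of the original file): for a GM45 with deviceID 0x2a42, snprintf()
 * produces "i965_2a42" -- 9 characters plus the terminating NUL, leaving
 * the 11th byte of renderer[] unused, which is exactly what
 * assert(len == sizeof(renderer) - 2) verifies.
 */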
File diff suppressed because it is too large
@@ -1,87 +0,0 @@
/*
 * Copyright 2005 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef BRW_DRAW_H
#define BRW_DRAW_H

#include "main/mtypes.h"
#include "brw_bufmgr.h"

struct brw_context;

uint32_t *
brw_emit_vertex_buffer_state(struct brw_context *brw,
                             unsigned buffer_nr,
                             struct brw_bo *bo,
                             unsigned start_offset,
                             unsigned end_offset,
                             unsigned stride,
                             unsigned step_rate,
                             uint32_t *__map);

#define EMIT_VERTEX_BUFFER_STATE(...) __map = \
   brw_emit_vertex_buffer_state(__VA_ARGS__, __map)

void brw_draw_prims(struct gl_context *ctx,
                    const struct _mesa_prim *prims,
                    unsigned nr_prims,
                    const struct _mesa_index_buffer *ib,
                    bool index_bounds_valid,
                    bool primitive_restart,
                    unsigned restart_index,
                    unsigned min_index,
                    unsigned max_index,
                    unsigned num_instances,
                    unsigned base_instance);

void brw_init_draw_functions(struct dd_function_table *functions);
void brw_draw_init( struct brw_context *brw );
void brw_draw_destroy( struct brw_context *brw );

void brw_prepare_shader_draw_parameters(struct brw_context *);

/* brw_primitive_restart.c */
GLboolean
brw_handle_primitive_restart(struct gl_context *ctx,
                             const struct _mesa_prim *prims,
                             GLuint nr_prims,
                             const struct _mesa_index_buffer *ib,
                             GLuint num_instances, GLuint base_instance,
                             bool primitive_restart,
                             unsigned restart_index);

void
brw_draw_indirect_prims(struct gl_context *ctx,
                        GLuint mode,
                        struct gl_buffer_object *indirect_data,
                        GLsizeiptr indirect_offset,
                        unsigned draw_count,
                        unsigned stride,
                        struct gl_buffer_object *indirect_params,
                        GLsizeiptr indirect_params_offset,
                        const struct _mesa_index_buffer *ib,
                        bool primitive_restart,
                        unsigned restart_index);
#endif
@@ -1,801 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/arrayobj.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/glformats.h"
#include "nir.h"

#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"

#include "brw_batch.h"
#include "brw_buffer_objects.h"

static const GLuint double_types_float[5] = {
   0,
   ISL_FORMAT_R64_FLOAT,
   ISL_FORMAT_R64G64_FLOAT,
   ISL_FORMAT_R64G64B64_FLOAT,
   ISL_FORMAT_R64G64B64A64_FLOAT
};

static const GLuint double_types_passthru[5] = {
   0,
   ISL_FORMAT_R64_PASSTHRU,
   ISL_FORMAT_R64G64_PASSTHRU,
   ISL_FORMAT_R64G64B64_PASSTHRU,
   ISL_FORMAT_R64G64B64A64_PASSTHRU
};

static const GLuint float_types[5] = {
   0,
   ISL_FORMAT_R32_FLOAT,
   ISL_FORMAT_R32G32_FLOAT,
   ISL_FORMAT_R32G32B32_FLOAT,
   ISL_FORMAT_R32G32B32A32_FLOAT
};

static const GLuint half_float_types[5] = {
   0,
   ISL_FORMAT_R16_FLOAT,
   ISL_FORMAT_R16G16_FLOAT,
   ISL_FORMAT_R16G16B16_FLOAT,
   ISL_FORMAT_R16G16B16A16_FLOAT
};

static const GLuint fixed_point_types[5] = {
   0,
   ISL_FORMAT_R32_SFIXED,
   ISL_FORMAT_R32G32_SFIXED,
   ISL_FORMAT_R32G32B32_SFIXED,
   ISL_FORMAT_R32G32B32A32_SFIXED,
};

static const GLuint uint_types_direct[5] = {
   0,
   ISL_FORMAT_R32_UINT,
   ISL_FORMAT_R32G32_UINT,
   ISL_FORMAT_R32G32B32_UINT,
   ISL_FORMAT_R32G32B32A32_UINT
};

static const GLuint uint_types_norm[5] = {
   0,
   ISL_FORMAT_R32_UNORM,
   ISL_FORMAT_R32G32_UNORM,
   ISL_FORMAT_R32G32B32_UNORM,
   ISL_FORMAT_R32G32B32A32_UNORM
};

static const GLuint uint_types_scale[5] = {
   0,
   ISL_FORMAT_R32_USCALED,
   ISL_FORMAT_R32G32_USCALED,
   ISL_FORMAT_R32G32B32_USCALED,
   ISL_FORMAT_R32G32B32A32_USCALED
};

static const GLuint int_types_direct[5] = {
   0,
   ISL_FORMAT_R32_SINT,
   ISL_FORMAT_R32G32_SINT,
   ISL_FORMAT_R32G32B32_SINT,
   ISL_FORMAT_R32G32B32A32_SINT
};

static const GLuint int_types_norm[5] = {
   0,
   ISL_FORMAT_R32_SNORM,
   ISL_FORMAT_R32G32_SNORM,
   ISL_FORMAT_R32G32B32_SNORM,
   ISL_FORMAT_R32G32B32A32_SNORM
};

static const GLuint int_types_scale[5] = {
   0,
   ISL_FORMAT_R32_SSCALED,
   ISL_FORMAT_R32G32_SSCALED,
   ISL_FORMAT_R32G32B32_SSCALED,
   ISL_FORMAT_R32G32B32A32_SSCALED
};

static const GLuint ushort_types_direct[5] = {
   0,
   ISL_FORMAT_R16_UINT,
   ISL_FORMAT_R16G16_UINT,
   ISL_FORMAT_R16G16B16_UINT,
   ISL_FORMAT_R16G16B16A16_UINT
};

static const GLuint ushort_types_norm[5] = {
   0,
   ISL_FORMAT_R16_UNORM,
   ISL_FORMAT_R16G16_UNORM,
   ISL_FORMAT_R16G16B16_UNORM,
   ISL_FORMAT_R16G16B16A16_UNORM
};

static const GLuint ushort_types_scale[5] = {
   0,
   ISL_FORMAT_R16_USCALED,
   ISL_FORMAT_R16G16_USCALED,
   ISL_FORMAT_R16G16B16_USCALED,
   ISL_FORMAT_R16G16B16A16_USCALED
};

static const GLuint short_types_direct[5] = {
   0,
   ISL_FORMAT_R16_SINT,
   ISL_FORMAT_R16G16_SINT,
   ISL_FORMAT_R16G16B16_SINT,
   ISL_FORMAT_R16G16B16A16_SINT
};

static const GLuint short_types_norm[5] = {
   0,
   ISL_FORMAT_R16_SNORM,
   ISL_FORMAT_R16G16_SNORM,
   ISL_FORMAT_R16G16B16_SNORM,
   ISL_FORMAT_R16G16B16A16_SNORM
};

static const GLuint short_types_scale[5] = {
   0,
   ISL_FORMAT_R16_SSCALED,
   ISL_FORMAT_R16G16_SSCALED,
   ISL_FORMAT_R16G16B16_SSCALED,
   ISL_FORMAT_R16G16B16A16_SSCALED
};

static const GLuint ubyte_types_direct[5] = {
   0,
   ISL_FORMAT_R8_UINT,
   ISL_FORMAT_R8G8_UINT,
   ISL_FORMAT_R8G8B8_UINT,
   ISL_FORMAT_R8G8B8A8_UINT
};

static const GLuint ubyte_types_norm[5] = {
   0,
   ISL_FORMAT_R8_UNORM,
   ISL_FORMAT_R8G8_UNORM,
   ISL_FORMAT_R8G8B8_UNORM,
   ISL_FORMAT_R8G8B8A8_UNORM
};

static const GLuint ubyte_types_scale[5] = {
   0,
   ISL_FORMAT_R8_USCALED,
   ISL_FORMAT_R8G8_USCALED,
   ISL_FORMAT_R8G8B8_USCALED,
   ISL_FORMAT_R8G8B8A8_USCALED
};

static const GLuint byte_types_direct[5] = {
   0,
   ISL_FORMAT_R8_SINT,
   ISL_FORMAT_R8G8_SINT,
   ISL_FORMAT_R8G8B8_SINT,
   ISL_FORMAT_R8G8B8A8_SINT
};

static const GLuint byte_types_norm[5] = {
   0,
   ISL_FORMAT_R8_SNORM,
   ISL_FORMAT_R8G8_SNORM,
   ISL_FORMAT_R8G8B8_SNORM,
   ISL_FORMAT_R8G8B8A8_SNORM
};

static const GLuint byte_types_scale[5] = {
   0,
   ISL_FORMAT_R8_SSCALED,
   ISL_FORMAT_R8G8_SSCALED,
   ISL_FORMAT_R8G8B8_SSCALED,
   ISL_FORMAT_R8G8B8A8_SSCALED
};

static GLuint
double_types(int size, GLboolean doubles)
{
   /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
    * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    * 64-bit components are stored in the URB without any conversion."
    * Also included on BDW PRM, Volume 7, page 470, table "Source Element
    * Formats Supported in VF Unit"
    *
    * Previous PRMs don't include those references, so for gfx7 we can't use
    * PASSTHRU formats directly. But in any case, we prefer to return passthru
    * even in that case, because that reflects what we want to achieve, even
    * if we would need to workaround on gen < 8.
    */
   return (doubles
           ? double_types_passthru[size]
           : double_types_float[size]);
}

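/* Illustrative mapping (not part of the original file): a three-component
 * double attribute (size == 3) resolves to ISL_FORMAT_R64G64B64_PASSTHRU
 * when the shader consumes true doubles (doubles != GL_FALSE), and to
 * ISL_FORMAT_R64G64B64_FLOAT when lossy conversion to float is acceptable,
 * matching the two tables indexed above.
 */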

/**
 * Given vertex array type/size/format/normalized info, return
 * the appropriate hardware surface type.
 * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
 */
unsigned
brw_get_vertex_surface_type(struct brw_context *brw,
                            const struct gl_vertex_format *glformat)
{
   int size = glformat->Size;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const bool is_ivybridge_or_older =
      devinfo->verx10 < 70 || devinfo->platform == INTEL_PLATFORM_IVB;

   if (INTEL_DEBUG(DEBUG_VERTS))
      fprintf(stderr, "type %s size %d normalized %d\n",
              _mesa_enum_to_string(glformat->Type),
              glformat->Size, glformat->Normalized);

   if (glformat->Integer) {
      assert(glformat->Format == GL_RGBA); /* sanity check */
      switch (glformat->Type) {
      case GL_INT: return int_types_direct[size];
      case GL_SHORT:
         if (is_ivybridge_or_older && size == 3)
            return short_types_direct[4];
         else
            return short_types_direct[size];
      case GL_BYTE:
         if (is_ivybridge_or_older && size == 3)
            return byte_types_direct[4];
         else
            return byte_types_direct[size];
      case GL_UNSIGNED_INT: return uint_types_direct[size];
      case GL_UNSIGNED_SHORT:
         if (is_ivybridge_or_older && size == 3)
            return ushort_types_direct[4];
         else
            return ushort_types_direct[size];
      case GL_UNSIGNED_BYTE:
         if (is_ivybridge_or_older && size == 3)
            return ubyte_types_direct[4];
         else
            return ubyte_types_direct[size];
      default: unreachable("not reached");
      }
   } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
      return ISL_FORMAT_R11G11B10_FLOAT;
   } else if (glformat->Normalized) {
      switch (glformat->Type) {
      case GL_DOUBLE: return double_types(size, glformat->Doubles);
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT:
      case GL_HALF_FLOAT_OES:
         if (devinfo->ver < 6 && size == 3)
            return half_float_types[4];
         else
            return half_float_types[size];
      case GL_INT: return int_types_norm[size];
      case GL_SHORT: return short_types_norm[size];
      case GL_BYTE: return byte_types_norm[size];
      case GL_UNSIGNED_INT: return uint_types_norm[size];
      case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
      case GL_UNSIGNED_BYTE:
         if (glformat->Format == GL_BGRA) {
            /* See GL_EXT_vertex_array_bgra */
            assert(size == 4);
            return ISL_FORMAT_B8G8R8A8_UNORM;
         }
         else {
            return ubyte_types_norm[size];
         }
      case GL_FIXED:
         if (devinfo->verx10 >= 75)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
       * like to use here, so upload everything as UINT and fix
       * it in the shader
       */
      case GL_INT_2_10_10_10_REV:
         assert(size == 4);
         if (devinfo->verx10 >= 75) {
            return glformat->Format == GL_BGRA
               ? ISL_FORMAT_B10G10R10A2_SNORM
               : ISL_FORMAT_R10G10B10A2_SNORM;
         }
         return ISL_FORMAT_R10G10B10A2_UINT;
      case GL_UNSIGNED_INT_2_10_10_10_REV:
         assert(size == 4);
         if (devinfo->verx10 >= 75) {
            return glformat->Format == GL_BGRA
               ? ISL_FORMAT_B10G10R10A2_UNORM
               : ISL_FORMAT_R10G10B10A2_UNORM;
         }
         return ISL_FORMAT_R10G10B10A2_UINT;
      default: unreachable("not reached");
      }
   }
   else {
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: the hardware doesn't really support the formats we'd
       * like to use here, so upload everything as UINT and fix
       * it in the shader
       */
      if (glformat->Type == GL_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (devinfo->verx10 >= 75) {
            return glformat->Format == GL_BGRA
               ? ISL_FORMAT_B10G10R10A2_SSCALED
               : ISL_FORMAT_R10G10B10A2_SSCALED;
         }
         return ISL_FORMAT_R10G10B10A2_UINT;
      } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
         assert(size == 4);
         if (devinfo->verx10 >= 75) {
            return glformat->Format == GL_BGRA
               ? ISL_FORMAT_B10G10R10A2_USCALED
               : ISL_FORMAT_R10G10B10A2_USCALED;
         }
         return ISL_FORMAT_R10G10B10A2_UINT;
      }
      assert(glformat->Format == GL_RGBA); /* sanity check */
      switch (glformat->Type) {
      case GL_DOUBLE: return double_types(size, glformat->Doubles);
      case GL_FLOAT: return float_types[size];
      case GL_HALF_FLOAT:
      case GL_HALF_FLOAT_OES:
         if (devinfo->ver < 6 && size == 3)
            return half_float_types[4];
         else
            return half_float_types[size];
      case GL_INT: return int_types_scale[size];
      case GL_SHORT: return short_types_scale[size];
      case GL_BYTE: return byte_types_scale[size];
      case GL_UNSIGNED_INT: return uint_types_scale[size];
      case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
      case GL_FIXED:
         if (devinfo->verx10 >= 75)
            return fixed_point_types[size];

         /* This produces GL_FIXED inputs as values between INT32_MIN and
          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
          */
         return int_types_scale[size];
      default: unreachable("not reached");
      }
   }
}

static void
copy_array_to_vbo_array(struct brw_context *brw,
                        const uint8_t *const ptr, const int src_stride,
                        int min, int max,
                        struct brw_vertex_buffer *buffer,
                        GLuint dst_stride)
{
   const unsigned char *src = ptr + min * src_stride;
   int count = max - min + 1;
   GLuint size = count * dst_stride;
   uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride,
                                   &buffer->bo, &buffer->offset);

   /* The GL 4.5 spec says:
    *    "If any enabled array's buffer binding is zero when DrawArrays or
    *     one of the other drawing commands defined in section 10.4 is called,
    *     the result is undefined."
    *
    * In this case, leave the dst with undefined values.
    */
   if (ptr != NULL) {
      if (dst_stride == src_stride) {
         memcpy(dst, src, size);
      } else {
         while (count--) {
            memcpy(dst, src, dst_stride);
            src += src_stride;
            dst += dst_stride;
         }
      }
   }
   buffer->stride = dst_stride;
   buffer->size = size;
}

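/* Illustrative sizing for the helper above (assumed values, not part of
 * the original file): uploading a tightly packed vec3 float array
 * (dst_stride == src_stride == 12) for min == 10 and max == 25 copies
 * count == 16 vertices, so size == 192 bytes, with dst pointing at what
 * was vertex 10; the caller then applies a vertex bias so that index 10
 * maps to offset 0 in the new VBO.
 */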

void
brw_prepare_vertices(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VERTEX_PROGRAM */
   const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);
   const uint64_t vs_inputs64 =
      nir_get_single_slot_attribs_mask(vs_prog_data->inputs_read,
                                       vp->DualSlotInputs);
   assert((vs_inputs64 & ~(uint64_t)VERT_BIT_ALL) == 0);
   unsigned vs_inputs = (unsigned)vs_inputs64;
   unsigned int min_index = brw->vb.min_index + brw->basevertex;
   unsigned int max_index = brw->vb.max_index + brw->basevertex;
   int delta, j;

   /* _NEW_POLYGON
    *
    * On gfx6+, edge flags don't end up in the VUE (either in or out of the
    * VS).  Instead, they're uploaded as the last vertex element, and the data
    * is passed sideband through the fixed function units.  So, we need to
    * prepare the vertex buffer for it, but it's not present in inputs_read.
    */
   if (devinfo->ver >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
                             ctx->Polygon.BackMode != GL_FILL)) {
      vs_inputs |= VERT_BIT_EDGEFLAG;
   }

   if (0)
      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);

   /* Accumulate the list of enabled arrays. */
   brw->vb.nr_enabled = 0;

   unsigned mask = vs_inputs;
   while (mask) {
      const gl_vert_attrib attr = u_bit_scan(&mask);
      struct brw_vertex_element *input = &brw->vb.inputs[attr];
      brw->vb.enabled[brw->vb.nr_enabled++] = input;
   }
   assert(brw->vb.nr_enabled <= VERT_ATTRIB_MAX);

   if (brw->vb.nr_enabled == 0)
      return;

   if (brw->vb.nr_buffers)
      return;

   j = 0;
   const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;

   unsigned vbomask = vs_inputs & _mesa_draw_vbo_array_bits(ctx);
   while (vbomask) {
      const struct gl_vertex_buffer_binding *const glbinding =
         _mesa_draw_buffer_binding(vao, ffs(vbomask) - 1);
      const GLsizei stride = glbinding->Stride;

      assert(glbinding->BufferObj);

      /* Accumulate the range of a single vertex, start with inverted range */
      uint32_t vertex_range_start = ~(uint32_t)0;
      uint32_t vertex_range_end = 0;

      const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
      unsigned attrmask = vbomask & boundmask;
      /* Mark those attributes as processed */
      vbomask ^= attrmask;
      /* We can assume that we have an array for the binding */
      assert(attrmask);
      /* Walk attributes belonging to the binding */
      while (attrmask) {
         const gl_vert_attrib attr = u_bit_scan(&attrmask);
         const struct gl_array_attributes *const glattrib =
            _mesa_draw_array_attrib(vao, attr);
         const uint32_t rel_offset =
            _mesa_draw_attributes_relative_offset(glattrib);
         const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;

         vertex_range_start = MIN2(vertex_range_start, rel_offset);
         vertex_range_end = MAX2(vertex_range_end, rel_end);

         struct brw_vertex_element *input = &brw->vb.inputs[attr];
         input->glformat = &glattrib->Format;
         input->buffer = j;
         input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
         input->offset = rel_offset;
      }
      assert(vertex_range_start <= vertex_range_end);

      struct brw_buffer_object *intel_buffer =
         brw_buffer_object(glbinding->BufferObj);
      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

      const uint32_t offset = _mesa_draw_binding_offset(glbinding);

      /* If nothing else is known take the buffer size and offset as a bound */
      uint32_t start = vertex_range_start;
      uint32_t range = intel_buffer->Base.Size - offset - vertex_range_start;
      /* Check if we can get a more narrow range */
      if (glbinding->InstanceDivisor) {
         if (brw->num_instances) {
            const uint32_t vertex_size = vertex_range_end - vertex_range_start;
            start = vertex_range_start + stride * brw->baseinstance;
            range = (stride * ((brw->num_instances - 1) /
                               glbinding->InstanceDivisor) +
                     vertex_size);
         }
      } else {
         if (brw->vb.index_bounds_valid) {
            const uint32_t vertex_size = vertex_range_end - vertex_range_start;
            start = vertex_range_start + stride * min_index;
            range = (stride * (max_index - min_index) +
                     vertex_size);

            /**
             * Unreal Engine 4 has a bug in its usage of glDrawRangeElements:
             * it can be called with the number of vertices in place of the
             * "end" parameter (which specifies the maximum array index
             * contained in indices).
             *
             * Since an unknown number of games are affected and we could
             * not identify that a game is built with UE4, we are forced to
             * make a blanket workaround, disregarding max_index in range
             * calculations. Fortunately all such calls look like:
             *   glDrawRangeElements(GL_TRIANGLES, 0, 3, 3, ...);
             * So we are able to narrow down this workaround.
*
|
||||
* See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2917
|
||||
*/
|
||||
if (unlikely(max_index == 3 && min_index == 0 &&
|
||||
brw->draw.derived_params.is_indexed_draw)) {
|
||||
range = intel_buffer->Base.Size - offset - start;
|
||||
}
|
||||
}
|
||||
}
|
||||

      buffer->offset = offset;
      buffer->size = start + range;
      buffer->stride = stride;
      buffer->step_rate = glbinding->InstanceDivisor;

      buffer->bo = brw_bufferobj_buffer(brw, intel_buffer, offset + start,
                                        range, false);
      brw_bo_reference(buffer->bo);

      j++;
   }

   /* If we need to upload all the arrays, then we can trim those arrays to
    * only the used elements [min_index, max_index] so long as we adjust all
    * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
    */
   brw->vb.start_vertex_bias = 0;
   delta = min_index;
   if ((vs_inputs & _mesa_draw_vbo_array_bits(ctx)) == 0) {
      brw->vb.start_vertex_bias = -delta;
      delta = 0;
   }
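   /* Illustrative example: with min_index = 100 and only user-space arrays
    * enabled, start_vertex_bias becomes -100 and delta 0, so the trimmed
    * uploads below start at element 100 while the bias rebases the
    * 3DPRIMITIVE; if any VBO array is in use, delta stays 100 and the
    * user-array buffer offsets are rebased by delta instead.
    */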

   unsigned usermask = vs_inputs & _mesa_draw_user_array_bits(ctx);
   while (usermask) {
      const struct gl_vertex_buffer_binding *const glbinding =
         _mesa_draw_buffer_binding(vao, ffs(usermask) - 1);
      const GLsizei stride = glbinding->Stride;

      assert(!glbinding->BufferObj);
      assert(brw->vb.index_bounds_valid);

      /* Accumulate the range of a single vertex, start with inverted range */
      uint32_t vertex_range_start = ~(uint32_t)0;
      uint32_t vertex_range_end = 0;

      const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
      unsigned attrmask = usermask & boundmask;
      /* Mark those attributes as processed */
      usermask ^= attrmask;
      /* We can assume that we have an array for the binding */
      assert(attrmask);
      /* Walk attributes belonging to the binding */
      while (attrmask) {
         const gl_vert_attrib attr = u_bit_scan(&attrmask);
         const struct gl_array_attributes *const glattrib =
            _mesa_draw_array_attrib(vao, attr);
         const uint32_t rel_offset =
            _mesa_draw_attributes_relative_offset(glattrib);
         const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;

         vertex_range_start = MIN2(vertex_range_start, rel_offset);
         vertex_range_end = MAX2(vertex_range_end, rel_end);

         struct brw_vertex_element *input = &brw->vb.inputs[attr];
         input->glformat = &glattrib->Format;
         input->buffer = j;
         input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
         input->offset = rel_offset;
      }
      assert(vertex_range_start <= vertex_range_end);

      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];

      const uint8_t *ptr = (const uint8_t*)_mesa_draw_binding_offset(glbinding);
      ptr += vertex_range_start;
      const uint32_t vertex_size = vertex_range_end - vertex_range_start;
      if (glbinding->Stride == 0) {
         /* If the source stride is zero, we just want to upload the current
          * attribute once and set the buffer's stride to 0. There's no need
          * to replicate it out.
          */
         copy_array_to_vbo_array(brw, ptr, 0, 0, 0, buffer, vertex_size);
      } else if (glbinding->InstanceDivisor == 0) {
         copy_array_to_vbo_array(brw, ptr, stride, min_index,
                                 max_index, buffer, vertex_size);
      } else {
         /* This is an instanced attribute, since its InstanceDivisor
          * is not zero. Therefore, its data will be stepped after the
          * instanced draw has been run InstanceDivisor times.
          */
         uint32_t instanced_attr_max_index =
            (brw->num_instances - 1) / glbinding->InstanceDivisor;
         copy_array_to_vbo_array(brw, ptr, stride, 0,
                                 instanced_attr_max_index, buffer, vertex_size);
      }
      buffer->offset -= delta * buffer->stride + vertex_range_start;
      buffer->size += delta * buffer->stride + vertex_range_start;
      buffer->step_rate = glbinding->InstanceDivisor;

      j++;
   }

   /* Upload the current values */
   unsigned curmask = vs_inputs & _mesa_draw_current_bits(ctx);
   if (curmask) {
      /* For each attribute, upload the maximum possible size. */
      uint8_t data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4];
      uint8_t *cursor = data;

      do {
         const gl_vert_attrib attr = u_bit_scan(&curmask);
         const struct gl_array_attributes *const glattrib =
            _mesa_draw_current_attrib(ctx, attr);
         const unsigned size = glattrib->Format._ElementSize;
         const unsigned alignment = align(size, sizeof(GLdouble));
         memcpy(cursor, glattrib->Ptr, size);
         if (alignment != size)
            memset(cursor + size, 0, alignment - size);

         struct brw_vertex_element *input = &brw->vb.inputs[attr];
         input->glformat = &glattrib->Format;
         input->buffer = j;
         input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
         input->offset = cursor - data;

         cursor += alignment;
      } while (curmask);
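      /* Illustrative note: align() pads each current value to a GLdouble
       * boundary, so e.g. a 12-byte vec3 float attribute occupies 16 bytes
       * in the scratch buffer, with its 4 trailing bytes zeroed by the
       * memset above.
       */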

      struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
      const unsigned size = cursor - data;
      brw_upload_data(&brw->upload, data, size, size,
                      &buffer->bo, &buffer->offset);
      buffer->stride = 0;
      buffer->size = size;
      buffer->step_rate = 0;

      j++;
   }
   brw->vb.nr_buffers = j;
}

void
brw_prepare_shader_draw_parameters(struct brw_context *brw)
{
   const struct brw_vs_prog_data *vs_prog_data =
      brw_vs_prog_data(brw->vs.base.prog_data);

   /* For non-indirect draws, upload the shader draw parameters */
   if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
       brw->draw.draw_params_bo == NULL) {
      brw_upload_data(&brw->upload,
                      &brw->draw.params, sizeof(brw->draw.params), 4,
                      &brw->draw.draw_params_bo,
                      &brw->draw.draw_params_offset);
   }

   if (vs_prog_data->uses_drawid || vs_prog_data->uses_is_indexed_draw) {
      brw_upload_data(&brw->upload,
                      &brw->draw.derived_params, sizeof(brw->draw.derived_params), 4,
                      &brw->draw.derived_draw_params_bo,
                      &brw->draw.derived_draw_params_offset);
   }
}

static void
brw_upload_indices(struct brw_context *brw)
{
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
   GLuint ib_size;
   struct brw_bo *old_bo = brw->ib.bo;
   struct gl_buffer_object *bufferobj;
   GLuint offset;
   GLuint ib_type_size;

   if (index_buffer == NULL)
      return;

   ib_type_size = 1 << index_buffer->index_size_shift;
   ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
                                   index_buffer->obj->Size;
   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
   if (!bufferobj) {
      /* Get new bufferobj, offset:
       */
      brw_upload_data(&brw->upload, index_buffer->ptr, ib_size, ib_type_size,
                      &brw->ib.bo, &offset);
      brw->ib.size = brw->ib.bo->size;
   } else {
      offset = (GLuint) (unsigned long) index_buffer->ptr;

      struct brw_bo *bo =
         brw_bufferobj_buffer(brw, brw_buffer_object(bufferobj),
                              offset, ib_size, false);
      if (bo != brw->ib.bo) {
         brw_bo_unreference(brw->ib.bo);
         brw->ib.bo = bo;
         brw->ib.size = bufferobj->Size;
         brw_bo_reference(bo);
      }
   }

   /* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
    * the index buffer state when we're just moving the start index
    * of our drawing.
    */
   brw->ib.start_vertex_offset = offset / ib_type_size;
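   /* Illustrative example: 16-bit indices at byte offset 64 give
    * start_vertex_offset = 64 / 2 = 32 elements, so a draw that merely
    * starts later in the same index buffer reuses the buffer state.
    */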

   if (brw->ib.bo != old_bo)
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;

   unsigned index_size = 1 << index_buffer->index_size_shift;
   if (index_size != brw->ib.index_size) {
      brw->ib.index_size = index_size;
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
   }

   /* We need to re-emit the index buffer state each time
    * the cut index flag changes.
    */
   if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) {
      brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index;
      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
   }
}

const struct brw_tracked_state brw_indices = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_INDICES,
   },
   .emit = brw_upload_indices,
};
@ -1,404 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/version.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_batch.h"

/**
 * Initializes the potential list of extensions if ctx == NULL, or actually
 * enables extensions for a context.
 */
void
brw_init_extensions(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   assert(devinfo->ver >= 4);

   ctx->Extensions.ARB_arrays_of_arrays = true;
   ctx->Extensions.ARB_buffer_storage = true;
   ctx->Extensions.ARB_clear_texture = true;
   ctx->Extensions.ARB_clip_control = true;
   ctx->Extensions.ARB_copy_image = true;
   ctx->Extensions.ARB_depth_buffer_float = true;
   ctx->Extensions.ARB_depth_clamp = true;
   ctx->Extensions.ARB_depth_texture = true;
   ctx->Extensions.ARB_draw_elements_base_vertex = true;
   ctx->Extensions.ARB_draw_instanced = true;
   ctx->Extensions.ARB_ES2_compatibility = true;
   ctx->Extensions.ARB_explicit_attrib_location = true;
   ctx->Extensions.ARB_explicit_uniform_location = true;
   ctx->Extensions.ARB_fragment_coord_conventions = true;
   ctx->Extensions.ARB_fragment_program = true;
   ctx->Extensions.ARB_fragment_program_shadow = true;
   ctx->Extensions.ARB_fragment_shader = true;
   ctx->Extensions.ARB_framebuffer_object = true;
   ctx->Extensions.ARB_half_float_vertex = true;
   ctx->Extensions.ARB_instanced_arrays = true;
   ctx->Extensions.ARB_internalformat_query = true;
   ctx->Extensions.ARB_internalformat_query2 = true;
   ctx->Extensions.ARB_map_buffer_range = true;
   ctx->Extensions.ARB_occlusion_query = true;
   ctx->Extensions.ARB_occlusion_query2 = true;
   ctx->Extensions.ARB_point_sprite = true;
   ctx->Extensions.ARB_polygon_offset_clamp = true;
   ctx->Extensions.ARB_seamless_cube_map = true;
   ctx->Extensions.ARB_shader_bit_encoding = true;
   ctx->Extensions.ARB_shader_draw_parameters = true;
   ctx->Extensions.ARB_shader_group_vote = true;
   ctx->Extensions.ARB_shader_texture_lod = true;
   ctx->Extensions.ARB_shading_language_packing = true;
   ctx->Extensions.ARB_shadow = true;
   ctx->Extensions.ARB_sync = true;
   ctx->Extensions.ARB_texture_border_clamp = true;
   ctx->Extensions.ARB_texture_compression_rgtc = true;
   ctx->Extensions.ARB_texture_cube_map = true;
   ctx->Extensions.ARB_texture_env_combine = true;
   ctx->Extensions.ARB_texture_env_crossbar = true;
   ctx->Extensions.ARB_texture_env_dot3 = true;
   ctx->Extensions.ARB_texture_filter_anisotropic = true;
   ctx->Extensions.ARB_texture_float = true;
   ctx->Extensions.ARB_texture_mirror_clamp_to_edge = true;
   ctx->Extensions.ARB_texture_non_power_of_two = true;
   ctx->Extensions.ARB_texture_rg = true;
   ctx->Extensions.ARB_texture_rgb10_a2ui = true;
   ctx->Extensions.ARB_vertex_program = true;
   ctx->Extensions.ARB_vertex_shader = true;
   ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true;
   ctx->Extensions.ARB_vertex_type_10f_11f_11f_rev = true;
   ctx->Extensions.EXT_blend_color = true;
   ctx->Extensions.EXT_blend_equation_separate = true;
   ctx->Extensions.EXT_blend_func_separate = true;
   ctx->Extensions.EXT_blend_minmax = true;
   ctx->Extensions.EXT_color_buffer_half_float = true;
   ctx->Extensions.EXT_draw_buffers2 = true;
   ctx->Extensions.EXT_EGL_image_storage = true;
   ctx->Extensions.EXT_float_blend = true;
   ctx->Extensions.EXT_framebuffer_sRGB = true;
   ctx->Extensions.EXT_gpu_program_parameters = true;
   ctx->Extensions.EXT_packed_float = true;
   ctx->Extensions.EXT_pixel_buffer_object = true;
   ctx->Extensions.EXT_point_parameters = true;
   ctx->Extensions.EXT_provoking_vertex = true;
   ctx->Extensions.EXT_render_snorm = true;
   ctx->Extensions.EXT_sRGB = true;
   ctx->Extensions.EXT_stencil_two_side = true;
   ctx->Extensions.EXT_texture_array = true;
   ctx->Extensions.EXT_texture_env_dot3 = true;
   ctx->Extensions.EXT_texture_filter_anisotropic = true;
   ctx->Extensions.EXT_texture_integer = true;
   ctx->Extensions.EXT_texture_norm16 = true;
   ctx->Extensions.EXT_texture_shared_exponent = true;
   ctx->Extensions.EXT_texture_snorm = true;
   ctx->Extensions.EXT_texture_sRGB = true;
   ctx->Extensions.EXT_texture_sRGB_decode = true;
   ctx->Extensions.EXT_texture_sRGB_R8 = true;
   ctx->Extensions.EXT_texture_swizzle = true;
   ctx->Extensions.EXT_texture_type_2_10_10_10_REV = true;
   ctx->Extensions.EXT_vertex_array_bgra = true;
   ctx->Extensions.KHR_robustness = true;
   ctx->Extensions.AMD_seamless_cubemap_per_texture = true;
   ctx->Extensions.APPLE_object_purgeable = true;
   ctx->Extensions.ATI_texture_env_combine3 = true;
   ctx->Extensions.MESA_framebuffer_flip_y = true;
   ctx->Extensions.NV_conditional_render = true;
   ctx->Extensions.NV_fog_distance = true;
   ctx->Extensions.NV_primitive_restart = true;
   ctx->Extensions.NV_texture_barrier = true;
   ctx->Extensions.NV_texture_env_combine4 = true;
   ctx->Extensions.NV_texture_rectangle = true;
   ctx->Extensions.TDFX_texture_compression_FXT1 = true;
   ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true;
   ctx->Extensions.OES_draw_texture = true;
   ctx->Extensions.OES_EGL_image = true;
   ctx->Extensions.OES_EGL_image_external = true;
   ctx->Extensions.OES_standard_derivatives = true;
   ctx->Extensions.OES_texture_float = true;
   ctx->Extensions.OES_texture_float_linear = true;
   ctx->Extensions.OES_texture_half_float = true;
   ctx->Extensions.OES_texture_half_float_linear = true;

   if (devinfo->ver >= 8)
      ctx->Const.GLSLVersion = 460;
   else if (devinfo->platform == INTEL_PLATFORM_HSW &&
            can_do_pipelined_register_writes(brw->screen))
      ctx->Const.GLSLVersion = 450;
   else if (devinfo->ver >= 7 && can_do_pipelined_register_writes(brw->screen))
      ctx->Const.GLSLVersion = 420;
   else if (devinfo->ver >= 6)
      ctx->Const.GLSLVersion = 330;
   else
      ctx->Const.GLSLVersion = 120;

   if (devinfo->ver >= 6)
      ctx->Const.GLSLVersionCompat = 130;
   else
      ctx->Const.GLSLVersionCompat = 120;

   _mesa_override_glsl_version(&ctx->Const);
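   /* Net effect of the ladder above (summary, not new policy): Gfx8+ ->
    * GLSL 4.60; Haswell with pipelined register writes -> 4.50; other Gfx7
    * with pipelined register writes -> 4.20; Gfx6 -> 3.30; older parts ->
    * 1.20.  Compat contexts are capped at 1.30 (Gfx6+) or 1.20, before any
    * environment override is applied by _mesa_override_glsl_version().
    */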

   ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130;
   ctx->Extensions.MESA_shader_integer_functions = ctx->Const.GLSLVersion >= 130;

   if (devinfo->verx10 >= 45) {
      ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent = true;
      ctx->Extensions.KHR_blend_equation_advanced = true;
   }

   if (devinfo->ver >= 5) {
      ctx->Extensions.ARB_texture_query_levels = ctx->Const.GLSLVersion >= 130;
      ctx->Extensions.ARB_texture_query_lod = true;
      ctx->Extensions.EXT_timer_query = true;
   }

   if (devinfo->ver == 6)
      ctx->Extensions.ARB_transform_feedback2 = true;

   if (devinfo->ver >= 6) {
      ctx->Extensions.ARB_blend_func_extended =
         !driQueryOptionb(&brw->screen->optionCache, "disable_blend_func_extended");
      ctx->Extensions.ARB_conditional_render_inverted = true;
      ctx->Extensions.ARB_cull_distance = true;
      ctx->Extensions.ARB_draw_buffers_blend = true;
      if (ctx->API != API_OPENGL_COMPAT ||
          ctx->Const.AllowHigherCompatVersion)
         ctx->Extensions.ARB_enhanced_layouts = true;
      ctx->Extensions.ARB_ES3_compatibility = true;
      ctx->Extensions.ARB_fragment_layer_viewport = true;
      ctx->Extensions.ARB_pipeline_statistics_query = true;
      ctx->Extensions.ARB_sample_shading = true;
      ctx->Extensions.ARB_shading_language_420pack = true;
      if (ctx->API != API_OPENGL_COMPAT ||
          ctx->Const.AllowHigherCompatVersion) {
         ctx->Extensions.ARB_texture_buffer_object = true;
         ctx->Extensions.ARB_texture_buffer_object_rgb32 = true;
         ctx->Extensions.ARB_texture_buffer_range = true;
      }
      ctx->Extensions.ARB_texture_cube_map_array = true;
      ctx->Extensions.ARB_texture_gather = true;
      ctx->Extensions.ARB_texture_multisample = true;
      ctx->Extensions.ARB_uniform_buffer_object = true;
      ctx->Extensions.EXT_gpu_shader4 = true;
      ctx->Extensions.EXT_texture_shadow_lod = true;

      if (ctx->API != API_OPENGL_COMPAT ||
          ctx->Const.AllowHigherCompatVersion)
         ctx->Extensions.AMD_vertex_shader_layer = true;
      ctx->Extensions.EXT_framebuffer_multisample = true;
      ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
      ctx->Extensions.EXT_transform_feedback = true;
      ctx->Extensions.ARB_transform_feedback_overflow_query = true;
      ctx->Extensions.OES_depth_texture_cube_map = true;
      ctx->Extensions.OES_sample_variables = true;

      ctx->Extensions.ARB_timer_query = brw->screen->hw_has_timestamp;
      ctx->Extensions.EXT_disjoint_timer_query =
         ctx->Extensions.ARB_timer_query;

      /* Only enable this in core profile because geometry shaders are
       * required, and Mesa only supports geometry shaders in OpenGL 3.2 and
       * later. In this driver, that currently means Core profile.
       */
      if (ctx->API == API_OPENGL_CORE ||
          ctx->Const.AllowHigherCompatVersion) {
         ctx->Extensions.ARB_shader_viewport_layer_array = true;
         ctx->Extensions.ARB_viewport_array = true;
         ctx->Extensions.AMD_vertex_shader_viewport_index = true;
      }
   }

   brw->predicate.supported = false;

   if (devinfo->ver >= 7) {
      ctx->Extensions.ARB_conservative_depth = true;
      ctx->Extensions.ARB_derivative_control = true;
      ctx->Extensions.ARB_framebuffer_no_attachments = true;
      if (ctx->API != API_OPENGL_COMPAT ||
          ctx->Const.AllowHigherCompatVersion) {
         ctx->Extensions.ARB_gpu_shader5 = true;
         ctx->Extensions.ARB_gpu_shader_fp64 = true;
      }
      ctx->Extensions.ARB_shader_atomic_counters = true;
      ctx->Extensions.ARB_shader_atomic_counter_ops = true;
      ctx->Extensions.ARB_shader_clock = true;
      ctx->Extensions.ARB_shader_image_load_store = true;
      ctx->Extensions.ARB_shader_image_size = true;
      ctx->Extensions.ARB_shader_precision = true;
      ctx->Extensions.ARB_shader_texture_image_samples = true;
      if (ctx->API != API_OPENGL_COMPAT ||
          ctx->Const.AllowHigherCompatVersion)
         ctx->Extensions.ARB_tessellation_shader = true;
      ctx->Extensions.ARB_texture_compression_bptc = true;
      ctx->Extensions.ARB_texture_view = true;
      ctx->Extensions.ARB_shader_storage_buffer_object = true;
      ctx->Extensions.ARB_vertex_attrib_64bit = true;
      ctx->Extensions.EXT_shader_samples_identical = true;
      ctx->Extensions.OES_primitive_bounding_box = true;
      ctx->Extensions.OES_texture_buffer = true;

      if (can_do_pipelined_register_writes(brw->screen)) {
         ctx->Extensions.ARB_draw_indirect = true;
         ctx->Extensions.ARB_transform_feedback2 = true;
         ctx->Extensions.ARB_transform_feedback3 = true;
         ctx->Extensions.ARB_transform_feedback_instanced = true;

         if (can_do_compute_dispatch(brw->screen) &&
             ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) {
            ctx->Extensions.ARB_compute_shader = true;
            ctx->Extensions.ARB_ES3_1_compatibility =
               devinfo->verx10 >= 75;
            ctx->Extensions.NV_compute_shader_derivatives = true;
            ctx->Extensions.ARB_compute_variable_group_size = true;
         }

         if (can_do_predicate_writes(brw->screen)) {
            brw->predicate.supported = true;
            ctx->Extensions.ARB_indirect_parameters = true;
         }
      }

      ctx->Extensions.ARB_gl_spirv = true;
      ctx->Extensions.ARB_spirv_extensions = true;
   }

   if (devinfo->verx10 >= 75) {
      ctx->Extensions.ARB_stencil_texturing = true;
      ctx->Extensions.ARB_texture_stencil8 = true;
      ctx->Extensions.OES_geometry_shader = true;
      ctx->Extensions.OES_texture_cube_map_array = true;
      ctx->Extensions.OES_viewport_array = true;
   }

   if (devinfo->verx10 >= 75 || devinfo->platform == INTEL_PLATFORM_BYT) {
      ctx->Extensions.ARB_robust_buffer_access_behavior = true;
   }

   if (can_do_mi_math_and_lrr(brw->screen)) {
      ctx->Extensions.ARB_query_buffer_object = true;
   }

   if (devinfo->ver >= 8 || devinfo->platform == INTEL_PLATFORM_BYT) {
      /* For now, we can't enable OES_texture_view on Gen 7 because of
       * some piglit failures coming from
       * piglit/tests/spec/arb_texture_view/rendering-formats.c that need
       * investigation.
       */
      ctx->Extensions.OES_texture_view = true;
   }

   if (devinfo->ver >= 7) {
      /* We can safely enable OES_copy_image on Gen 7, since we emulate
       * the ETC2 support using the shadow_miptree to store the
       * compressed data.
       */
      ctx->Extensions.OES_copy_image = true;
   }

   /* Gen < 6 still uses the blitter. It's somewhat annoying to add support
    * for blackhole there... Does anybody actually care anymore anyway?
    */
   if (devinfo->ver >= 6)
      ctx->Extensions.INTEL_blackhole_render = true;

   if (devinfo->ver >= 8) {
      ctx->Extensions.ARB_gpu_shader_int64 = true;
      /* requires ARB_gpu_shader_int64 */
      ctx->Extensions.ARB_shader_ballot = true;
      ctx->Extensions.ARB_ES3_2_compatibility = true;

      /* Currently only implemented in the scalar backend, so only enable for
       * Gfx8+. Eventually Gfx6+ could be supported.
       */
      ctx->Extensions.INTEL_shader_integer_functions2 = true;
   }

   if (devinfo->ver >= 9) {
      ctx->Extensions.ANDROID_extension_pack_es31a = true;
      ctx->Extensions.AMD_depth_clamp_separate = true;
      ctx->Extensions.ARB_post_depth_coverage = true;
      ctx->Extensions.ARB_shader_stencil_export = true;
      ctx->Extensions.EXT_shader_framebuffer_fetch = true;
      ctx->Extensions.INTEL_conservative_rasterization = true;
      ctx->Extensions.INTEL_shader_atomic_float_minmax = true;
      ctx->Extensions.KHR_blend_equation_advanced_coherent = true;
      ctx->Extensions.KHR_texture_compression_astc_ldr = true;
      ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;

      /*
       * From the Skylake PRM Vol. 7 (Memory Fence Message, page 221):
       *  "A memory fence message issued by a thread causes further messages
       *   issued by the thread to be blocked until all previous data port
       *   messages have completed, or the results can be globally observed
       *   from the point of view of other threads in the system."
       *
       * From the Haswell PRM Vol. 7 (Memory Fence, page 256):
       *  "A memory fence message issued by a thread causes further messages
       *   issued by the thread to be blocked until all previous messages
       *   issued by the thread to that data port (data cache or render cache)
       *   have been globally observed from the point of view of other
       *   threads in the system."
       *
       * Summarized: For ARB_fragment_shader_interlock to work, we need to
       * ensure memory access ordering for all messages to the dataport from
       * all threads. Memory fence messages prior to SKL only provide memory
       * access ordering for messages from the same thread, so we can only
       * support the feature from Gfx9 onwards.
       */
      ctx->Extensions.ARB_fragment_shader_interlock = true;
   }

   if (intel_device_info_is_9lp(devinfo))
      ctx->Extensions.KHR_texture_compression_astc_hdr = true;

   if (devinfo->ver >= 6)
      ctx->Extensions.INTEL_performance_query = true;

   if (ctx->API != API_OPENGL_COMPAT ||
       ctx->Const.AllowHigherCompatVersion)
      ctx->Extensions.ARB_base_instance = true;
   if (ctx->API != API_OPENGL_CORE)
      ctx->Extensions.ARB_color_buffer_float = true;

   ctx->Extensions.EXT_texture_compression_s3tc = true;
   ctx->Extensions.EXT_texture_compression_s3tc_srgb = true;
   ctx->Extensions.ANGLE_texture_compression_dxt = true;

   ctx->Extensions.EXT_demote_to_helper_invocation = true;

   ctx->Const.PrimitiveRestartFixedIndex = true;

   if (devinfo->ver >= 7) {
      ctx->Extensions.EXT_memory_object_fd = true;
      ctx->Extensions.EXT_memory_object = true;
      ctx->Extensions.EXT_semaphore = true;
      ctx->Extensions.EXT_semaphore_fd = true;
   }
}
File diff suppressed because it is too large
@ -1,255 +0,0 @@
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef BRW_FBO_H
#define BRW_FBO_H

#include <stdbool.h>
#include <assert.h>
#include "main/formats.h"
#include "main/macros.h"
#include "brw_context.h"
#include "brw_mipmap_tree.h"
#include "brw_screen.h"

#ifdef __cplusplus
extern "C" {
#endif

struct brw_mipmap_tree;

/**
 * Intel renderbuffer, derived from gl_renderbuffer.
 */
struct brw_renderbuffer
{
   struct swrast_renderbuffer Base;
   /**
    * The real renderbuffer storage.
    *
    * This is multisampled if NumSamples is > 1.
    */
   struct brw_mipmap_tree *mt;

   /**
    * Downsampled contents for window-system MSAA renderbuffers.
    *
    * For window system MSAA color buffers, the singlesample_mt is shared with
    * other processes in DRI2 (and in DRI3, it's the image buffer managed by
    * glx_dri3.c), while mt is private to our process.  To do a swapbuffers,
    * we have to downsample out of mt into singlesample_mt.  For depth and
    * stencil buffers, the singlesample_mt is also private, and since we don't
    * expect to need to do resolves (except if someone does a glReadPixels()
    * or glCopyTexImage()), we just temporarily allocate singlesample_mt when
    * asked to map the renderbuffer.
    */
   struct brw_mipmap_tree *singlesample_mt;

   /* Gen < 6 doesn't have a layer specifier for render targets or depth.
    * The driver needs to manually offset surfaces to the correct
    * level/layer.  There are, however, alignment restrictions to respect
    * as well, and in some cases the only option is to use a temporary
    * single-slice surface which the driver copies to the full miptree
    * after rendering.
    *
    * See brw_renderbuffer_move_to_temp().
    */
   struct brw_mipmap_tree *align_wa_mt;

   /**
    * \name Miptree view
    * \{
    *
    * Multiple renderbuffers may simultaneously wrap a single texture and each
    * provide a different view into that texture.  The fields below indicate
    * which miptree slice is wrapped by this renderbuffer.  The fields' values
    * are consistent with the 'level' and 'layer' parameters of
    * glFramebufferTextureLayer().
    *
    * For renderbuffers not created with glFramebufferTexture*(), mt_level and
    * mt_layer are 0.
    */
   unsigned int mt_level;
   unsigned int mt_layer;

   /* The number of attached logical layers. */
   unsigned int layer_count;
   /** \} */

   GLuint draw_x, draw_y; /**< Offset of drawing within the region */

   /**
    * Set to true at every draw call, to indicate if a window-system
    * renderbuffer needs to be downsampled before using singlesample_mt.
    */
   bool need_downsample;

   /**
    * Set to true when doing a brw_renderbuffer_map()/unmap() that requires
    * an upsample at the end.
    */
   bool need_map_upsample;

   /**
    * Set to true if singlesample_mt is temporary storage that persists only
    * for the duration of a mapping.
    */
   bool singlesample_mt_is_tmp;

   /**
    * Set to true when the application specifically asked for an sRGB visual.
    */
   bool need_srgb;
};


/**
 * gl_renderbuffer is a base class which we subclass.  The Class field
 * is used for simple run-time type checking.
 */
#define INTEL_RB_CLASS 0x12345678


/**
 * Return a gl_renderbuffer pointer cast to brw_renderbuffer.
 * NULL will be returned if the rb isn't really a brw_renderbuffer.
 * This is determined by checking the ClassID.
 */
static inline struct brw_renderbuffer *
brw_renderbuffer(struct gl_renderbuffer *rb)
{
   struct brw_renderbuffer *irb = (struct brw_renderbuffer *) rb;
   if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS)
      return irb;
   else
      return NULL;
}
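/* Usage sketch (illustrative): the ClassID check makes the cast safe to
 * apply to any attachment, e.g.
 *
 *    struct brw_renderbuffer *irb = brw_renderbuffer(rb);
 *    if (irb)
 *       ... driver-private (intel) renderbuffer path ...
 *    else
 *       ... rb belongs to some other gl_renderbuffer subclass ...
 */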

static inline struct brw_mipmap_tree *
brw_renderbuffer_get_mt(struct brw_renderbuffer *irb)
{
   if (!irb)
      return NULL;

   return (irb->align_wa_mt) ? irb->align_wa_mt : irb->mt;
}

/**
 * \brief Return the framebuffer attachment specified by attIndex.
 *
 * If the framebuffer lacks the specified attachment, then return null.
 *
 * If the attached renderbuffer is a wrapper, then return the wrapped
 * renderbuffer.
 */
static inline struct brw_renderbuffer *
brw_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex)
{
   struct gl_renderbuffer *rb;

   assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment));

   rb = fb->Attachment[attIndex].Renderbuffer;
   if (!rb)
      return NULL;

   return brw_renderbuffer(rb);
}


static inline mesa_format
brw_rb_format(const struct brw_renderbuffer *rb)
{
   return rb->Base.Base.Format;
}

extern struct brw_renderbuffer *
brw_create_winsys_renderbuffer(struct brw_screen *screen,
                               mesa_format format, unsigned num_samples);

struct brw_renderbuffer *
brw_create_private_renderbuffer(struct brw_screen *screen,
                                mesa_format format, unsigned num_samples);

struct gl_renderbuffer*
brw_create_wrapped_renderbuffer(struct gl_context *ctx,
                                int width, int height,
                                mesa_format format);

extern void
brw_fbo_init(struct brw_context *brw);

void
brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb);

static inline uint32_t
brw_renderbuffer_get_tile_offsets(struct brw_renderbuffer *irb,
                                  uint32_t *tile_x,
                                  uint32_t *tile_y)
{
   if (irb->align_wa_mt) {
      *tile_x = 0;
      *tile_y = 0;
      return 0;
   }

   return brw_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer,
                                       tile_x, tile_y);
}

bool
brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb);


void brw_renderbuffer_move_to_temp(struct brw_context *brw,
                                   struct brw_renderbuffer *irb,
                                   bool invalidate);

void
brw_renderbuffer_downsample(struct brw_context *brw,
                            struct brw_renderbuffer *irb);

void
brw_renderbuffer_upsample(struct brw_context *brw,
                          struct brw_renderbuffer *irb);

void brw_cache_sets_clear(struct brw_context *brw);
void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo);
void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
                                enum isl_format format,
                                enum isl_aux_usage aux_usage);
void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);
void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
                             enum isl_format format,
                             enum isl_aux_usage aux_usage);
void brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo);

unsigned
brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples);

#ifdef __cplusplus
}
#endif

#endif /* BRW_FBO_H */
@ -1,178 +0,0 @@
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "main/macros.h"
#include "main/enums.h"
#include "main/transformfeedback.h"

#include "brw_batch.h"

#include "brw_defines.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_state.h"
#include "brw_ff_gs.h"
#include "util/ralloc.h"

static void
compile_ff_gs_prog(struct brw_context *brw,
                   struct brw_ff_gs_prog_key *key)
{
   const GLuint *program;
   void *mem_ctx;
   GLuint program_size;

   mem_ctx = ralloc_context(NULL);

   struct brw_ff_gs_prog_data prog_data;
   program = brw_compile_ff_gs_prog(brw->screen->compiler, mem_ctx, key,
                                    &prog_data,
                                    &brw_vue_prog_data(brw->vs.base.prog_data)->vue_map,
                                    &program_size);

   brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG,
                    key, sizeof(*key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data);
   ralloc_free(mem_ctx);
}

static bool
brw_ff_gs_state_dirty(const struct brw_context *brw)
{
   return brw_state_dirty(brw,
                          _NEW_LIGHT,
                          BRW_NEW_PRIMITIVE |
                          BRW_NEW_TRANSFORM_FEEDBACK |
                          BRW_NEW_VS_PROG_DATA);
}

static void
brw_ff_gs_populate_key(struct brw_context *brw,
                       struct brw_ff_gs_prog_key *key)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   static const unsigned swizzle_for_offset[4] = {
      BRW_SWIZZLE4(0, 1, 2, 3),
      BRW_SWIZZLE4(1, 2, 3, 3),
      BRW_SWIZZLE4(2, 3, 3, 3),
      BRW_SWIZZLE4(3, 3, 3, 3)
   };
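   /* Illustrative reading of the table above: entry [k] shifts the source
    * components left by k and clamps at .w, so swizzle_for_offset[2] =
    * BRW_SWIZZLE4(2, 3, 3, 3) selects .zwww; a transform feedback output
    * that starts at component 2 of a VUE slot thus reads the right half
    * of that slot.
    */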

   struct gl_context *ctx = &brw->ctx;

   assert(devinfo->ver < 7);

   memset(key, 0, sizeof(*key));

   /* BRW_NEW_VS_PROG_DATA (part of VUE map) */
   key->attrs = brw_vue_prog_data(brw->vs.base.prog_data)->vue_map.slots_valid;

   /* BRW_NEW_PRIMITIVE */
   key->primitive = brw->primitive;

   /* _NEW_LIGHT */
   key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
   if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) {
      /* Provide consistent primitive order with brw_set_prim's
       * optimization of single quads to trifans.
       */
      key->pv_first = true;
   }

   if (devinfo->ver == 6) {
      /* On Gfx6, GS is used for transform feedback. */
      /* BRW_NEW_TRANSFORM_FEEDBACK */
      if (_mesa_is_xfb_active_and_unpaused(ctx)) {
         const struct gl_program *prog =
            ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
         const struct gl_transform_feedback_info *linked_xfb_info =
            prog->sh.LinkedTransformFeedback;
         int i;

         /* Make sure that the VUE slots won't overflow the unsigned chars in
          * key->transform_feedback_bindings[].
          */
         STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);

         /* Make sure that we don't need more binding table entries than we've
          * set aside for use in transform feedback.  (We shouldn't, since we
          * set aside enough binding table entries to have one per component).
          */
         assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);

         key->need_gs_prog = true;
         key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
         for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
            key->transform_feedback_bindings[i] =
               linked_xfb_info->Outputs[i].OutputRegister;
            key->transform_feedback_swizzles[i] =
               swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
         }
      }
   } else {
      /* Pre-gfx6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
       * into simpler primitives.
       */
      key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
                           brw->primitive == _3DPRIM_QUADSTRIP ||
                           brw->primitive == _3DPRIM_LINELOOP);
   }
}

/* Calculate interpolants for triangle and line rasterization.
 */
void
brw_upload_ff_gs_prog(struct brw_context *brw)
{
   struct brw_ff_gs_prog_key key;

   if (!brw_ff_gs_state_dirty(brw))
      return;

   /* Populate the key:
    */
   brw_ff_gs_populate_key(brw, &key);

   if (brw->ff_gs.prog_active != key.need_gs_prog) {
      brw->ctx.NewDriverState |= BRW_NEW_FF_GS_PROG_DATA;
      brw->ff_gs.prog_active = key.need_gs_prog;
   }

   if (brw->ff_gs.prog_active) {
      if (!brw_search_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, &key,
                            sizeof(key), &brw->ff_gs.prog_offset,
                            &brw->ff_gs.prog_data, true)) {
         compile_ff_gs_prog(brw, &key);
      }
   }
}
@ -1,42 +0,0 @@
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#ifndef BRW_GS_H
#define BRW_GS_H

#include "brw_context.h"
#include "compiler/brw_eu.h"

void
brw_upload_ff_gs_prog(struct brw_context *brw);

#endif
@ -1,119 +0,0 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_context.h"
#include "brw_state.h"
#include "main/context.h"
#include "main/formatquery.h"
#include "main/glformats.h"

static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   (void) target;
   (void) internalFormat;

   switch (devinfo->ver) {
   case 11:
   case 10:
   case 9:
      samples[0] = 16;
      samples[1] = 8;
      samples[2] = 4;
      samples[3] = 2;
      return 4;

   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      if (internalFormat == GL_RGBA32F && _mesa_is_gles(ctx)) {
         /* For GLES, we are allowed to return a smaller number of samples for
          * GL_RGBA32F.  See OpenGLES 3.2 spec, section 20.3.1 Internal Format
          * Query Parameters, under SAMPLES:
          *
          *    "A value less than or equal to the value of MAX_SAMPLES, if
          *     internalformat is RGBA16F, R32F, RG32F, or RGBA32F."
          *
          * In brw_render_target_supported, we prevent formats with a size
          * greater than 8 bytes from using 8x MSAA on gfx7.
          */
         samples[0] = 4;
         return 1;
      } else {
         samples[0] = 8;
         samples[1] = 4;
         return 2;
      }

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(devinfo->ver < 6);
      samples[0] = 1;
      return 1;
   }
}
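/* Usage sketch (illustrative, standard GL entry point): an application sees
 * the tables above through glGetInternalformativ; on a Gfx9 part the two
 * queries below would report num = 4 and samples = {16, 8, 4, 2}:
 *
 *    GLint num, samples[16];
 *    glGetInternalformativ(GL_RENDERBUFFER, GL_RGBA8,
 *                          GL_NUM_SAMPLE_COUNTS, 1, &num);
 *    glGetInternalformativ(GL_RENDERBUFFER, GL_RGBA8,
 *                          GL_SAMPLES, num, samples);
 */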

void
brw_query_internal_format(struct gl_context *ctx, GLenum target,
                          GLenum internalFormat, GLenum pname, GLint *params)
{
   /* The Mesa layer gives us a temporary params buffer that is guaranteed
    * to be non-NULL, and to have at least 16 elements.
    */
   assert(params != NULL);

   switch (pname) {
   case GL_SAMPLES:
      brw_query_samples_for_format(ctx, target, internalFormat, params);
      break;

   case GL_NUM_SAMPLE_COUNTS: {
      size_t num_samples;
      GLint dummy_buffer[16];

      num_samples = brw_query_samples_for_format(ctx, target, internalFormat,
                                                 dummy_buffer);
      params[0] = (GLint) num_samples;
      break;
   }

   default:
      /* By default, we call the driver hook's fallback function from the
       * frontend, which has a generic implementation for all pnames.
       */
      _mesa_query_internal_format_default(ctx, target, internalFormat, pname,
                                          params);
      break;
   }
}
@ -1,144 +0,0 @@
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/mipmap.h"
#include "main/teximage.h"
#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_tex.h"
#include "drivers/common/meta.h"

#define FILE_DEBUG_FLAG DEBUG_BLORP


/**
 * The GenerateMipmap() driver hook.
 */
void
brw_generate_mipmap(struct gl_context *ctx, GLenum target,
                    struct gl_texture_object *tex_obj)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct brw_texture_object *intel_obj = brw_texture_object(tex_obj);
   const unsigned base_level = tex_obj->Attrib.BaseLevel;
   unsigned last_level, first_layer, last_layer;

   /* Blorp doesn't handle combined depth/stencil surfaces on Gfx4-5 yet. */
   if (devinfo->ver <= 5 &&
       (tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_COMPONENT ||
        tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_STENCIL)) {
      _mesa_meta_GenerateMipmap(ctx, target, tex_obj);
      return;
   }

   /* find expected last mipmap level to generate */
   last_level = _mesa_compute_num_levels(ctx, tex_obj, target) - 1;

   if (last_level == 0)
      return;
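   /* Worked example (illustrative): a 64x64 base image has
    * _mesa_compute_num_levels() == 7 (64, 32, 16, 8, 4, 2, 1), so
    * last_level = 6; a single-level 1x1 texture returns early above.
    */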

   /* The texture isn't in a "complete" state yet so set the expected
    * last_level here; we're not going through normal texture validation.
    */
   intel_obj->_MaxLevel = last_level;

   if (!tex_obj->Immutable) {
      _mesa_prepare_mipmap_levels(ctx, tex_obj, base_level, last_level);

      /* At this point, memory for all the texture levels has been
       * allocated.  However, the base level image may be in one resource
       * while the subsequent/smaller levels may be in another resource.
       * Finalizing the texture will copy the base images from the former
       * resource to the latter.
       *
       * After this, we'll have all mipmap levels in one resource.
       */
      brw_finalize_mipmap_tree(brw, tex_obj);
   }

   struct brw_mipmap_tree *mt = intel_obj->mt;
   if (!mt) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "mipmap generation");
      return;
   }

   const mesa_format format = intel_obj->_Format;

   /* Fall back to the CPU for non-renderable cases.
    *
    * TODO: 3D textures require blending data from multiple slices,
    * which means we need custom shaders.  For now, fall back.
    */
   if (!brw->mesa_format_supports_render[format] || target == GL_TEXTURE_3D) {
      _mesa_generate_mipmap(ctx, target, tex_obj);
      return;
   }

   const struct isl_extent4d *base_size = &mt->surf.logical_level0_px;

   if (mt->target == GL_TEXTURE_CUBE_MAP) {
      first_layer = _mesa_tex_target_to_face(target);
      last_layer = first_layer;
   } else {
      first_layer = 0;
      last_layer = base_size->array_len - 1;
   }

   /* The GL_EXT_texture_sRGB_decode extension's issues section says:
    *
    *    "10) How is mipmap generation of sRGB textures affected by the
    *     TEXTURE_SRGB_DECODE_EXT parameter?
    *
    *     RESOLVED: When the TEXTURE_SRGB_DECODE parameter is DECODE_EXT
    *     for an sRGB texture, mipmap generation should decode sRGB texels
    *     to a linear RGB color space, perform downsampling, then encode
    *     back to an sRGB color space.  (Issue 24 in the EXT_texture_sRGB
    *     specification provides a rationale for why.)  When the parameter
    *     is SKIP_DECODE_EXT instead, mipmap generation skips the encode
    *     and decode steps during mipmap generation.  By skipping the
    *     encode and decode steps, sRGB mipmap generation should match
    *     the mipmap generation for a non-sRGB texture."
    */
   bool do_srgb = tex_obj->Sampler.Attrib.sRGBDecode == GL_DECODE_EXT;

   for (unsigned dst_level = base_level + 1;
        dst_level <= last_level;
        dst_level++) {

      const unsigned src_level = dst_level - 1;

      for (unsigned layer = first_layer; layer <= last_layer; layer++) {
         brw_blorp_blit_miptrees(brw, mt, src_level, layer, format,
                                 SWIZZLE_XYZW, mt, dst_level, layer, format,
                                 0, 0,
                                 minify(base_size->width, src_level),
                                 minify(base_size->height, src_level),
                                 0, 0,
                                 minify(base_size->width, dst_level),
                                 minify(base_size->height, dst_level),
                                 GL_LINEAR, false, false,
                                 do_srgb, do_srgb);
      }
   }
}
@ -1,256 +0,0 @@
/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_vec4_gs.c
 *
 * State atom for client-programmable geometry shaders, and support code.
 */

#include "brw_gs.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_ff_gs.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "compiler/glsl/ir_uniform.h"

static void
assign_gs_binding_table_offsets(const struct intel_device_info *devinfo,
                                const struct gl_program *prog,
                                struct brw_gs_prog_data *prog_data)
{
   /* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
    * feedback surfaces.
    */
   uint32_t reserved = devinfo->ver == 6 ? BRW_MAX_SOL_BINDINGS : 0;

   brw_assign_common_binding_table_offsets(devinfo, prog,
                                           &prog_data->base.base, reserved);
}

static void
brw_gfx6_xfb_setup(const struct gl_transform_feedback_info *linked_xfb_info,
                   struct brw_gs_prog_data *gs_prog_data)
{
   static const unsigned swizzle_for_offset[4] = {
      BRW_SWIZZLE4(0, 1, 2, 3),
      BRW_SWIZZLE4(1, 2, 3, 3),
      BRW_SWIZZLE4(2, 3, 3, 3),
      BRW_SWIZZLE4(3, 3, 3, 3)
   };

   int i;

   /* Make sure that the VUE slots won't overflow the unsigned chars in
    * prog_data->transform_feedback_bindings[].
    */
   STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);

   /* Make sure that we don't need more binding table entries than we've
    * set aside for use in transform feedback. (We shouldn't, since we
    * set aside enough binding table entries to have one per component).
    */
   assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);

   gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
   for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
      gs_prog_data->transform_feedback_bindings[i] =
         linked_xfb_info->Outputs[i].OutputRegister;
      gs_prog_data->transform_feedback_swizzles[i] =
         swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
   }
}
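/* Illustrative sketch, not part of the original file: how the swizzle table
 * above maps a transform feedback ComponentOffset to channel selects. SWZ is
 * a hypothetical stand-in for BRW_SWIZZLE4 (four packed 2-bit channel
 * indices), assuming that layout.
 */
#include <assert.h>

#define SWZ(a, b, c, d) (((a) << 0) | ((b) << 2) | ((c) << 4) | ((d) << 6))

static void swizzle_sketch(void)
{
   const unsigned swizzle_for_offset[4] = {
      SWZ(0, 1, 2, 3), SWZ(1, 2, 3, 3), SWZ(2, 3, 3, 3), SWZ(3, 3, 3, 3)
   };
   /* An output captured at ComponentOffset 2 (the .zw half of a vec4) reads
    * z first, then w, with w replicated into the unused lanes.
    */
   assert(swizzle_for_offset[2] == SWZ(2, 3, 3, 3));
}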
static bool
brw_codegen_gs_prog(struct brw_context *brw,
                    struct brw_program *gp,
                    struct brw_gs_prog_key *key)
{
   struct brw_compiler *compiler = brw->screen->compiler;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct brw_stage_state *stage_state = &brw->gs.base;
   struct brw_gs_prog_data prog_data;
   bool start_busy = false;
   double start_time = 0;

   memset(&prog_data, 0, sizeof(prog_data));

   void *mem_ctx = ralloc_context(NULL);

   nir_shader *nir = nir_shader_clone(mem_ctx, gp->program.nir);

   assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data);

   brw_nir_setup_glsl_uniforms(mem_ctx, nir, &gp->program,
                               &prog_data.base.base,
                               compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
   if (brw->can_push_ubos) {
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
                                 prog_data.base.base.ubo_ranges);
   }

   uint64_t outputs_written = nir->info.outputs_written;

   brw_compute_vue_map(devinfo,
                       &prog_data.base.vue_map, outputs_written,
                       gp->program.info.separate_shader, 1);

   if (devinfo->ver == 6)
      brw_gfx6_xfb_setup(gp->program.sh.LinkedTransformFeedback,
                         &prog_data);

   int st_index = -1;
   if (INTEL_DEBUG(DEBUG_SHADER_TIME))
      st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true);

   if (unlikely(brw->perf_debug)) {
      start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
      start_time = get_time();
   }

   char *error_str;
   const unsigned *program =
      brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key,
                     &prog_data, nir, st_index,
                     NULL, &error_str);
   if (program == NULL) {
      ralloc_strcat(&gp->program.sh.data->InfoLog, error_str);
      _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      if (gp->compiled_once) {
         brw_debug_recompile(brw, MESA_SHADER_GEOMETRY, gp->program.Id,
                             &key->base);
      }
      if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
         perf_debug("GS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
      gp->compiled_once = true;
   }

   /* Scratch space is used for register spilling */
   brw_alloc_stage_scratch(brw, stage_state,
                           prog_data.base.base.total_scratch);

   /* The param and pull_param arrays will be freed by the shader cache. */
   ralloc_steal(NULL, prog_data.base.base.param);
   ralloc_steal(NULL, prog_data.base.base.pull_param);
   brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
                    key, sizeof(*key),
                    program, prog_data.base.base.program_size,
                    &prog_data, sizeof(prog_data),
                    &stage_state->prog_offset, &brw->gs.base.prog_data);
   ralloc_free(mem_ctx);

   return true;
}

static bool
brw_gs_state_dirty(const struct brw_context *brw)
{
   return brw_state_dirty(brw,
                          _NEW_TEXTURE,
                          BRW_NEW_GEOMETRY_PROGRAM |
                          BRW_NEW_TRANSFORM_FEEDBACK);
}

void
brw_gs_populate_key(struct brw_context *brw,
                    struct brw_gs_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   struct brw_program *gp =
      (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];

   memset(key, 0, sizeof(*key));

   brw_populate_base_prog_key(ctx, gp, &key->base);
}

void
brw_upload_gs_prog(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->gs.base;
   struct brw_gs_prog_key key;
   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct brw_program *gp =
      (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];

   if (!brw_gs_state_dirty(brw))
      return;

   brw_gs_populate_key(brw, &key);

   if (brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, &key, sizeof(key),
                        &stage_state->prog_offset, &brw->gs.base.prog_data,
                        true))
      return;

   if (brw_disk_cache_upload_program(brw, MESA_SHADER_GEOMETRY))
      return;

   gp = (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
   gp->id = key.base.program_string_id;

   ASSERTED bool success = brw_codegen_gs_prog(brw, gp, &key);
   assert(success);
}

void
brw_gs_populate_default_key(const struct brw_compiler *compiler,
                            struct brw_gs_prog_key *key,
                            struct gl_program *prog)
{
   const struct intel_device_info *devinfo = compiler->devinfo;

   memset(key, 0, sizeof(*key));

   brw_populate_default_base_prog_key(devinfo, brw_program(prog),
                                      &key->base);
}

bool
brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_gs_prog_key key;
   uint32_t old_prog_offset = brw->gs.base.prog_offset;
   struct brw_stage_prog_data *old_prog_data = brw->gs.base.prog_data;
   bool success;

   struct brw_program *bgp = brw_program(prog);

   brw_gs_populate_default_key(brw->screen->compiler, &key, prog);

   success = brw_codegen_gs_prog(brw, bgp, &key);

   brw->gs.base.prog_offset = old_prog_offset;
   brw->gs.base.prog_data = old_prog_data;

   return success;
}
@ -1,52 +0,0 @@
/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_GS_H
#define BRW_VEC4_GS_H

#include <stdbool.h>

#include "brw_context.h"

#ifdef __cplusplus
extern "C" {
#endif

struct gl_shader_program;

void
brw_upload_gs_prog(struct brw_context *brw);

void
brw_gs_populate_key(struct brw_context *brw,
                    struct brw_gs_prog_key *key);
void
brw_gs_populate_default_key(const struct brw_compiler *compiler,
                            struct brw_gs_prog_key *key,
                            struct gl_program *prog);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* BRW_VEC4_GS_H */
@ -1,117 +0,0 @@
/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "main/shaderapi.h"

#include "brw_context.h"
#include "brw_state.h"


/* Creates a new GS constant buffer reflecting the current GS program's
 * constants, if needed by the GS program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_gs_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->gs.base;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct brw_program *gp =
      (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];

   if (!gp)
      return;

   /* BRW_NEW_GS_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_gs_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA,
   },
   .emit = brw_upload_gs_pull_constants,
};

static void
brw_upload_gs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];

   /* BRW_NEW_GS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;

   brw_upload_ubo_surfaces(brw, prog, &brw->gs.base, prog_data);
}

const struct brw_tracked_state brw_gs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_gs_ubo_surfaces,
};

static void
brw_upload_gs_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_GEOMETRY_PROGRAM */
   const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];

   if (gp) {
      /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, gp, &brw->gs.base,
                                brw->gs.base.prog_data);
   }
}

const struct brw_tracked_state brw_gs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS,
   },
   .emit = brw_upload_gs_image_surfaces,
};
@ -1,122 +0,0 @@
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef BRW_IMAGE_H
#define BRW_IMAGE_H

/** @file intel_image.h
 *
 * Structure definitions and prototypes for __DRIimage, the driver-private
 * structure backing EGLImage or a drawable in DRI3.
 *
 * The __DRIimage is passed around the loader code (src/glx and src/egl), but
 * it's opaque to that code and may only be accessed by loader extensions
 * (mostly located in brw_screen.c).
 */

#include <stdbool.h>
#include <xf86drm.h>

#include "main/mtypes.h"
#include "brw_bufmgr.h"
#include <GL/internal/dri_interface.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Used with images created with image_from_names
 * to help support planar images.
 */
struct brw_image_format {
   int fourcc;
   int components;
   int nplanes;
   struct {
      int buffer_index;
      int width_shift;
      int height_shift;
      uint32_t dri_format;
      int cpp;
   } planes[3];
   float scaling_factor;
};
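/* Illustrative sketch, not part of the original file: a representative
 * planar entry for this structure, following NV12's layout (a full-resolution
 * R8 luma plane plus a half-width, half-height interleaved GR88 chroma
 * plane). The constant names follow drm_fourcc.h and dri_interface.h; the
 * exact values in the driver's real format table may differ.
 */
static const struct brw_image_format example_nv12 = {
   .fourcc = DRM_FORMAT_NV12,
   .components = __DRI_IMAGE_COMPONENTS_Y_UV,
   .nplanes = 2,
   .planes = {
      { .buffer_index = 0, .width_shift = 0, .height_shift = 0,
        .dri_format = __DRI_IMAGE_FORMAT_R8, .cpp = 1 },
      { .buffer_index = 1, .width_shift = 1, .height_shift = 1,
        .dri_format = __DRI_IMAGE_FORMAT_GR88, .cpp = 2 },
   },
};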
struct __DRIimageRec {
   struct brw_screen *screen;
   struct brw_bo *bo;
   uint32_t pitch; /**< in bytes */
   GLenum internal_format;
   uint32_t dri_format;
   GLuint format; /**< mesa_format or mesa_array_format */
   uint64_t modifier; /**< fb modifier (fourcc) */
   uint32_t offset;

   /*
    * Need to save these here between calls to
    * image_from_names and calls to image_from_planar.
    */
   uint32_t strides[3];
   uint32_t offsets[3];
   const struct brw_image_format *planar_format;

   /* particular miptree level */
   GLuint width;
   GLuint height;
   GLuint tile_x;
   GLuint tile_y;
   bool has_depthstencil;
   bool imported_dmabuf;

   /** Offset of the auxiliary compression surface in the bo. */
   uint32_t aux_offset;

   /** Pitch of the auxiliary compression surface. */
   uint32_t aux_pitch;

   /** Total size in bytes of the auxiliary compression surface. */
   uint32_t aux_size;

   /**
    * Provided by EGL_EXT_image_dma_buf_import.
    * \{
    */
   enum __DRIYUVColorSpace yuv_color_space;
   enum __DRISampleRange sample_range;
   enum __DRIChromaSiting horizontal_siting;
   enum __DRIChromaSiting vertical_siting;
   /* \} */

   __DRIscreen *driScrnPriv;

   void *loader_private;
};

#ifdef __cplusplus
}
#endif

#endif
@ -1,401 +0,0 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/gl_nir_linker.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/ir_optimization.h"
#include "compiler/glsl/program.h"
#include "compiler/nir/nir_serialize.h"
#include "program/program.h"
#include "main/glspirv.h"
#include "main/mtypes.h"
#include "main/shaderapi.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"

/**
 * Performs a compile of the shader stages even when we don't know
 * what non-orthogonal state will be set, in the hope that it reflects
 * the eventual NOS used, and thus allows us to produce link failures.
 */
static bool
brw_shader_precompile(struct gl_context *ctx,
                      struct gl_shader_program *sh_prog)
{
   struct gl_linked_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
   struct gl_linked_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
   struct gl_linked_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
   struct gl_linked_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
   struct gl_linked_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
   struct gl_linked_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];

   if (fs && !brw_fs_precompile(ctx, fs->Program))
      return false;

   if (gs && !brw_gs_precompile(ctx, gs->Program))
      return false;

   if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
      return false;

   if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program))
      return false;

   if (vs && !brw_vs_precompile(ctx, vs->Program))
      return false;

   if (cs && !brw_cs_precompile(ctx, cs->Program))
      return false;

   return true;
}

static void
brw_lower_packing_builtins(struct brw_context *brw,
                           exec_list *ir)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   /* Gens < 7 don't have instructions to convert to or from half-precision,
    * and Gens < 6 don't expose that functionality.
    */
   if (devinfo->ver != 6)
      return;

   lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
}

static void
process_glsl_ir(struct brw_context *brw,
                struct gl_shader_program *shader_prog,
                struct gl_linked_shader *shader)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;

   /* Temporary memory context for any new IR. */
   void *mem_ctx = ralloc_context(NULL);

   ralloc_adopt(mem_ctx, shader->ir);

   if (shader->Stage == MESA_SHADER_FRAGMENT) {
      lower_blend_equation_advanced(
         shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
   }

   /* lower_packing_builtins() inserts arithmetic instructions, so it
    * must precede lower_instructions().
    */
   brw_lower_packing_builtins(brw, shader->ir);
   do_mat_op_to_vec(shader->ir);

   unsigned instructions_to_lower = (DIV_TO_MUL_RCP |
                                     SUB_TO_ADD_NEG |
                                     EXP_TO_EXP2 |
                                     LOG_TO_LOG2 |
                                     DFREXP_DLDEXP_TO_ARITH);
   if (devinfo->ver < 7) {
      instructions_to_lower |= BIT_COUNT_TO_MATH |
                               EXTRACT_TO_SHIFTS |
                               INSERT_TO_SHIFTS |
                               REVERSE_TO_SHIFTS;
   }

   lower_instructions(shader->ir, instructions_to_lower);

   /* Pre-gfx6 HW can only nest if-statements 16 deep. Beyond this,
    * if-statements need to be flattened.
    */
   if (devinfo->ver < 6)
      lower_if_to_cond_assign(shader->Stage, shader->ir, 16);

   do_vec_index_to_cond_assign(shader->ir);
   lower_vector_insert(shader->ir, true);
   lower_offset_arrays(shader->ir);
   lower_quadop_vector(shader->ir, false);

   validate_ir_tree(shader->ir);

   /* Now that we've finished altering the linked IR, reparent any live IR back
    * to the permanent memory context, and free the temporary one (discarding any
    * junk we optimized away).
    */
   reparent_ir(shader->ir, shader->ir);
   ralloc_free(mem_ctx);

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      fprintf(stderr, "\n");
      if (shader->ir) {
         fprintf(stderr, "GLSL IR for linked %s program %d:\n",
                 _mesa_shader_stage_to_string(shader->Stage),
                 shader_prog->Name);
         _mesa_print_ir(stderr, shader->ir, NULL);
      } else {
         fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be "
                 "from cache)\n", _mesa_shader_stage_to_string(shader->Stage),
                 shader_prog->Name);
      }
      fprintf(stderr, "\n");
   }
}

static void
unify_interfaces(struct shader_info **infos)
{
   struct shader_info *prev_info = NULL;

   for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {
      if (!infos[i])
         continue;

      if (prev_info) {
         prev_info->outputs_written |= infos[i]->inputs_read &
            ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         infos[i]->inputs_read |= prev_info->outputs_written &
            ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);

         prev_info->patch_outputs_written |= infos[i]->patch_inputs_read;
         infos[i]->patch_inputs_read |= prev_info->patch_outputs_written;
      }
      prev_info = infos[i];
   }
}

static void
update_xfb_info(struct gl_transform_feedback_info *xfb_info,
                struct shader_info *info)
{
   if (!xfb_info)
      return;

   for (unsigned i = 0; i < xfb_info->NumOutputs; i++) {
      struct gl_transform_feedback_output *output = &xfb_info->Outputs[i];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->OutputRegister) {
      case VARYING_SLOT_LAYER:
         assert(output->NumComponents == 1);
         output->OutputRegister = VARYING_SLOT_PSIZ;
         output->ComponentOffset = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->NumComponents == 1);
         output->OutputRegister = VARYING_SLOT_PSIZ;
         output->ComponentOffset = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->NumComponents == 1);
         output->ComponentOffset = 3;
         break;
      }

      info->outputs_written |= 1ull << output->OutputRegister;
   }
}
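/* Worked example for the remapping above (illustrative, not from the
 * original file): a program that captures gl_Layer records
 * VARYING_SLOT_LAYER at link time, but since the VUE header packs it into
 * VARYING_SLOT_PSIZ.y, update_xfb_info() leaves the output as
 *
 *    output->OutputRegister  == VARYING_SLOT_PSIZ
 *    output->ComponentOffset == 1
 *
 * and outputs_written gains the PSIZ bit rather than the LAYER bit.
 */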
extern "C" GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;
   unsigned int stage;
   struct shader_info *infos[MESA_SHADER_STAGES] = { 0, };

   if (shProg->data->LinkStatus == LINKING_SKIPPED)
      return GL_TRUE;

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
      if (!shader)
         continue;

      struct gl_program *prog = shader->Program;
      prog->Parameters = _mesa_new_parameter_list();

      if (!shader->spirv_data)
         process_glsl_ir(brw, shProg, shader);

      _mesa_copy_linked_program_data(shProg, shader);

      prog->ShadowSamplers = shader->shadow_samplers;

      bool debug_enabled =
         INTEL_DEBUG(intel_debug_flag_for_shader_stage(shader->Stage));

      if (debug_enabled && shader->ir) {
         fprintf(stderr, "GLSL IR for native %s shader %d:\n",
                 _mesa_shader_stage_to_string(shader->Stage), shProg->Name);
         _mesa_print_ir(stderr, shader->ir, NULL);
         fprintf(stderr, "\n\n");
      }

      prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
                                 compiler->scalar_stage[stage]);
   }

   /* TODO: Verify if its feasible to split up the NIR linking work into a
    * per-stage part (that fill out information we need for the passes) and a
    * actual linking part, so that we could fold back brw_nir_lower_resources
    * back into brw_create_nir.
    */

   /* SPIR-V programs use a NIR linker */
   if (shProg->data->spirv) {
      static const gl_nir_linker_options opts = {
         .fill_parameters = false,
      };
      if (!gl_nir_link_spirv(ctx, shProg, &opts))
         return GL_FALSE;
   }

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
      if (!shader)
         continue;

      struct gl_program *prog = shader->Program;

      brw_nir_lower_resources(prog->nir, shProg, prog, &brw->screen->devinfo);

      NIR_PASS_V(prog->nir, brw_nir_lower_gl_images, prog);
   }

   /* Determine first and last stage. */
   unsigned first = MESA_SHADER_STAGES;
   unsigned last = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (!shProg->_LinkedShaders[i])
         continue;
      if (first == MESA_SHADER_STAGES)
         first = i;
      last = i;
   }

   /* Linking the stages in the opposite order (from fragment to vertex)
    * ensures that inter-shader outputs written to in an earlier stage
    * are eliminated if they are (transitively) not used in a later
    * stage.
    *
    * TODO: Look into Shadow of Mordor regressions on HSW and enable this for
    * all platforms. See: https://bugs.freedesktop.org/show_bug.cgi?id=103537
    */
   if (first != last && brw->screen->devinfo.ver >= 8) {
      int next = last;
      for (int i = next - 1; i >= 0; i--) {
         if (shProg->_LinkedShaders[i] == NULL)
            continue;

         brw_nir_link_shaders(compiler,
                              shProg->_LinkedShaders[i]->Program->nir,
                              shProg->_LinkedShaders[next]->Program->nir);
         next = i;
      }
   }

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
      if (!shader)
         continue;

      struct gl_program *prog = shader->Program;

      _mesa_update_shader_textures_used(shProg, prog);

      brw_shader_gather_info(prog->nir, prog);

      NIR_PASS_V(prog->nir, gl_nir_lower_atomics, shProg, false);
      NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo);

      nir_sweep(prog->nir);

      infos[stage] = &prog->nir->info;

      update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);

      /* Make a pass over the IR to add state references for any built-in
       * uniforms that are used. This has to be done now (during linking).
       * Code generation doesn't happen until the first time this shader is
       * used for rendering. Waiting until then to generate the parameters is
       * too late. At that point, the values for the built-in uniforms won't
       * get sent to the shader.
       */
      nir_foreach_uniform_variable(var, prog->nir) {
         const nir_state_slot *const slots = var->state_slots;
         for (unsigned int i = 0; i < var->num_state_slots; i++) {
            assert(slots != NULL);
            _mesa_add_state_reference(prog->Parameters, slots[i].tokens);
         }
      }
   }

   /* The linker tries to dead code eliminate unused varying components,
    * and make sure interfaces match. But it isn't able to do so in all
    * cases. So, explicitly make the interfaces match by OR'ing together
    * the inputs_read/outputs_written bitfields of adjacent stages.
    */
   if (!shProg->SeparateShader)
      unify_interfaces(infos);

   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
      for (unsigned i = 0; i < shProg->NumShaders; i++) {
         const struct gl_shader *sh = shProg->Shaders[i];
         if (!sh)
            continue;

         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
                 _mesa_shader_stage_to_string(sh->Stage),
                 i, shProg->Name);
         fprintf(stderr, "%s", sh->Source);
         fprintf(stderr, "\n");
      }
   }

   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
      return GL_FALSE;

   /* SPIR-V programs build its resource list from linked NIR shaders. */
   if (!shProg->data->spirv)
      build_program_resource_list(ctx, shProg, false);
   else
      nir_build_program_resource_list(ctx, shProg, true);

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
      if (!shader)
         continue;

      /* The GLSL IR won't be needed anymore. */
      ralloc_free(shader->ir);
      shader->ir = NULL;
   }

   return GL_TRUE;
}
@ -1,422 +0,0 @@
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_fbo.h"
#include "brw_meta_util.h"
#include "brw_state.h"
#include "main/blend.h"
#include "main/fbobject.h"
#include "util/format_srgb.h"

/**
 * Helper function for handling mirror image blits.
 *
 * If coord0 > coord1, swap them and invert the "mirror" boolean.
 */
static inline void
fixup_mirroring(bool *mirror, float *coord0, float *coord1)
{
   if (*coord0 > *coord1) {
      *mirror = !*mirror;
      float tmp = *coord0;
      *coord0 = *coord1;
      *coord1 = tmp;
   }
}
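/* Usage sketch (illustrative, not from the original file): a blit specified
 * right-to-left is normalized to left-to-right with the mirror flag flipped.
 *
 *    bool mirror_x = false;
 *    float x0 = 100.0f, x1 = 0.0f;
 *    fixup_mirroring(&mirror_x, &x0, &x1);
 *    // now x0 == 0.0f, x1 == 100.0f, and mirror_x == true
 */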
/**
 * Compute the number of pixels to clip for each side of a rect
 *
 * \param x0 The rect's left coordinate
 * \param y0 The rect's bottom coordinate
 * \param x1 The rect's right coordinate
 * \param y1 The rect's top coordinate
 * \param min_x The clipping region's left coordinate
 * \param min_y The clipping region's bottom coordinate
 * \param max_x The clipping region's right coordinate
 * \param max_y The clipping region's top coordinate
 * \param clipped_x0 The number of pixels to clip from the left side
 * \param clipped_y0 The number of pixels to clip from the bottom side
 * \param clipped_x1 The number of pixels to clip from the right side
 * \param clipped_y1 The number of pixels to clip from the top side
 *
 * \return false if we clip everything away, true otherwise
 */
static inline bool
compute_pixels_clipped(float x0, float y0, float x1, float y1,
                       float min_x, float min_y, float max_x, float max_y,
                       float *clipped_x0, float *clipped_y0, float *clipped_x1, float *clipped_y1)
{
   /* If we are going to clip everything away, stop. */
   if (!(min_x <= max_x &&
         min_y <= max_y &&
         x0 <= max_x &&
         y0 <= max_y &&
         min_x <= x1 &&
         min_y <= y1 &&
         x0 <= x1 &&
         y0 <= y1)) {
      return false;
   }

   if (x0 < min_x)
      *clipped_x0 = min_x - x0;
   else
      *clipped_x0 = 0;
   if (max_x < x1)
      *clipped_x1 = x1 - max_x;
   else
      *clipped_x1 = 0;

   if (y0 < min_y)
      *clipped_y0 = min_y - y0;
   else
      *clipped_y0 = 0;
   if (max_y < y1)
      *clipped_y1 = y1 - max_y;
   else
      *clipped_y1 = 0;

   return true;
}

/**
 * Clips a coordinate (left, right, top or bottom) for the src or dst rect
 * (whichever requires the largest clip) and adjusts the coordinate
 * for the other rect accordingly.
 *
 * \param mirror true if mirroring is required
 * \param src the source rect coordinate (for example srcX0)
 * \param dst0 the dst rect coordinate (for example dstX0)
 * \param dst1 the opposite dst rect coordinate (for example dstX1)
 * \param clipped_src0 number of pixels to clip from the src coordinate
 * \param clipped_dst0 number of pixels to clip from the dst coordinate
 * \param clipped_dst1 number of pixels to clip from the opposite dst coordinate
 * \param scale the src vs dst scale involved for that coordinate
 * \param isLeftOrBottom true if we are clipping the left or bottom sides
 *        of the rect.
 */
static inline void
clip_coordinates(bool mirror,
                 float *src, float *dst0, float *dst1,
                 float clipped_src0,
                 float clipped_dst0,
                 float clipped_dst1,
                 float scale,
                 bool isLeftOrBottom)
{
   /* When clipping we need to add or subtract pixels from the original
    * coordinates depending on whether we are acting on the left/bottom
    * or right/top sides of the rect respectively. We assume we have to
    * add them in the code below, and multiply by -1 when we should
    * subtract.
    */
   int mult = isLeftOrBottom ? 1 : -1;

   if (!mirror) {
      if (clipped_src0 >= clipped_dst0 * scale) {
         *src += clipped_src0 * mult;
         *dst0 += clipped_src0 / scale * mult;
      } else {
         *dst0 += clipped_dst0 * mult;
         *src += clipped_dst0 * scale * mult;
      }
   } else {
      if (clipped_src0 >= clipped_dst1 * scale) {
         *src += clipped_src0 * mult;
         *dst1 -= clipped_src0 / scale * mult;
      } else {
         *dst1 -= clipped_dst1 * mult;
         *src += clipped_dst1 * scale * mult;
      }
   }
}
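/* Worked numbers for the non-mirrored, left-side case (illustrative, not
 * from the original file): with scale = 2, clipped_src0 = 5 and
 * clipped_dst0 = 4, the test 5 >= 4 * 2 fails, so the dst branch runs:
 *
 *    *dst0 += 4;       -- clip the dst rect by 4 pixels
 *    *src  += 4 * 2;   -- advance the src by the equivalent 8 pixels
 *
 * Clipping 4 dst pixels covers 8 src pixels, more than the 5 the src needed,
 * so neither coordinate has to be clipped a second time.
 */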
bool
brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
                                 const struct gl_framebuffer *read_fb,
                                 const struct gl_framebuffer *draw_fb,
                                 GLfloat *srcX0, GLfloat *srcY0,
                                 GLfloat *srcX1, GLfloat *srcY1,
                                 GLfloat *dstX0, GLfloat *dstY0,
                                 GLfloat *dstX1, GLfloat *dstY1,
                                 bool *mirror_x, bool *mirror_y)
{
   *mirror_x = false;
   *mirror_y = false;

   /* Detect if the blit needs to be mirrored */
   fixup_mirroring(mirror_x, srcX0, srcX1);
   fixup_mirroring(mirror_x, dstX0, dstX1);
   fixup_mirroring(mirror_y, srcY0, srcY1);
   fixup_mirroring(mirror_y, dstY0, dstY1);

   /* Compute number of pixels to clip for each side of both rects. Return
    * early if we are going to clip everything away.
    */
   float clip_src_x0;
   float clip_src_x1;
   float clip_src_y0;
   float clip_src_y1;
   float clip_dst_x0;
   float clip_dst_x1;
   float clip_dst_y0;
   float clip_dst_y1;

   if (!compute_pixels_clipped(*srcX0, *srcY0, *srcX1, *srcY1,
                               0, 0, read_fb->Width, read_fb->Height,
                               &clip_src_x0, &clip_src_y0, &clip_src_x1, &clip_src_y1))
      return true;

   if (!compute_pixels_clipped(*dstX0, *dstY0, *dstX1, *dstY1,
                               draw_fb->_Xmin, draw_fb->_Ymin, draw_fb->_Xmax, draw_fb->_Ymax,
                               &clip_dst_x0, &clip_dst_y0, &clip_dst_x1, &clip_dst_y1))
      return true;

   /* When clipping any of the two rects we need to adjust the coordinates in
    * the other rect considering the scaling factor involved. To obtain the best
    * precision we want to make sure that we only clip once per side to avoid
    * accumulating errors due to the scaling adjustment.
    *
    * For example, if srcX0 and dstX0 need both to be clipped we want to avoid
    * the situation where we clip srcX0 first, then adjust dstX0 accordingly
    * but then we realize that the resulting dstX0 still needs to be clipped,
    * so we clip dstX0 and adjust srcX0 again. Because we are applying scaling
    * factors to adjust the coordinates in each clipping pass we lose some
    * precision and that can affect the results of the blorp blit operation
    * slightly. What we want to do here is detect the rect that we should
    * clip first for each side so that when we adjust the other rect we ensure
    * the resulting coordinate does not need to be clipped again.
    *
    * The code below implements this by comparing the number of pixels that
    * we need to clip for each side of both rects considering the scales
    * involved. For example, clip_src_x0 represents the number of pixels to be
    * clipped for the src rect's left side, so if clip_src_x0 = 5,
    * clip_dst_x0 = 4 and scaleX = 2 it means that we are clipping more from
    * the dst rect so we should clip dstX0 only and adjust srcX0. This is
    * because clipping 4 pixels in the dst is equivalent to clipping
    * 4 * 2 = 8 > 5 in the src.
    */

   if (*srcX0 == *srcX1 || *srcY0 == *srcY1
       || *dstX0 == *dstX1 || *dstY0 == *dstY1)
      return true;

   float scaleX = (float) (*srcX1 - *srcX0) / (*dstX1 - *dstX0);
   float scaleY = (float) (*srcY1 - *srcY0) / (*dstY1 - *dstY0);

   /* Clip left side */
   clip_coordinates(*mirror_x,
                    srcX0, dstX0, dstX1,
                    clip_src_x0, clip_dst_x0, clip_dst_x1,
                    scaleX, true);

   /* Clip right side */
   clip_coordinates(*mirror_x,
                    srcX1, dstX1, dstX0,
                    clip_src_x1, clip_dst_x1, clip_dst_x0,
                    scaleX, false);

   /* Clip bottom side */
   clip_coordinates(*mirror_y,
                    srcY0, dstY0, dstY1,
                    clip_src_y0, clip_dst_y0, clip_dst_y1,
                    scaleY, true);

   /* Clip top side */
   clip_coordinates(*mirror_y,
                    srcY1, dstY1, dstY0,
                    clip_src_y1, clip_dst_y1, clip_dst_y0,
                    scaleY, false);

   /* Account for the fact that in the system framebuffer, the origin is at
    * the lower left.
    */
   if (read_fb->FlipY) {
      GLint tmp = read_fb->Height - *srcY0;
      *srcY0 = read_fb->Height - *srcY1;
      *srcY1 = tmp;
      *mirror_y = !*mirror_y;
   }
   if (draw_fb->FlipY) {
      GLint tmp = draw_fb->Height - *dstY0;
      *dstY0 = draw_fb->Height - *dstY1;
      *dstY1 = tmp;
      *mirror_y = !*mirror_y;
   }

   /* Check for invalid bounds
    * Can't blit for 0-dimensions
    */
   return *srcX0 == *srcX1 || *srcY0 == *srcY1
      || *dstX0 == *dstX1 || *dstY0 == *dstY1;
}

/**
 * Determine if fast color clear supports the given clear color.
 *
 * Fast color clear can only clear to color values of 1.0 or 0.0. At the
 * moment we only support floating point, unorm, and snorm buffers.
 */
bool
brw_is_color_fast_clear_compatible(struct brw_context *brw,
                                   const struct brw_mipmap_tree *mt,
                                   const union gl_color_union *color)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* If we're mapping the render format to a different format than the
    * format we use for texturing then it is a bit questionable whether it
    * should be possible to use a fast clear. Although we only actually
    * render using a renderable format, without the override workaround it
    * wouldn't be possible to have a non-renderable surface in a fast clear
    * state so the hardware probably legitimately doesn't need to support
    * this case. At least on Gfx9 this really does seem to cause problems.
    */
   if (devinfo->ver >= 9 &&
       brw_isl_format_for_mesa_format(mt->format) !=
       brw->mesa_to_isl_render_format[mt->format])
      return false;

   const mesa_format format = _mesa_get_render_format(ctx, mt->format);
   if (_mesa_is_format_integer_color(format)) {
      if (devinfo->ver >= 8) {
         perf_debug("Integer fast clear not enabled for (%s)",
                    _mesa_get_format_name(format));
      }
      return false;
   }

   for (int i = 0; i < 4; i++) {
      if (!_mesa_format_has_color_component(format, i)) {
         continue;
      }

      if (devinfo->ver < 9 &&
          color->f[i] != 0.0f && color->f[i] != 1.0f) {
         return false;
      }
   }
   return true;
}
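/* Concrete cases (illustrative, not from the original file), for a UNORM
 * surface on pre-gfx9 hardware:
 *
 *    clear color (1.0, 0.0, 0.0, 1.0) -> true  (every channel is 0.0 or 1.0)
 *    clear color (0.5, 0.0, 0.0, 1.0) -> false (0.5 cannot be fast-cleared)
 */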
/**
 * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
 * SURFACE_STATE (DWORD 12-15 on SKL+).
 */
union isl_color_value
brw_meta_convert_fast_clear_color(const struct brw_context *brw,
                                  const struct brw_mipmap_tree *mt,
                                  const union gl_color_union *color)
{
   union isl_color_value override_color = {
      .u32 = {
         color->ui[0],
         color->ui[1],
         color->ui[2],
         color->ui[3],
      },
   };

   /* The sampler doesn't look at the format of the surface when the fast
    * clear color is used so we need to implement luminance, intensity and
    * missing components manually.
    */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_INTENSITY:
      override_color.u32[3] = override_color.u32[0];
      FALLTHROUGH;
   case GL_LUMINANCE:
   case GL_LUMINANCE_ALPHA:
      override_color.u32[1] = override_color.u32[0];
      override_color.u32[2] = override_color.u32[0];
      break;
   default:
      for (int i = 0; i < 3; i++) {
         if (!_mesa_format_has_color_component(mt->format, i))
            override_color.u32[i] = 0;
      }
      break;
   }

   switch (_mesa_get_format_datatype(mt->format)) {
   case GL_UNSIGNED_NORMALIZED:
      for (int i = 0; i < 4; i++)
         override_color.f32[i] = SATURATE(override_color.f32[i]);
      break;

   case GL_SIGNED_NORMALIZED:
      for (int i = 0; i < 4; i++)
         override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f);
      break;

   case GL_UNSIGNED_INT:
      for (int i = 0; i < 4; i++) {
         unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
         if (bits < 32) {
            uint32_t max = (1u << bits) - 1;
            override_color.u32[i] = MIN2(override_color.u32[i], max);
         }
      }
      break;

   case GL_INT:
      for (int i = 0; i < 4; i++) {
         unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
         if (bits < 32) {
            int32_t max = (1 << (bits - 1)) - 1;
            int32_t min = -(1 << (bits - 1));
            override_color.i32[i] = CLAMP(override_color.i32[i], min, max);
         }
      }
      break;

   case GL_FLOAT:
      if (!_mesa_is_format_signed(mt->format)) {
         for (int i = 0; i < 4; i++)
            override_color.f32[i] = MAX2(override_color.f32[i], 0.0f);
      }
      break;
   }

   if (!_mesa_format_has_color_component(mt->format, 3)) {
      if (_mesa_is_format_integer_color(mt->format))
         override_color.u32[3] = 1;
      else
         override_color.f32[3] = 1.0f;
   }

   /* Handle linear to SRGB conversion */
   if (brw->ctx.Color.sRGBEnabled &&
       _mesa_get_srgb_format_linear(mt->format) != mt->format) {
      for (int i = 0; i < 3; i++) {
         override_color.f32[i] =
            util_format_linear_to_srgb_float(override_color.f32[i]);
      }
   }

   return override_color;
}
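/* Worked case (illustrative, not from the original file): clearing an RGBA
 * UNORM surface to (1.5, -0.25, 0.5, 2.0) takes the GL_UNSIGNED_NORMALIZED
 * path above, so SATURATE clamps each channel to [0, 1]:
 *
 *    in : { 1.5f, -0.25f, 0.5f, 2.0f }
 *    out: { 1.0f,  0.0f,  0.5f, 1.0f }
 */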
@ -1,59 +0,0 @@
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_META_UTIL_H
#define BRW_META_UTIL_H

#include <stdbool.h>
#include "main/mtypes.h"
#include "brw_mipmap_tree.h"

#ifdef __cplusplus
extern "C" {
#endif

bool
brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
                                 const struct gl_framebuffer *read_fb,
                                 const struct gl_framebuffer *draw_fb,
                                 GLfloat *srcX0, GLfloat *srcY0,
                                 GLfloat *srcX1, GLfloat *srcY1,
                                 GLfloat *dstX0, GLfloat *dstY0,
                                 GLfloat *dstX1, GLfloat *dstY1,
                                 bool *mirror_x, bool *mirror_y);

union isl_color_value
brw_meta_convert_fast_clear_color(const struct brw_context *brw,
                                  const struct brw_mipmap_tree *mt,
                                  const union gl_color_union *color);

bool
brw_is_color_fast_clear_compatible(struct brw_context *brw,
                                   const struct brw_mipmap_tree *mt,
                                   const union gl_color_union *color);

#ifdef __cplusplus
}
#endif

#endif /* BRW_META_UTIL_H */
File diff suppressed because it is too large
@ -1,741 +0,0 @@
|
|||
/*
|
||||
* Copyright 2006 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** @file intel_mipmap_tree.h
|
||||
*
|
||||
* This file defines the structure that wraps a BO and describes how the
|
||||
* mipmap levels and slices of a texture are laid out.
|
||||
*
|
||||
* The hardware has a fixed layout of a texture depending on parameters such
|
||||
* as the target/type (2D, 3D, CUBE), width, height, pitch, and number of
|
||||
* mipmap levels. The individual level/layer slices are each 2D rectangles of
|
||||
* pixels at some x/y offset from the start of the brw_bo.
|
||||
*
|
||||
* Original OpenGL allowed texture miplevels to be specified in arbitrary
|
||||
* order, and a texture may change size over time. Thus, each
|
||||
* brw_texture_image has a reference to a miptree that contains the pixel
|
||||
* data sized appropriately for it, which will later be referenced by/copied
|
||||
* to the brw_texture_object at draw time (brw_finalize_mipmap_tree()) so
|
||||
* that there's a single miptree for the complete texture.
|
||||
*/
|
||||
|
||||
#ifndef BRW_MIPMAP_TREE_H
|
||||
#define BRW_MIPMAP_TREE_H
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "main/mtypes.h"
|
||||
#include "isl/isl.h"
|
||||
#include "blorp/blorp.h"
|
||||
#include "brw_bufmgr.h"
|
||||
#include "brw_context.h"
|
||||
#include <GL/internal/dri_interface.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct brw_context;
|
||||
struct brw_renderbuffer;
|
||||
|
||||
struct brw_texture_image;
|
||||
|
||||
/**
|
||||
* This bit extends the set of GL_MAP_*_BIT enums.
|
||||
*
|
||||
* When calling brw_miptree_map() on an ETC-transcoded-to-RGB miptree or a
|
||||
* depthstencil-split-to-separate-stencil miptree, we'll normally make a
|
||||
* temporary and recreate the kind of data requested by Mesa core, since we're
|
||||
* satisfying some glGetTexImage() request or something.
|
||||
*
|
||||
* However, occasionally you want to actually map the miptree's current data
|
||||
* without transcoding back. This flag to brw_miptree_map() gets you that.
|
||||
*/
|
||||
#define BRW_MAP_DIRECT_BIT 0x80000000
|
||||
|
||||
struct brw_miptree_map {
|
||||
/** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */
|
||||
GLbitfield mode;
|
||||
/** Region of interest for the map. */
|
||||
int x, y, w, h;
|
||||
/** Possibly malloced temporary buffer for the mapping. */
|
||||
void *buffer;
|
||||
/** Possible pointer to a temporary linear miptree for the mapping. */
|
||||
struct brw_mipmap_tree *linear_mt;
|
||||
/** Pointer to the start of (map_x, map_y) returned by the mapping. */
|
||||
void *ptr;
|
||||
/** Stride of the mapping. */
|
||||
int stride;
|
||||
|
||||
void (*unmap)(struct brw_context *brw,
|
||||
struct brw_mipmap_tree *mt,
|
||||
struct brw_miptree_map *map,
|
||||
unsigned int level,
|
||||
unsigned int slice);
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes the location of each texture image within a miptree.
|
||||
*/
|
||||
struct brw_mipmap_level
|
||||
{
|
||||
/** Offset to this miptree level, used in computing x_offset. */
|
||||
GLuint level_x;
|
||||
/** Offset to this miptree level, used in computing y_offset. */
|
||||
GLuint level_y;
|
||||
|
||||
/**
|
||||
* \brief Is HiZ enabled for this level?
|
||||
*
|
||||
* If \c mt->level[l].has_hiz is set, then (1) \c mt->hiz_mt has been
|
||||
* allocated and (2) the HiZ memory for the slices in this level reside at
|
||||
* \c mt->hiz_mt->level[l].
|
||||
*/
|
||||
bool has_hiz;
|
||||
|
||||
/**
|
||||
* \brief List of 2D images in this mipmap level.
|
||||
*
|
||||
* This may be a list of cube faces, array slices in 2D array texture, or
|
||||
* layers in a 3D texture. The list's length is \c depth.
|
||||
*/
|
||||
struct brw_mipmap_slice {
|
||||
/**
|
||||
* Mapping information. Persistent for the duration of
|
||||
* brw_miptree_map/unmap on this slice.
|
||||
*/
|
||||
struct brw_miptree_map *map;
|
||||
} *slice;
|
||||
};
|
||||
|
||||
/**
|
||||
* Miptree aux buffer. These buffers are associated with a miptree, but the
|
||||
* format is managed by the hardware.
|
||||
*
|
||||
* For Gfx7+, we always give the hardware the start of the buffer, and let it
|
||||
* handle all accesses to the buffer. Therefore we don't need the full miptree
|
||||
* layout structure for this buffer.
|
||||
*/
|
||||
struct brw_miptree_aux_buffer
|
||||
{
|
||||
struct isl_surf surf;
|
||||
|
||||
/**
|
||||
* Buffer object containing the pixel data.
|
||||
*
|
||||
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
|
||||
* @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress
|
||||
*/
|
||||
struct brw_bo *bo;
|
||||
|
||||
/**
|
||||
* Offset into bo where the surface starts.
|
||||
*
|
||||
* @see brw_mipmap_aux_buffer::bo
    *
    * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
    * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
    * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
    */
   uint32_t offset;

   /**
    * Buffer object containing the indirect clear color.
    *
    * @see create_ccs_buf_for_image
    * @see RENDER_SURFACE_STATE.ClearValueAddress
    */
   struct brw_bo *clear_color_bo;

   /**
    * Offset into bo where the clear color can be found.
    *
    * @see create_ccs_buf_for_image
    * @see RENDER_SURFACE_STATE.ClearValueAddress
    */
   uint32_t clear_color_offset;
};

struct brw_mipmap_tree
{
   struct isl_surf surf;

   /**
    * Buffer object containing the surface.
    *
    * @see brw_mipmap_tree::offset
    * @see RENDER_SURFACE_STATE.SurfaceBaseAddress
    * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
    * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
    * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
    */
   struct brw_bo *bo;

   /**
    * @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc.
    *
    * @see RENDER_SURFACE_STATE.SurfaceType
    * @see RENDER_SURFACE_STATE.SurfaceArray
    * @see 3DSTATE_DEPTH_BUFFER.SurfaceType
    */
   GLenum target;

   /**
    * Generally, this is just the same as the gl_texture_image->TexFormat or
    * gl_renderbuffer->Format.
    *
    * However, for textures and renderbuffers with packed depth/stencil
    * formats on hardware where we want or need to use separate stencil,
    * there will be two miptrees for storing the data. If the depthstencil
    * texture or rb is MESA_FORMAT_Z32_FLOAT_S8X24_UINT, then mt->format will
    * be MESA_FORMAT_Z_FLOAT32, otherwise for MESA_FORMAT_Z24_UNORM_S8_UINT
    * objects it will be MESA_FORMAT_Z24_UNORM_X8_UINT.
    *
    * @see RENDER_SURFACE_STATE.SurfaceFormat
    * @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat
    */
   mesa_format format;

   GLuint first_level;
   GLuint last_level;

   /** Bytes per pixel (or bytes per block if compressed) */
   GLuint cpp;

   bool compressed;

   /* Includes image offset tables: */
   struct brw_mipmap_level level[MAX_TEXTURE_LEVELS];

   /**
    * Offset into bo where the surface starts.
    *
    * @see brw_mipmap_tree::bo
    *
    * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
    * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
    * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
    * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
    */
   uint32_t offset;

   /**
    * \brief The type of auxiliary compression used by this miptree.
    *
    * This describes the type of auxiliary compression that is intended to be
    * used by this miptree. An aux usage of ISL_AUX_USAGE_NONE means that
    * auxiliary compression is permanently disabled. An aux usage other than
    * ISL_AUX_USAGE_NONE does not imply that the auxiliary buffer has actually
    * been allocated nor does it imply that auxiliary compression will always
    * be enabled for this surface. For instance, with CCS_D, we may allocate
    * the CCS on-the-fly and it may not be used for texturing if the miptree
    * is fully resolved.
    */
   enum isl_aux_usage aux_usage;

   /**
    * \brief Whether or not this miptree supports fast clears.
    */
   bool supports_fast_clear;

   /**
    * \brief Maps miptree slices to their current aux state
    *
    * This two-dimensional array is indexed as [level][layer] and stores an
    * aux state for each slice.
    */
   enum isl_aux_state **aux_state;

   /**
    * \brief Stencil miptree for depthstencil textures.
    *
    * This miptree is used for depthstencil textures and renderbuffers that
    * require separate stencil. It always has the true copy of the stencil
    * bits, regardless of mt->format.
    *
    * \see 3DSTATE_STENCIL_BUFFER
    * \see brw_miptree_map_depthstencil()
    * \see brw_miptree_unmap_depthstencil()
    */
   struct brw_mipmap_tree *stencil_mt;

   /**
    * \brief Shadow miptree for sampling when the main isn't supported by HW.
    *
    * To work around various sampler bugs and limitations, we blit the main
    * texture into a new texture that can be sampled.
    *
    * This miptree may be used for:
    * - Stencil texturing (pre-BDW) as required by GL_ARB_stencil_texturing.
    * - To store the decompressed ETC/EAC data in case we emulate the ETC
    *   compression on Gen 7 or earlier GPUs.
    */
   struct brw_mipmap_tree *shadow_mt;
   bool shadow_needs_update;

   /**
    * \brief CCS, MCS, or HiZ auxiliary buffer.
    *
    * NULL if no auxiliary buffer is in use for this surface.
    *
    * For single-sampled color miptrees:
    *    This buffer contains the Color Control Surface, which stores the
    *    necessary information to implement lossless color compression
    *    (CCS_E) and "fast color clear" (CCS_D) behaviour.
    *
    * For multi-sampled color miptrees:
    *    This buffer contains the Multisample Control Surface, which stores
    *    the necessary information to implement compressed MSAA
    *    (INTEL_MSAA_FORMAT_CMS).
    *
    * For depth miptrees:
    *    This buffer contains the Hierarchical Depth Buffer, which stores the
    *    necessary information to implement lossless depth compression and
    *    fast depth clear behavior.
    *
    * To determine if HiZ is enabled, do not check this pointer. Instead,
    * use brw_miptree_level_has_hiz().
    */
   struct brw_miptree_aux_buffer *aux_buf;

   /**
    * Planes 1 and 2 in case this is a planar surface.
    */
   struct brw_mipmap_tree *plane[2];

   /**
    * Fast clear color for this surface. For depth surfaces, the clear value
    * is stored as a float32 in the red component.
    */
   union isl_color_value fast_clear_color;

   /**
    * For external surfaces, this is the DRM format modifier that was used to
    * create or import the surface. For internal surfaces, this will always
    * be DRM_FORMAT_MOD_INVALID.
    */
   uint64_t drm_modifier;

   /* These are also refcounted:
    */
   GLuint refcount;
};

bool
brw_miptree_alloc_aux(struct brw_context *brw,
                      struct brw_mipmap_tree *mt);

enum brw_miptree_create_flags {
   /** No miptree create flags */
   MIPTREE_CREATE_DEFAULT = 0,

   /** Miptree creation should try to allocate a currently busy BO
    *
    * This may be advantageous if we know the next thing to touch the BO will
    * be the GPU because the BO will likely already be in the GTT and maybe
    * even in some caches. If there is a chance that the next thing to touch
    * the miptree BO will be the CPU, this flag should not be set.
    */
   MIPTREE_CREATE_BUSY = 1 << 0,

   /** Create the miptree with auxiliary compression disabled
    *
    * This does not prevent the caller of brw_miptree_create from coming
    * along later and turning auxiliary compression back on but it does mean
    * that the miptree will be created with mt->aux_usage == NONE.
    */
   MIPTREE_CREATE_NO_AUX = 1 << 1,
};

struct brw_mipmap_tree *brw_miptree_create(struct brw_context *brw,
                                           GLenum target,
                                           mesa_format format,
                                           GLuint first_level,
                                           GLuint last_level,
                                           GLuint width0,
                                           GLuint height0,
                                           GLuint depth0,
                                           GLuint num_samples,
                                           enum brw_miptree_create_flags flags);
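
/* Illustrative sketch, not part of the original header: one plausible call
 * shape for brw_miptree_create(). The helper name, size, and format below
 * are assumptions picked for the example, not driver API.
 */
static inline struct brw_mipmap_tree *
example_create_plain_2d(struct brw_context *brw)
{
   /* A single-level, single-sample 256x256 RGBA8 miptree with auxiliary
    * compression disabled from the start.
    */
   return brw_miptree_create(brw, GL_TEXTURE_2D, MESA_FORMAT_R8G8B8A8_UNORM,
                             0 /* first_level */, 0 /* last_level */,
                             256, 256, 1 /* width0, height0, depth0 */,
                             1 /* num_samples */,
                             MIPTREE_CREATE_NO_AUX);
}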

struct brw_mipmap_tree *
brw_miptree_create_for_bo(struct brw_context *brw,
                          struct brw_bo *bo,
                          mesa_format format,
                          uint32_t offset,
                          uint32_t width,
                          uint32_t height,
                          uint32_t depth,
                          int pitch,
                          enum isl_tiling tiling,
                          enum brw_miptree_create_flags flags);

struct brw_mipmap_tree *
brw_miptree_create_for_dri_image(struct brw_context *brw,
                                 __DRIimage *image,
                                 GLenum target,
                                 mesa_format format,
                                 bool allow_internal_aux);

bool
brw_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                       struct brw_renderbuffer *irb,
                                       struct brw_mipmap_tree *singlesample_mt,
                                       uint32_t width, uint32_t height,
                                       uint32_t pitch);

/**
 * Create a miptree appropriate as the storage for a non-texture renderbuffer.
 * The miptree has the following properties:
 *     - The target is GL_TEXTURE_2D.
 *     - There are no levels other than the base level 0.
 *     - Depth is 1.
 */
struct brw_mipmap_tree *
brw_miptree_create_for_renderbuffer(struct brw_context *brw,
                                    mesa_format format,
                                    uint32_t width,
                                    uint32_t height,
                                    uint32_t num_samples);

mesa_format
brw_depth_format_for_depthstencil_format(mesa_format format);

mesa_format
brw_lower_compressed_format(struct brw_context *brw, mesa_format format);

unsigned
brw_get_num_logical_layers(const struct brw_mipmap_tree *mt, unsigned level);

/** \brief Assert that the level and layer are valid for the miptree. */
void
brw_miptree_check_level_layer(const struct brw_mipmap_tree *mt,
                              uint32_t level,
                              uint32_t layer);

void brw_miptree_reference(struct brw_mipmap_tree **dst,
                           struct brw_mipmap_tree *src);

void brw_miptree_release(struct brw_mipmap_tree **mt);

/* Check if an image fits an existing mipmap tree layout
 */
bool brw_miptree_match_image(struct brw_mipmap_tree *mt,
                             struct gl_texture_image *image);

void
brw_miptree_get_image_offset(const struct brw_mipmap_tree *mt,
                             GLuint level, GLuint slice,
                             GLuint *x, GLuint *y);

enum isl_surf_dim
get_isl_surf_dim(GLenum target);

enum isl_dim_layout
get_isl_dim_layout(const struct intel_device_info *devinfo,
                   enum isl_tiling tiling, GLenum target);

void
brw_get_image_dims(struct gl_texture_image *image,
                   int *width, int *height, int *depth);

uint32_t
brw_miptree_get_tile_offsets(const struct brw_mipmap_tree *mt,
                             GLuint level, GLuint slice,
                             uint32_t *tile_x,
                             uint32_t *tile_y);
uint32_t
brw_miptree_get_aligned_offset(const struct brw_mipmap_tree *mt,
                               uint32_t x, uint32_t y);

void
brw_miptree_copy_slice(struct brw_context *brw,
                       struct brw_mipmap_tree *src_mt,
                       unsigned src_level, unsigned src_layer,
                       struct brw_mipmap_tree *dst_mt,
                       unsigned dst_level, unsigned dst_layer);

void
brw_miptree_copy_teximage(struct brw_context *brw,
                          struct brw_texture_image *brw_image,
                          struct brw_mipmap_tree *dst_mt);

/**
 * \name Miptree HiZ functions
 * \{
 *
 * It is safe to call the "slice_set_need_resolve" and "slice_resolve"
 * functions on a miptree without HiZ. In that case, each function is a no-op.
 */

bool
brw_miptree_level_has_hiz(const struct brw_mipmap_tree *mt, uint32_t level);

/**\}*/

bool
brw_miptree_has_color_unresolved(const struct brw_mipmap_tree *mt,
                                 unsigned start_level, unsigned num_levels,
                                 unsigned start_layer, unsigned num_layers);


#define INTEL_REMAINING_LAYERS UINT32_MAX
#define INTEL_REMAINING_LEVELS UINT32_MAX

/** Prepare a miptree for access
 *
 * This function should be called prior to any access to a miptree in order
 * to perform any needed resolves.
 *
 * \param[in]  start_level    The first mip level to be accessed
 *
 * \param[in]  num_levels     The number of miplevels to be accessed or
 *                            INTEL_REMAINING_LEVELS to indicate every level
 *                            above start_level will be accessed
 *
 * \param[in]  start_layer    The first array slice or 3D layer to be accessed
 *
 * \param[in]  num_layers     The number of array slices or 3D layers to be
 *                            accessed or INTEL_REMAINING_LAYERS to indicate
 *                            every layer above start_layer will be accessed
 *
 * \param[in]  aux_usage      The type of auxiliary compression the access
 *                            will be done with; this must be
 *                            ISL_AUX_USAGE_NONE for uncompressed miptrees
 *
 * \param[in]  fast_clear_supported  Whether or not the access will support
 *                            fast clears in the miptree's auxiliary
 *                            compression format
 */
void
brw_miptree_prepare_access(struct brw_context *brw,
                           struct brw_mipmap_tree *mt,
                           uint32_t start_level, uint32_t num_levels,
                           uint32_t start_layer, uint32_t num_layers,
                           enum isl_aux_usage aux_usage,
                           bool fast_clear_supported);

/** Complete a write operation
 *
 * This function should be called after any operation that writes to a
 * miptree. This will update the miptree's compression state so that future
 * resolves happen correctly. Technically, this function can be called
 * before the write occurs but the caller must ensure that they don't
 * interlace brw_miptree_prepare_access and brw_miptree_finish_write calls to
 * overlapping layer/level ranges.
 *
 * \param[in]  level        The mip level that was written
 *
 * \param[in]  start_layer  The first array slice or 3D layer written
 *
 * \param[in]  num_layers   The number of array slices or 3D layers written
 *                          or INTEL_REMAINING_LAYERS to indicate every layer
 *                          above start_layer was written
 *
 * \param[in]  aux_usage    The type of auxiliary compression the write was
 *                          performed with
 */
void
brw_miptree_finish_write(struct brw_context *brw,
                         struct brw_mipmap_tree *mt, uint32_t level,
                         uint32_t start_layer, uint32_t num_layers,
                         enum isl_aux_usage aux_usage);
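
/* Illustrative sketch, not part of the original header: the prepare/finish
 * protocol documented above, applied to a hypothetical single-slice write
 * performed with CCS_E compression. The helper name is an assumption;
 * brw_miptree_access_raw() below is the in-tree example of the same pattern
 * for uncompressed access.
 */
static inline void
example_compressed_write(struct brw_context *brw,
                         struct brw_mipmap_tree *mt,
                         uint32_t level, uint32_t layer)
{
   /* Resolve whatever is needed so the slice can legally be written with
    * CCS_E enabled and fast clears allowed.
    */
   brw_miptree_prepare_access(brw, mt, level, 1, layer, 1,
                              ISL_AUX_USAGE_CCS_E, true);
   /* ... emit the GPU commands that write (level, layer) ... */
   brw_miptree_finish_write(brw, mt, level, layer, 1, ISL_AUX_USAGE_CCS_E);
}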

/** Get the auxiliary compression state of a miptree slice */
enum isl_aux_state
brw_miptree_get_aux_state(const struct brw_mipmap_tree *mt,
                          uint32_t level, uint32_t layer);

/** Set the auxiliary compression state of a miptree slice range
 *
 * This function directly sets the auxiliary compression state of a slice
 * range of a miptree. It only modifies data structures and does not do any
 * resolves. This should only be called by code which directly performs
 * compression operations such as fast clears and resolves. Most code should
 * use brw_miptree_prepare_access or brw_miptree_finish_write.
 */
void
brw_miptree_set_aux_state(struct brw_context *brw,
                          struct brw_mipmap_tree *mt, uint32_t level,
                          uint32_t start_layer, uint32_t num_layers,
                          enum isl_aux_state aux_state);

/**
 * Prepare a miptree for raw access
 *
 * This helper prepares the miptree for access that knows nothing about any
 * sort of compression whatsoever. This is useful when mapping the surface or
 * using it with the blitter.
 */
static inline void
brw_miptree_access_raw(struct brw_context *brw,
                       struct brw_mipmap_tree *mt,
                       uint32_t level, uint32_t layer,
                       bool write)
{
   brw_miptree_prepare_access(brw, mt, level, 1, layer, 1,
                              ISL_AUX_USAGE_NONE, false);
   if (write)
      brw_miptree_finish_write(brw, mt, level, layer, 1, ISL_AUX_USAGE_NONE);
}

enum isl_aux_usage
brw_miptree_texture_aux_usage(struct brw_context *brw,
                              struct brw_mipmap_tree *mt,
                              enum isl_format view_format,
                              enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits);
void
brw_miptree_prepare_texture(struct brw_context *brw,
                            struct brw_mipmap_tree *mt,
                            enum isl_format view_format,
                            uint32_t start_level, uint32_t num_levels,
                            uint32_t start_layer, uint32_t num_layers,
                            enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits);
void
brw_miptree_prepare_image(struct brw_context *brw,
                          struct brw_mipmap_tree *mt);

enum isl_aux_usage
brw_miptree_render_aux_usage(struct brw_context *brw,
                             struct brw_mipmap_tree *mt,
                             enum isl_format render_format,
                             bool blend_enabled,
                             bool draw_aux_disabled);
void
brw_miptree_prepare_render(struct brw_context *brw,
                           struct brw_mipmap_tree *mt, uint32_t level,
                           uint32_t start_layer, uint32_t layer_count,
                           enum isl_aux_usage aux_usage);
void
brw_miptree_finish_render(struct brw_context *brw,
                          struct brw_mipmap_tree *mt, uint32_t level,
                          uint32_t start_layer, uint32_t layer_count,
                          enum isl_aux_usage aux_usage);
void
brw_miptree_prepare_depth(struct brw_context *brw,
                          struct brw_mipmap_tree *mt, uint32_t level,
                          uint32_t start_layer, uint32_t layer_count);
void
brw_miptree_finish_depth(struct brw_context *brw,
                         struct brw_mipmap_tree *mt, uint32_t level,
                         uint32_t start_layer, uint32_t layer_count,
                         bool depth_written);
void
brw_miptree_prepare_external(struct brw_context *brw,
                             struct brw_mipmap_tree *mt);
void
brw_miptree_finish_external(struct brw_context *brw,
                            struct brw_mipmap_tree *mt);

void
brw_miptree_make_shareable(struct brw_context *brw,
                           struct brw_mipmap_tree *mt);

void
brw_miptree_updownsample(struct brw_context *brw,
                         struct brw_mipmap_tree *src,
                         struct brw_mipmap_tree *dst);

void
brw_update_r8stencil(struct brw_context *brw,
                     struct brw_mipmap_tree *mt);

void
brw_miptree_map(struct brw_context *brw,
                struct brw_mipmap_tree *mt,
                unsigned int level,
                unsigned int slice,
                unsigned int x,
                unsigned int y,
                unsigned int w,
                unsigned int h,
                GLbitfield mode,
                void **out_ptr,
                ptrdiff_t *out_stride);

void
brw_miptree_unmap(struct brw_context *brw,
                  struct brw_mipmap_tree *mt,
                  unsigned int level,
                  unsigned int slice);

bool
brw_miptree_sample_with_hiz(struct brw_context *brw,
                            struct brw_mipmap_tree *mt);

bool
brw_miptree_set_clear_color(struct brw_context *brw,
                            struct brw_mipmap_tree *mt,
                            union isl_color_value clear_color);

/* Get a clear color suitable for filling out an ISL surface state. */
union isl_color_value
brw_miptree_get_clear_color(const struct brw_mipmap_tree *mt,
                            struct brw_bo **clear_color_bo,
                            uint64_t *clear_color_offset);

static inline int
brw_miptree_blt_pitch(struct brw_mipmap_tree *mt)
{
   int pitch = mt->surf.row_pitch_B;
   /* The BLT engine's pitch fields are specified in bytes for linear
    * surfaces but in dwords for tiled surfaces, so divide the byte pitch
    * by 4 when tiled.
    */
   if (mt->surf.tiling != ISL_TILING_LINEAR)
      pitch /= 4;
   return pitch;
}

isl_memcpy_type
brw_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type,
                            uint32_t *cpp);

static inline bool
brw_miptree_needs_fake_etc(struct brw_context *brw,
                           struct brw_mipmap_tree *mt)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   bool is_etc = _mesa_is_format_etc2(mt->format) ||
                 (mt->format == MESA_FORMAT_ETC1_RGB8);

   return devinfo->ver < 8 && devinfo->platform != INTEL_PLATFORM_BYT && is_etc;
}

static inline bool
brw_miptree_has_etc_shadow(struct brw_context *brw,
                           struct brw_mipmap_tree *mt)
{
   return brw_miptree_needs_fake_etc(brw, mt) && mt->shadow_mt;
}
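
/* Illustrative sketch, not part of the original header: how a caller might
 * pick the miptree to sample from given the ETC shadow machinery above.
 * The helper name is an assumption for the example.
 */
static inline struct brw_mipmap_tree *
example_miptree_for_sampling(struct brw_context *brw,
                             struct brw_mipmap_tree *mt)
{
   /* On Gfx7 and earlier (except Baytrail), ETC data is decompressed into
    * shadow_mt, and sampling must use that copy when it exists.
    */
   return brw_miptree_has_etc_shadow(brw, mt) ? mt->shadow_mt : mt;
}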

void
brw_miptree_update_etc_shadow_levels(struct brw_context *brw,
                                     struct brw_mipmap_tree *mt);

#ifdef __cplusplus
}
#endif

#endif

@ -1,728 +0,0 @@
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_batch.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "compiler/brw_eu_defines.h"

#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/format_utils.h"
#include "main/glformats.h"

/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void
upload_pipelined_state_pointers(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->ver == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(brw->batch.state.bo, 0, brw->vs.base.state_offset);
   if (brw->ff_gs.prog_active)
      OUT_RELOC(brw->batch.state.bo, 0, brw->ff_gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->batch.state.bo, 0, brw->clip.state_offset | 1);
   OUT_RELOC(brw->batch.state.bo, 0, brw->sf.state_offset);
   OUT_RELOC(brw->batch.state.bo, 0, brw->wm.base.state_offset);
   OUT_RELOC(brw->batch.state.bo, 0, brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->ctx.NewDriverState |= BRW_NEW_PSP;
}

static void
upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}

const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_BLORP |
             BRW_NEW_FF_GS_PROG_DATA |
             BRW_NEW_GFX4_UNIT_STATE |
             BRW_NEW_STATE_BASE_ADDRESS |
             BRW_NEW_URB_FENCE,
   },
   .emit = upload_psp_urb_cbs,
};

uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct brw_renderbuffer *drb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
   struct brw_renderbuffer *srb;

   if (!drb &&
       (srb = brw_get_renderbuffer(fb, BUFFER_STENCIL)) &&
       !srb->mt->stencil_mt &&
       (brw_rb_format(srb) == MESA_FORMAT_Z24_UNORM_S8_UINT ||
        brw_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_S8X24_UINT)) {
      drb = srb;
   }

   if (!drb)
      return BRW_DEPTHFORMAT_D32_FLOAT;

   return brw_depth_format(brw, drb->mt->format);
}

static struct brw_mipmap_tree *
get_stencil_miptree(struct brw_renderbuffer *irb)
{
   if (!irb)
      return NULL;
   if (irb->mt->stencil_mt)
      return irb->mt->stencil_mt;
   return brw_renderbuffer_get_mt(irb);
}

static bool
rebase_depth_stencil(struct brw_context *brw, struct brw_renderbuffer *irb,
                     bool invalidate)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   isl_get_tile_masks(irb->mt->surf.tiling, irb->mt->cpp,
                      &tile_mask_x, &tile_mask_y);
   assert(!brw_miptree_level_has_hiz(irb->mt, irb->mt_level));

   uint32_t tile_x = irb->draw_x & tile_mask_x;
   uint32_t tile_y = irb->draw_y & tile_mask_y;

   /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
    * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
    * Coordinate Offset X/Y":
    *
    *   "The 3 LSBs of both offsets must be zero to ensure correct
    *   alignment"
    */
   bool rebase = tile_x & 7 || tile_y & 7;

   /* We didn't even have intra-tile offsets before g45. */
   rebase |= (!devinfo->has_surface_tile_offset && (tile_x || tile_y));

   if (rebase) {
      perf_debug("HW workaround: blitting depth level %d to a temporary "
                 "to fix alignment (depth tile offset %d,%d)\n",
                 irb->mt_level, tile_x, tile_y);
      brw_renderbuffer_move_to_temp(brw, irb, invalidate);

      /* There is now only a single-slice miptree. */
      brw->depthstencil.tile_x = 0;
      brw->depthstencil.tile_y = 0;
      brw->depthstencil.depth_offset = 0;
      return true;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase. At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset (%u:%u), bad rendering may occur.\n",
             tile_x, tile_y);
   tile_x &= ~7;
   tile_y &= ~7;

   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   brw->depthstencil.depth_offset = brw_miptree_get_aligned_offset(
                                       irb->mt,
                                       irb->draw_x & ~tile_mask_x,
                                       irb->draw_y & ~tile_mask_y);

   return false;
}

void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
   struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL);
   struct brw_mipmap_tree *depth_mt = NULL;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Initialize brw->depthstencil to 'nop' workaround state.
    */
   brw->depthstencil.tile_x = 0;
   brw->depthstencil.tile_y = 0;
   brw->depthstencil.depth_offset = 0;

   /* Gfx6+ doesn't require the workarounds, since we always program the
    * surface state at the start of the whole surface.
    */
   if (devinfo->ver >= 6)
      return;

   /* Check if depth buffer is in depth/stencil format. If so, then it's only
    * safe to invalidate it if we're also clearing stencil.
    */
   if (depth_irb && invalidate_depth &&
       _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL)
      invalidate_depth = invalidate_stencil && stencil_irb;

   if (depth_irb) {
      if (rebase_depth_stencil(brw, depth_irb, invalidate_depth)) {
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            brw_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            brw_renderbuffer_set_draw_offset(stencil_irb);
         }
      }

      if (stencil_irb) {
         assert(stencil_irb->mt == depth_irb->mt);
         assert(stencil_irb->mt_level == depth_irb->mt_level);
         assert(stencil_irb->mt_layer == depth_irb->mt_layer);
      }
   }

   /* If there is no depth attachment, consider if stencil needs rebase. */
   if (!depth_irb && stencil_irb)
      rebase_depth_stencil(brw, stencil_irb, invalidate_stencil);
}

static void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct brw_renderbuffer *depth_irb,
                           struct brw_mipmap_tree *depth_mt,
                           struct brw_renderbuffer *stencil_irb,
                           struct brw_mipmap_tree *stencil_mt)
{
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   uint32_t depth_surface_type = BRW_SURFACE_NULL;
   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
   uint32_t depth_offset = 0;
   uint32_t width = 1, height = 1;
   bool tiled_surface = true;

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (depth_irb && depth_mt) {
      depthbuffer_format = brw_depthbuffer_format(brw);
      depth_surface_type = BRW_SURFACE_2D;
      depth_offset = brw->depthstencil.depth_offset;
      width = depth_irb->Base.Base.Width;
      height = depth_irb->Base.Base.Height;
      tiled_surface = depth_mt->surf.tiling != ISL_TILING_LINEAR;
   }

   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const unsigned len = (devinfo->verx10 == 45 || devinfo->ver == 5) ? 6 : 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->surf.row_pitch_B - 1 : 0) |
             (depthbuffer_format << 18) |
             (BRW_TILEWALK_YMAJOR << 26) |
             (tiled_surface << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->bo, RELOC_WRITE, depth_offset);
   } else {
      OUT_BATCH(0);
   }

   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
   OUT_BATCH(0);

   if (devinfo->verx10 >= 45)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (devinfo->ver >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();
}

void
brw_emit_depthbuffer(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
   struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL);
   struct brw_mipmap_tree *depth_mt = brw_renderbuffer_get_mt(depth_irb);
   struct brw_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);

   if (depth_mt)
      brw_cache_flush_for_depth(brw, depth_mt->bo);
   if (stencil_mt)
      brw_cache_flush_for_depth(brw, stencil_mt->bo);

   if (devinfo->ver < 6) {
      brw_emit_depth_stencil_hiz(brw, depth_irb, depth_mt,
                                 stencil_irb, stencil_mt);
      return;
   }

   /* Skip repeated NULL depth/stencil emits (think 2D rendering). */
   if (!depth_mt && !stencil_mt && brw->no_depth_or_stencil) {
      assert(brw->hw_ctx);
      return;
   }

   brw_emit_depth_stall_flushes(brw);

   const unsigned ds_dwords = brw->isl_dev.ds.size / 4;
   brw_batch_begin(brw, ds_dwords);
   uint32_t *ds_map = brw->batch.map_next;
   const uint32_t ds_offset = (char *)ds_map - (char *)brw->batch.batch.map;

   struct isl_view view = {
      /* Some nice defaults */
      .base_level = 0,
      .levels = 1,
      .base_array_layer = 0,
      .array_len = 1,
      .swizzle = ISL_SWIZZLE_IDENTITY,
   };

   struct isl_depth_stencil_hiz_emit_info info = {
      .view = &view,
      .mocs = brw_mocs(&brw->isl_dev, NULL),
   };

   if (depth_mt) {
      view.usage |= ISL_SURF_USAGE_DEPTH_BIT;
      info.depth_surf = &depth_mt->surf;

      info.depth_address =
         brw_batch_reloc(&brw->batch,
                         ds_offset + brw->isl_dev.ds.depth_offset,
                         depth_mt->bo, depth_mt->offset, RELOC_WRITE);

      info.mocs = brw_mocs(&brw->isl_dev, depth_mt->bo);
      view.base_level = depth_irb->mt_level - depth_irb->mt->first_level;
      view.base_array_layer = depth_irb->mt_layer;
      view.array_len = MAX2(depth_irb->layer_count, 1);
      view.format = depth_mt->surf.format;

      info.hiz_usage = depth_mt->aux_usage;
      if (!brw_renderbuffer_has_hiz(depth_irb)) {
         /* Just because a miptree has ISL_AUX_USAGE_HIZ does not mean that
          * all miplevels of that miptree are guaranteed to support HiZ. See
          * brw_miptree_level_enable_hiz for details.
          */
         info.hiz_usage = ISL_AUX_USAGE_NONE;
      }

      if (info.hiz_usage == ISL_AUX_USAGE_HIZ) {
         info.hiz_surf = &depth_mt->aux_buf->surf;

         uint64_t hiz_offset = 0;
         if (devinfo->ver == 6) {
            /* HiZ surfaces on Sandy Bridge technically don't support
             * mip-mapping. However, we can fake it by offsetting to the
             * first slice of LOD0 in the HiZ surface.
             */
            isl_surf_get_image_offset_B_tile_sa(&depth_mt->aux_buf->surf,
                                                view.base_level, 0, 0,
                                                &hiz_offset, NULL, NULL);
         }

         info.hiz_address =
            brw_batch_reloc(&brw->batch,
                            ds_offset + brw->isl_dev.ds.hiz_offset,
                            depth_mt->aux_buf->bo,
                            depth_mt->aux_buf->offset + hiz_offset,
                            RELOC_WRITE);
      }

      info.depth_clear_value = depth_mt->fast_clear_color.f32[0];
   }

   if (stencil_mt) {
      view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
      info.stencil_surf = &stencil_mt->surf;

      if (!depth_mt) {
         info.mocs = brw_mocs(&brw->isl_dev, stencil_mt->bo);
         view.base_level = stencil_irb->mt_level - stencil_irb->mt->first_level;
         view.base_array_layer = stencil_irb->mt_layer;
         view.array_len = MAX2(stencil_irb->layer_count, 1);
         view.format = stencil_mt->surf.format;
      }

      uint64_t stencil_offset = 0;
      if (devinfo->ver == 6) {
         /* Stencil surfaces on Sandy Bridge technically don't support
          * mip-mapping. However, we can fake it by offsetting to the
          * first slice of LOD0 in the stencil surface.
          */
         isl_surf_get_image_offset_B_tile_sa(&stencil_mt->surf,
                                             view.base_level, 0, 0,
                                             &stencil_offset, NULL, NULL);
      }

      info.stencil_address =
         brw_batch_reloc(&brw->batch,
                         ds_offset + brw->isl_dev.ds.stencil_offset,
                         stencil_mt->bo,
                         stencil_mt->offset + stencil_offset,
                         RELOC_WRITE);
   }

   isl_emit_depth_stencil_hiz_s(&brw->isl_dev, ds_map, &info);

   brw->batch.map_next += ds_dwords;
   brw_batch_advance(brw);

   brw->no_depth_or_stencil = !depth_mt && !stencil_mt;
}

const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_AUX_STATE |
             BRW_NEW_BATCH |
             BRW_NEW_BLORP,
   },
   .emit = brw_emit_depthbuffer,
};

void
brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const bool is_965 = devinfo->verx10 == 40;
   const uint32_t _3DSTATE_PIPELINE_SELECT =
      is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;

   if (devinfo->ver >= 8 && devinfo->ver < 10) {
      /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
       *
       *   Software must clear the COLOR_CALC_STATE Valid field in
       *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
       *   with Pipeline Select set to GPGPU.
       *
       * The internal hardware docs recommend the same workaround for Gfx9
       * hardware too.
       */
      if (pipeline == BRW_COMPUTE_PIPELINE) {
         BEGIN_BATCH(2);
         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
         OUT_BATCH(0);
         ADVANCE_BATCH();

         brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
      }
   }

   if (devinfo->ver == 9 && pipeline == BRW_RENDER_PIPELINE) {
      /* We seem to have issues with geometry flickering when 3D and compute
       * are combined in the same batch and this appears to fix it.
       */
      const uint32_t maxNumberofThreads =
         devinfo->max_cs_threads * devinfo->subslice_total - 1;

      BEGIN_BATCH(9);
      OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(2 << 8 | maxNumberofThreads << 16);
      OUT_BATCH(0);
      OUT_BATCH(2 << 16);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   if (devinfo->ver >= 6) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: DEVSNB+
       *
       *   Software must ensure all the write caches are flushed through a
       *   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
       *   command to invalidate read only caches prior to programming
       *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
       */
      const unsigned dc_flush =
         devinfo->ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;

      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  dc_flush |
                                  PIPE_CONTROL_CS_STALL);

      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                  PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                                  PIPE_CONTROL_INSTRUCTION_INVALIDATE);
   } else {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: PRE-DEVSNB
       *
       *   Software must ensure the current pipeline is flushed via an
       *   MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
       */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* Select the pipeline */
   BEGIN_BATCH(1);
   OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
             (devinfo->ver >= 9 ? (3 << 8) : 0) |
             (pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
   ADVANCE_BATCH();

   if (devinfo->verx10 == 70 &&
       pipeline == BRW_RENDER_PIPELINE) {
      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
       * PIPELINE_SELECT [DevBWR+]":
       *
       *   Project: DEVIVB, DEVHSW:GT3:A0
       *
       *   Software must send a pipe_control with a CS stall and a post sync
       *   operation and then a dummy DRAW after every MI_SET_CONTEXT and
       *   after any PIPELINE_SELECT that is enabling 3D mode.
       */
      gfx7_emit_cs_stall_flush(brw);

      BEGIN_BATCH(7);
      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
      OUT_BATCH(_3DPRIM_POINTLIST);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   if (devinfo->platform == INTEL_PLATFORM_GLK) {
      /* Project: DevGLK
       *
       * "This chicken bit works around a hardware issue with barrier logic
       *  encountered when switching between GPGPU and 3D pipelines. To
       *  workaround the issue, this mode bit should be set after a pipeline
       *  is selected."
       */
      const unsigned barrier_mode =
         pipeline == BRW_RENDER_PIPELINE ? GLK_SCEC_BARRIER_MODE_3D_HULL
                                         : GLK_SCEC_BARRIER_MODE_GPGPU;
      brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
                              barrier_mode | GLK_SCEC_BARRIER_MODE_MASK);
   }
}

/**
 * Update the pixel hashing modes that determine the balancing of PS threads
 * across subslices and slices.
 *
 * \param width Width bound of the rendering area (already scaled down if \p
 *              scale is greater than 1).
 * \param height Height bound of the rendering area (already scaled down if \p
 *               scale is greater than 1).
 * \param scale The number of framebuffer samples that could potentially be
 *              affected by an individual channel of the PS thread. This is
 *              typically one for single-sampled rendering, but for operations
 *              like CCS resolves and fast clears a single PS invocation may
 *              update a huge number of pixels, in which case a finer
 *              balancing is desirable in order to maximally utilize the
 *              bandwidth available. UINT_MAX can be used as shorthand for
 *              "finest hashing mode available".
 */
void
brw_emit_hashing_mode(struct brw_context *brw, unsigned width,
                      unsigned height, unsigned scale)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->ver == 9) {
      const uint32_t slice_hashing[] = {
         /* Because all Gfx9 platforms with more than one slice require
          * three-way subslice hashing, a single "normal" 16x16 slice hashing
          * block is guaranteed to suffer from substantial imbalance, with one
          * subslice receiving twice as much work as the other two in the
          * slice.
          *
          * The performance impact of that would be particularly severe when
          * three-way hashing is also in use for slice balancing (which is the
          * case for all Gfx9 GT4 platforms), because one of the slices
          * receives one every three 16x16 blocks in either direction, which
          * is roughly the periodicity of the underlying subslice imbalance
          * pattern ("roughly" because in reality the hardware's
          * implementation of three-way hashing doesn't do exact modulo 3
          * arithmetic, which somewhat decreases the magnitude of this effect
          * in practice). This leads to a systematic subslice imbalance
          * within that slice regardless of the size of the primitive. The
          * 32x32 hashing mode guarantees that the subslice imbalance within a
          * single slice hashing block is minimal, largely eliminating this
          * effect.
          */
         GFX9_SLICE_HASHING_32x32,
         /* Finest slice hashing mode available. */
         GFX9_SLICE_HASHING_NORMAL
      };
      const uint32_t subslice_hashing[] = {
         /* The 16x16 subslice hashing mode is used on non-LLC platforms to
          * match the performance of previous Mesa versions. 16x16 has a
          * slight cache locality benefit especially visible in the sampler L1
          * cache efficiency of low-bandwidth platforms, but it comes at the
          * cost of greater subslice imbalance for primitives of dimensions
          * approximately intermediate between 16x4 and 16x16.
          */
         (devinfo->has_llc ? GFX9_SUBSLICE_HASHING_16x4 :
                             GFX9_SUBSLICE_HASHING_16x16),
         /* Finest subslice hashing mode available. */
         GFX9_SUBSLICE_HASHING_8x4
      };
      /* Dimensions of the smallest hashing block of a given hashing mode.
       * If the rendering area is smaller than this there can't possibly be
       * any benefit from switching to this mode, so we optimize out the
       * transition.
       */
      const unsigned min_size[][2] = {
         { 16, 4 },
         {  8, 4 }
      };
      const unsigned idx = scale > 1;

      if (width > min_size[idx][0] || height > min_size[idx][1]) {
         const uint32_t gt_mode =
            (devinfo->num_slices == 1 ? 0 :
             GFX9_SLICE_HASHING_MASK_BITS | slice_hashing[idx]) |
            GFX9_SUBSLICE_HASHING_MASK_BITS | subslice_hashing[idx];

         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                     PIPE_CONTROL_CS_STALL);

         brw_load_register_imm32(brw, GFX7_GT_MODE, gt_mode);

         brw->current_hash_scale = scale;
      }
   }
}
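
/* Usage note (an illustrative assumption, not original code): per the
 * comment above, an ordinary single-sampled draw would pass the render-area
 * bounds with scale == 1, e.g.
 *
 *    brw_emit_hashing_mode(brw, fb_width, fb_height, 1);
 *
 * while a full-surface operation such as a fast clear or CCS resolve could
 * pass scale == UINT_MAX to request the finest hashing mode available.
 */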

/**
 * Misc invariant state packets
 */
void
brw_upload_invariant_state(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const bool is_965 = devinfo->verx10 == 40;

   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
   brw->last_pipeline = BRW_RENDER_PIPELINE;

   if (devinfo->ver >= 8) {
      BEGIN_BATCH(3);
      OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(2);
      OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Original Gfx4 doesn't have 3DSTATE_AA_LINE_PARAMETERS. */
   if (!is_965) {
      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
      /* use legacy aa line coverage computation */
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}

@ -1,111 +0,0 @@
/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_MULTISAMPLE_STATE_H
#define BRW_MULTISAMPLE_STATE_H

#include <stdint.h>

/**
 * Note: There are no standard multisample positions defined in OpenGL
 * specifications. Implementations have the freedom to pick the positions
 * which give plausible results. But the Vulkan specification does define
 * standard sample positions. So, we decided to pick the same pattern in
 * OpenGL as in Vulkan to keep it uniform across drivers and also to avoid
 * breaking applications which rely on this standard pattern.
 */

/**
 * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8).
 *
 * 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25):
 *   4 c
 * 4 1
 * c   0
 */
static const uint32_t
brw_multisample_positions_1x_2x = 0x008844cc;
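
/* Illustrative sketch, not part of the original header: how the packed
 * constants above decode. Each byte holds one sample position with X in the
 * high nibble and Y in the low nibble, in 1/16ths of a pixel, sample 0 in
 * the lowest byte. For 0x008844cc above, sample 0 decodes to
 * (0xc/16, 0xc/16) = (0.75, 0.75). The function name is an assumption.
 */
static inline void
example_decode_sample_position(uint32_t packed, unsigned sample,
                               float *x, float *y)
{
   const uint32_t byte = (packed >> (8 * sample)) & 0xff;
   *x = (float)(byte >> 4) / 16.0f;
   *y = (float)(byte & 0xf) / 16.0f;
}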

/**
 * Sample positions:
 *   2 6 a e
 * 2   0
 * 6       1
 * a 2
 * e     3
 */
static const uint32_t
brw_multisample_positions_4x = 0xae2ae662;

/**
 * Sample positions:
 *
 * From the Ivy Bridge PRM, Vol2 Part1 p304 (3DSTATE_MULTISAMPLE:
 * Programming Notes):
 *     "When programming the sample offsets (for NUMSAMPLES_4 or _8 and
 *     MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7
 *     for 8X) must have monotonically increasing distance from the
 *     pixel center. This is required to get the correct centroid
 *     computation in the device."
 *
 * Sample positions:
 *   1 3 5 7 9 b d f
 * 1               7
 * 3     3
 * 5         0
 * 7 5
 * 9             2
 * b       1
 * d   4
 * f           6
 */
static const uint32_t
brw_multisample_positions_8x[] = { 0x53d97b95, 0xf1bf173d };

/**
 * Sample positions:
 *
 *   0 1 2 3 4 5 6 7 8 9 a b c d e f
 * 0   15
 * 1                 9
 * 2         10
 * 3                       7
 * 4                               13
 * 5               1
 * 6       4
 * 7                         3
 * 8 12
 * 9                   0
 * a           2
 * b                           6
 * c     11
 * d                     5
 * e             8
 * f                             14
 */
static const uint32_t
brw_multisample_positions_16x[] = {
   0xc75a7599, 0xb3dbad36, 0x2c42816e, 0x10eff408
};

#endif /* BRW_MULTISAMPLE_STATE_H */

@ -1,450 +0,0 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/brw_nir.h"
#include "compiler/glsl/ir_uniform.h"
#include "compiler/nir/nir_builder.h"
#include "brw_program.h"

static void
brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
                                   const struct gl_program *prog,
                                   struct brw_stage_prog_data *stage_prog_data,
                                   bool is_scalar)
{
   const nir_state_slot *const slots = var->state_slots;
   assert(var->state_slots != NULL);

   unsigned uniform_index = var->data.driver_location / 4;
   for (unsigned int i = 0; i < var->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa, but we'll
       * get the same index back here.
       */
      int index = _mesa_add_state_reference(prog->Parameters,
                                            slots[i].tokens);

      /* Add each of the unique swizzles of the element as a parameter.
       * This'll end up matching the expected layout of the
       * array/matrix/structure we're trying to fill in.
       */
      int last_swiz = -1;
      for (unsigned j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);

         /* If we hit a pair of identical swizzles, this means we've hit the
          * end of the builtin variable. In scalar mode, we should just quit
          * and move on to the next one. In vec4, we need to continue and pad
          * it out to 4 components.
          */
         if (swiz == last_swiz && is_scalar)
            break;

         last_swiz = swiz;

         stage_prog_data->param[uniform_index++] =
            BRW_PARAM_PARAMETER(index, swiz);
      }
   }
}

static void
setup_vec4_image_param(uint32_t *params, uint32_t idx,
                       unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);
   for (unsigned i = 0; i < n; ++i)
      params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      params[i] = BRW_PARAM_BUILTIN_ZERO;
}

static void
brw_setup_image_uniform_values(nir_variable *var,
                               struct brw_stage_prog_data *prog_data)
{
   unsigned param_start_index = var->data.driver_location / 4;
   uint32_t *param = &prog_data->param[param_start_index];
   unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());

   for (unsigned i = 0; i < num_images; i++) {
      const unsigned image_idx = var->data.binding + i;

      /* Upload the brw_image_param structure. The order is expected to match
       * the BRW_IMAGE_PARAM_*_OFFSET defines.
       */
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, offset), 2);
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, size), 3);
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, stride), 4);
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, tiling), 3);
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, swizzling), 2);
      param += BRW_IMAGE_PARAM_SIZE;
   }
}

static unsigned
count_uniform_storage_slots(const struct glsl_type *type)
{
   /* gl_uniform_storage can cope with one level of array, so if the
    * type is a composite type or an array where each element occupies
    * more than one slot, then we need to recursively process it.
    */
   if (glsl_type_is_struct_or_ifc(type)) {
      unsigned location_count = 0;

      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);

         location_count += count_uniform_storage_slots(field_type);
      }

      return location_count;
   }

   if (glsl_type_is_array(type)) {
      const struct glsl_type *element_type = glsl_get_array_element(type);

      if (glsl_type_is_array(element_type) ||
          glsl_type_is_struct_or_ifc(element_type)) {
         unsigned element_count = count_uniform_storage_slots(element_type);
         return element_count * glsl_get_length(type);
      }
   }

   return 1;
}
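
/* Worked example (illustrative, not original code): for a hypothetical
 * uniform "struct { vec4 a; float b[3][2]; } u", the function recurses into
 * the struct's fields: "a" contributes 1 slot, while "b" is an array of
 * arrays, so it contributes count(float[2]) * 3 = 1 * 3 = 3 slots, for a
 * total of 4 storage slots.
 */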
|
||||
|
||||
static void
brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
                           const struct gl_program *prog,
                           struct brw_stage_prog_data *stage_prog_data,
                           bool is_scalar)
{
   if (var->type->without_array()->is_sampler() ||
       var->type->without_array()->is_image())
      return;

   /* The data for our (non-builtin) uniforms is stored in a series of
    * gl_uniform_storage structs for each subcomponent that
    * glGetUniformLocation() could name. We know it's been set up in the same
    * order we'd walk the type, so walk the list of storage that matches the
    * range of slots covered by this variable.
    */
   unsigned uniform_index = var->data.driver_location / 4;
   unsigned num_slots = count_uniform_storage_slots(var->type);
   for (unsigned u = 0; u < num_slots; u++) {
      struct gl_uniform_storage *storage =
         &prog->sh.data->UniformStorage[var->data.location + u];

      /* We already handled samplers and images via the separate top-level
       * variables created by gl_nir_lower_samplers_as_deref(), but they're
       * still part of the structure's storage, and so we'll see them while
       * walking it to set up the other regular fields. Just skip over them.
       */
      if (storage->builtin ||
          storage->type->is_sampler() ||
          storage->type->is_image())
         continue;

      gl_constant_value *components = storage->storage;
      unsigned vector_count = (MAX2(storage->array_elements, 1) *
                               storage->type->matrix_columns);
      unsigned vector_size = storage->type->vector_elements;
      unsigned max_vector_size = 4;
      if (storage->type->base_type == GLSL_TYPE_DOUBLE ||
          storage->type->base_type == GLSL_TYPE_UINT64 ||
          storage->type->base_type == GLSL_TYPE_INT64) {
         vector_size *= 2;
         if (vector_size > 4)
            max_vector_size = 8;
      }

      for (unsigned s = 0; s < vector_count; s++) {
         unsigned i;
         for (i = 0; i < vector_size; i++) {
            uint32_t idx = components - prog->sh.data->UniformDataSlots;
            stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx);
            components++;
         }

         if (!is_scalar) {
            /* Pad out with zeros if needed (only needed for vec4) */
            for (; i < max_vector_size; i++) {
               stage_prog_data->param[uniform_index++] =
                  BRW_PARAM_BUILTIN_ZERO;
            }
         }
      }
   }
}

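/* Illustrative note (not from the original source): for a "dvec3" uniform
 * compiled for the vec4 backend, vector_size above doubles to 6, so
 * max_vector_size is bumped to 8 and the last two param slots of each
 * vector are filled with BRW_PARAM_BUILTIN_ZERO padding.
 */
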
void
brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
                            const struct gl_program *prog,
                            struct brw_stage_prog_data *stage_prog_data,
                            bool is_scalar)
{
   unsigned nr_params = shader->num_uniforms / 4;
   stage_prog_data->nr_params = nr_params;
   stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);

   nir_foreach_uniform_variable(var, shader) {
      /* UBOs, atomics and samplers don't take up space in the
       * uniform file */
      if (var->interface_type != NULL || var->type->contains_atomic())
         continue;

      if (var->num_state_slots > 0) {
         brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
                                            is_scalar);
      } else {
         brw_nir_setup_glsl_uniform(shader->info.stage, var, prog,
                                    stage_prog_data, is_scalar);
      }
   }

   nir_foreach_image_variable(var, shader)
      brw_setup_image_uniform_values(var, stage_prog_data);
}

void
brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
                           struct gl_program *prog,
                           struct brw_stage_prog_data *stage_prog_data)
{
   struct gl_program_parameter_list *plist = prog->Parameters;

   unsigned nr_params = plist->NumParameters * 4;
   stage_prog_data->nr_params = nr_params;
   stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);

   /* For ARB programs, prog_to_nir generates a single "parameters" variable
    * for all uniform data. There may be additional sampler variables, and
    * an extra uniform from nir_lower_wpos_ytransform.
    */

   for (unsigned p = 0; p < plist->NumParameters; p++) {
      /* Parameters should be either vec4 uniforms or single component
       * constants; matrices and other larger types should have been broken
       * down earlier.
       */
      assert(plist->Parameters[p].Size <= 4);

      unsigned i;
      for (i = 0; i < plist->Parameters[p].Size; i++)
         stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i);
      for (; i < 4; i++)
         stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
   }
}

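/* Illustrative layout (assumed parameter list): with a vec4 at p=0 and a
 * single float at p=1, param[0..3] become BRW_PARAM_PARAMETER(0, 0..3),
 * param[4] becomes BRW_PARAM_PARAMETER(1, 0), and param[5..7] are padded
 * with BRW_PARAM_BUILTIN_ZERO.
 */
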
static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      assert(deref->arr.index.ssa);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang. According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}

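/* Worked example (illustrative): for "image2D img[4][2]" accessed as
 * img[i][j] with elem_size == 1, the loop computes offset = j * 1 + i * 2,
 * array_size ends up as 8, and the result is clamped to at most 7.
 */
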
void
brw_nir_lower_gl_images(nir_shader *shader,
                        const struct gl_program *prog)
{
   /* We put image uniforms at the end */
   nir_foreach_image_variable(var, shader) {
      const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());

      var->data.driver_location = shader->num_uniforms;
      shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            struct gl_uniform_storage *storage =
               &prog->sh.data->UniformStorage[var->data.location];
            const unsigned image_var_idx =
               storage->opaque[shader->info.stage].index;

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
                                          get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         case nir_intrinsic_image_deref_load_param_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);
            const unsigned num_images =
               MAX2(1, var->type->arrays_of_arrays_size());

            b.cursor = nir_instr_remove(&intrin->instr);

            const unsigned param = nir_intrinsic_base(intrin);
            nir_ssa_def *offset =
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
            offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(b.shader,
                                          nir_intrinsic_load_uniform);
            nir_intrinsic_set_base(load, var->data.driver_location);
            nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
            load->src[0] = nir_src_for_ssa(offset);
            load->num_components = intrin->dest.ssa.num_components;
            nir_ssa_dest_init(&load->instr, &load->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_builder_instr_insert(&b, &load->instr);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     &load->dest.ssa);
            break;
         }

         default:
            break;
         }
      }
   }
}

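/* Note (illustrative): after this pass a load_param_intel on element i of
 * an image array is a plain load_uniform reading
 * driver_location + i * BRW_IMAGE_PARAM_SIZE * 4 + param * 16 bytes, i.e.
 * each image's brw_image_param block is addressed as consecutive vec4s.
 */
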
void
brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts,
                              struct brw_stage_prog_data *prog_data)
{
   if (nr_userclip_plane_consts == 0)
      return;

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false,
                     NULL);
   nir_lower_io_to_temporaries(nir, impl, true, false);
   nir_lower_global_vars_to_local(nir);
   nir_lower_vars_to_ssa(nir);

   const unsigned clip_plane_base = nir->num_uniforms;

   assert(nir->num_uniforms == prog_data->nr_params * 4);
   const unsigned num_clip_floats = 4 * nr_userclip_plane_consts;
   uint32_t *clip_param =
      brw_stage_prog_data_add_params(prog_data, num_clip_floats);
   nir->num_uniforms += num_clip_floats * sizeof(float);
   assert(nir->num_uniforms == prog_data->nr_params * 4);

   for (unsigned i = 0; i < num_clip_floats; i++)
      clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4);

   nir_builder b;
   nir_builder_init(&b, impl);
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane)
            continue;

         b.cursor = nir_before_instr(instr);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
         load->num_components = 4;
         load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
         nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
         nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) *
                                      nir_intrinsic_ucp_id(intrin));
         nir_intrinsic_set_range(load, 4 * sizeof(float));
         nir_builder_instr_insert(&b, &load->instr);

         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  &load->dest.ssa);
         nir_instr_remove(instr);
      }
   }
}

@@ -1,187 +0,0 @@
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file brw_object_purgeable.c
 *
 * The driver implementation of the GL_APPLE_object_purgeable extension.
 */

#include "main/mtypes.h"
#include "main/macros.h"
#include "main/bufferobj.h"

#include "brw_context.h"
#include "brw_buffer_objects.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"

static GLenum
brw_buffer_purgeable(struct brw_bo *buffer)
{
   int retained = 0;

   if (buffer != NULL)
      retained = brw_bo_madvise(buffer, I915_MADV_DONTNEED);

   return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE;
}

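/* Note (illustrative): brw_bo_madvise() reports whether the kernel still
 * retains the pages, so marking a BO I915_MADV_DONTNEED yields
 * GL_VOLATILE_APPLE while the backing store survives and
 * GL_RELEASED_APPLE once it has been discarded.
 */
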
static GLenum
brw_buffer_object_purgeable(struct gl_context * ctx,
                            struct gl_buffer_object *obj,
                            GLenum option)
{
   struct brw_buffer_object *intel_obj = brw_buffer_object(obj);

   if (intel_obj->buffer != NULL)
      return brw_buffer_purgeable(intel_obj->buffer);

   if (option == GL_RELEASED_APPLE) {
      return GL_RELEASED_APPLE;
   } else {
      /* XXX Create the buffer and madvise(MADV_DONTNEED)? */
      return brw_buffer_purgeable(intel_obj->buffer);
   }
}

static GLenum
brw_texture_object_purgeable(struct gl_context * ctx,
                             struct gl_texture_object *obj,
                             GLenum option)
{
   struct brw_texture_object *intel;

   (void) ctx;
   (void) option;

   intel = brw_texture_object(obj);
   if (intel->mt == NULL || intel->mt->bo == NULL)
      return GL_RELEASED_APPLE;

   return brw_buffer_purgeable(intel->mt->bo);
}

static GLenum
brw_render_object_purgeable(struct gl_context * ctx,
                            struct gl_renderbuffer *obj,
                            GLenum option)
{
   struct brw_renderbuffer *intel;

   (void) ctx;
   (void) option;

   intel = brw_renderbuffer(obj);
   if (intel->mt == NULL)
      return GL_RELEASED_APPLE;

   return brw_buffer_purgeable(intel->mt->bo);
}

static int
brw_bo_unpurgeable(struct brw_bo *buffer)
{
   int retained;

   retained = 0;
   if (buffer != NULL)
      retained = brw_bo_madvise(buffer, I915_MADV_WILLNEED);

   return retained;
}

static GLenum
brw_buffer_object_unpurgeable(struct gl_context * ctx,
                              struct gl_buffer_object *obj,
                              GLenum option)
{
   struct brw_buffer_object *intel = brw_buffer_object(obj);

   (void) ctx;

   if (!intel->buffer)
      return GL_UNDEFINED_APPLE;

   if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->buffer)) {
      brw_bo_unreference(intel->buffer);
      intel->buffer = NULL;
      return GL_UNDEFINED_APPLE;
   }

   return GL_RETAINED_APPLE;
}

static GLenum
brw_texture_object_unpurgeable(struct gl_context * ctx,
                               struct gl_texture_object *obj,
                               GLenum option)
{
   struct brw_texture_object *intel;

   (void) ctx;

   intel = brw_texture_object(obj);
   if (intel->mt == NULL || intel->mt->bo == NULL)
      return GL_UNDEFINED_APPLE;

   if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) {
      brw_miptree_release(&intel->mt);
      return GL_UNDEFINED_APPLE;
   }

   return GL_RETAINED_APPLE;
}

static GLenum
brw_render_object_unpurgeable(struct gl_context * ctx,
                              struct gl_renderbuffer *obj,
                              GLenum option)
{
   struct brw_renderbuffer *intel;

   (void) ctx;

   intel = brw_renderbuffer(obj);
   if (intel->mt == NULL)
      return GL_UNDEFINED_APPLE;

   if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) {
      brw_miptree_release(&intel->mt);
      return GL_UNDEFINED_APPLE;
   }

   return GL_RETAINED_APPLE;
}

void
brw_init_object_purgeable_functions(struct dd_function_table *functions)
{
   functions->BufferObjectPurgeable = brw_buffer_object_purgeable;
   functions->TextureObjectPurgeable = brw_texture_object_purgeable;
   functions->RenderObjectPurgeable = brw_render_object_purgeable;

   functions->BufferObjectUnpurgeable = brw_buffer_object_unpurgeable;
   functions->TextureObjectUnpurgeable = brw_texture_object_unpurgeable;
   functions->RenderObjectUnpurgeable = brw_render_object_unpurgeable;
}

@@ -1,533 +0,0 @@
/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_performance_query.c
 *
 * Implementation of the GL_INTEL_performance_query extension.
 *
 * Currently there are two possible counter sources exposed here:
 *
 * On Gfx6+ hardware we have numerous 64bit Pipeline Statistics Registers
 * that we can snapshot at the beginning and end of a query.
 *
 * On Gfx7.5+ we have Observability Architecture counters, which are
 * covered in a separate document from the rest of the PRMs. It is available at:
 * https://01.org/linuxgraphics/documentation/driver-documentation-prms
 * => 2013 Intel Core Processor Family => Observability Performance Counters
 * (This one volume covers Sandybridge, Ivybridge, Baytrail, and Haswell,
 * though notably we currently only support OA counters for Haswell+)
 */

#include <limits.h>

/* put before sys/types.h to silence glibc warnings */
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>

#include <xf86drm.h>
#include "drm-uapi/i915_drm.h"

#include "main/hash.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/performance_query.h"

#include "util/bitset.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/u_math.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_batch.h"

#include "perf/intel_perf.h"
#include "perf/intel_perf_regs.h"
#include "perf/intel_perf_mdapi.h"
#include "perf/intel_perf_query.h"

#define FILE_DEBUG_FLAG DEBUG_PERFMON

#define OAREPORT_REASON_MASK           0x3f
#define OAREPORT_REASON_SHIFT          19
#define OAREPORT_REASON_TIMER          (1<<0)
#define OAREPORT_REASON_TRIGGER1       (1<<1)
#define OAREPORT_REASON_TRIGGER2       (1<<2)
#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
#define OAREPORT_REASON_GO_TRANSITION  (1<<4)

struct brw_perf_query_object {
   struct gl_perf_query_object base;
   struct intel_perf_query_object *query;
};

/** Downcasting convenience helper. */
static inline struct brw_perf_query_object *
brw_perf_query(struct gl_perf_query_object *o)
{
   return (struct brw_perf_query_object *) o;
}

#define MI_RPC_BO_SIZE              4096
#define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
#define MI_FREQ_START_OFFSET_BYTES  (3072)
#define MI_FREQ_END_OFFSET_BYTES    (3076)
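/* Layout sketch (derived from the defines above): the 4096-byte MI_RPC BO
 * holds the begin report at offset 0, the end report at 2048, and the two
 * 32-bit GT frequency snapshots at 3072 and 3076.
 */
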

/******************************************************************************/

static bool
brw_is_perf_query_ready(struct gl_context *ctx,
                        struct gl_perf_query_object *o);

static void
dump_perf_query_callback(void *query_void, void *brw_void)
{
   struct brw_context *ctx = brw_void;
   struct intel_perf_context *perf_ctx = ctx->perf_ctx;
   struct gl_perf_query_object *o = query_void;
   struct brw_perf_query_object *brw_query = brw_perf_query(o);
   struct intel_perf_query_object *obj = brw_query->query;

   DBG("%4d: %-6s %-8s ",
       o->Id,
       o->Used ? "Dirty," : "New,",
       o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"));
   intel_perf_dump_query(perf_ctx, obj, &ctx->batch);
}

static void
dump_perf_queries(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   intel_perf_dump_query_count(brw->perf_ctx);
   _mesa_HashWalk(ctx->PerfQuery.Objects, dump_perf_query_callback, brw);
}

/**
 * Driver hook for glGetPerfQueryInfoINTEL().
 */
static void
brw_get_perf_query_info(struct gl_context *ctx,
                        unsigned query_index,
                        const char **name,
                        GLuint *data_size,
                        GLuint *n_counters,
                        GLuint *n_active)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_perf_context *perf_ctx = brw->perf_ctx;
   struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);
   const struct intel_perf_query_info *query = &perf_cfg->queries[query_index];

   *name = query->name;
   *data_size = query->data_size;
   *n_counters = query->n_counters;
   *n_active = intel_perf_active_queries(perf_ctx, query);
}

static GLuint
intel_counter_type_enum_to_gl_type(enum intel_perf_counter_type type)
{
   switch (type) {
   case INTEL_PERF_COUNTER_TYPE_EVENT: return GL_PERFQUERY_COUNTER_EVENT_INTEL;
   case INTEL_PERF_COUNTER_TYPE_DURATION_NORM: return GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL;
   case INTEL_PERF_COUNTER_TYPE_DURATION_RAW: return GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL;
   case INTEL_PERF_COUNTER_TYPE_THROUGHPUT: return GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
   case INTEL_PERF_COUNTER_TYPE_RAW: return GL_PERFQUERY_COUNTER_RAW_INTEL;
   case INTEL_PERF_COUNTER_TYPE_TIMESTAMP: return GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL;
   default:
      unreachable("Unknown counter type");
   }
}

static GLuint
intel_counter_data_type_to_gl_type(enum intel_perf_counter_data_type type)
{
   switch (type) {
   case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: return GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL;
   case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: return GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL;
   case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: return GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
   case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: return GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
   case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: return GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL;
   default:
      unreachable("Unknown counter data type");
   }
}

/**
 * Driver hook for glGetPerfCounterInfoINTEL().
 */
static void
brw_get_perf_counter_info(struct gl_context *ctx,
                          unsigned query_index,
                          unsigned counter_index,
                          const char **name,
                          const char **desc,
                          GLuint *offset,
                          GLuint *data_size,
                          GLuint *type_enum,
                          GLuint *data_type_enum,
                          GLuint64 *raw_max)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_perf_config *perf_cfg = intel_perf_config(brw->perf_ctx);
   const struct intel_perf_query_info *query =
      &perf_cfg->queries[query_index];
   const struct intel_perf_query_counter *counter =
      &query->counters[counter_index];

   *name = counter->name;
   *desc = counter->desc;
   *offset = counter->offset;
   *data_size = intel_perf_query_counter_get_size(counter);
   *type_enum = intel_counter_type_enum_to_gl_type(counter->type);
   *data_type_enum = intel_counter_data_type_to_gl_type(counter->data_type);
   *raw_max = counter->raw_max;
}

enum OaReadStatus {
   OA_READ_STATUS_ERROR,
   OA_READ_STATUS_UNFINISHED,
   OA_READ_STATUS_FINISHED,
};

/******************************************************************************/

/**
 * Driver hook for glBeginPerfQueryINTEL().
 */
static bool
brw_begin_perf_query(struct gl_context *ctx,
                     struct gl_perf_query_object *o)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_perf_query_object *brw_query = brw_perf_query(o);
   struct intel_perf_query_object *obj = brw_query->query;
   struct intel_perf_context *perf_ctx = brw->perf_ctx;

   /* We can assume the frontend hides mistaken attempts to Begin a
    * query object multiple times before its End. Similarly if an
    * application reuses a query object before results have arrived
    * the frontend will wait for prior results so we don't need
    * to support abandoning in-flight results.
    */
   assert(!o->Active);
   assert(!o->Used || o->Ready); /* no in-flight query to worry about */

   DBG("Begin(%d)\n", o->Id);

   bool ret = intel_perf_begin_query(perf_ctx, obj);

   if (INTEL_DEBUG(DEBUG_PERFMON))
      dump_perf_queries(brw);

   return ret;
}

/**
 * Driver hook for glEndPerfQueryINTEL().
 */
static void
brw_end_perf_query(struct gl_context *ctx,
                   struct gl_perf_query_object *o)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_perf_query_object *brw_query = brw_perf_query(o);
   struct intel_perf_query_object *obj = brw_query->query;
   struct intel_perf_context *perf_ctx = brw->perf_ctx;

   DBG("End(%d)\n", o->Id);
   intel_perf_end_query(perf_ctx, obj);
}

static void
brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_perf_query_object *brw_query = brw_perf_query(o);
   struct intel_perf_query_object *obj = brw_query->query;

   assert(!o->Ready);

   intel_perf_wait_query(brw->perf_ctx, obj, &brw->batch);
}

static bool
brw_is_perf_query_ready(struct gl_context *ctx,
                        struct gl_perf_query_object *o)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_perf_query_object *brw_query = brw_perf_query(o);
   struct intel_perf_query_object *obj = brw_query->query;

   if (o->Ready)
      return true;

   return intel_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch);
}

/**
 * Driver hook for glGetPerfQueryDataINTEL().
 */
static bool
brw_get_perf_query_data(struct gl_context *ctx,
                        struct gl_perf_query_object *o,
                        GLsizei data_size,
                        GLuint *data,
                        GLuint *bytes_written)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_perf_query_object *brw_query = brw_perf_query(o);
   struct intel_perf_query_object *obj = brw_query->query;

   assert(brw_is_perf_query_ready(ctx, o));

   DBG("GetData(%d)\n", o->Id);

   if (INTEL_DEBUG(DEBUG_PERFMON))
      dump_perf_queries(brw);

   /* We expect that the frontend only calls this hook when it knows
    * that results are available.
    */
   assert(o->Ready);

   intel_perf_get_query_data(brw->perf_ctx, obj, &brw->batch,
                             data_size, data, bytes_written);

   return true;
}

static struct gl_perf_query_object *
brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_perf_context *perf_ctx = brw->perf_ctx;
   struct intel_perf_query_object *obj = intel_perf_new_query(perf_ctx, query_index);
   if (unlikely(!obj))
      return NULL;

   struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object));
   if (unlikely(!brw_query)) {
      intel_perf_delete_query(perf_ctx, obj);
      return NULL;
   }

   brw_query->query = obj;
   return &brw_query->base;
}

/**
 * Driver hook for glDeletePerfQueryINTEL().
 */
static void
brw_delete_perf_query(struct gl_context *ctx,
                      struct gl_perf_query_object *o)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_perf_query_object *brw_query = brw_perf_query(o);
   struct intel_perf_query_object *obj = brw_query->query;
   struct intel_perf_context *perf_ctx = brw->perf_ctx;

   /* We can assume that the frontend waits for a query to complete
    * before ever calling into here, so we don't have to worry about
    * deleting an in-flight query object.
    */
   assert(!o->Active);
   assert(!o->Used || o->Ready);

   DBG("Delete(%d)\n", o->Id);

   intel_perf_delete_query(perf_ctx, obj);
   free(brw_query);
}

/******************************************************************************/
/* intel_device_info will have incorrect default topology values for unsupported
 * kernels. Verify kernel support to ensure OA metrics are accurate.
 */
static bool
oa_metrics_kernel_support(int fd, const struct intel_device_info *devinfo)
{
   if (devinfo->ver >= 10) {
      /* The topology uAPI is required for CNL+ (kernel 4.17+); make a call
       * to the API to verify support.
       */
      struct drm_i915_query_item item = {
         .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
      };
      struct drm_i915_query query = {
         .num_items = 1,
         .items_ptr = (uintptr_t) &item,
      };

      /* kernel 4.17+ supports the query */
      return drmIoctl(fd, DRM_IOCTL_I915_QUERY, &query) == 0;
   }

   if (devinfo->ver >= 8) {
      /* 4.13+ api required for gfx8 - gfx9 */
      int mask;
      struct drm_i915_getparam gp = {
         .param = I915_PARAM_SLICE_MASK,
         .value = &mask,
      };
      /* kernel 4.13+ supports this parameter */
      return drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0;
   }

   if (devinfo->ver == 7)
      /* default topology values are correct for HSW */
      return true;

   /* OA is not supported before gen 7 */
   return false;
}

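/* Probe-pattern note (illustrative): both checks above issue a throwaway
 * ioctl purely to learn whether the kernel recognizes it; the returned
 * data is never used, only the success or failure of the call.
 */
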
static void *
brw_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
   return brw_bo_alloc(bufmgr, name, size, BRW_MEMZONE_OTHER);
}

static void
brw_oa_emit_mi_report_perf_count(void *c,
                                 void *bo,
                                 uint32_t offset_in_bytes,
                                 uint32_t report_id)
{
   struct brw_context *ctx = c;
   ctx->vtbl.emit_mi_report_perf_count(ctx,
                                       bo,
                                       offset_in_bytes,
                                       report_id);
}

typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);

static void
brw_oa_batchbuffer_flush(void *c, const char *file, int line)
{
   struct brw_context *ctx = c;
   _brw_batch_flush_fence(ctx, -1, NULL, file, line);
}

static void
brw_oa_emit_stall_at_pixel_scoreboard(void *c)
{
   struct brw_context *brw = c;
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD);
}

static void
brw_perf_store_register(struct brw_context *brw, struct brw_bo *bo,
                        uint32_t reg, uint32_t reg_size,
                        uint32_t offset)
{
   if (reg_size == 8) {
      brw_store_register_mem64(brw, bo, reg, offset);
   } else {
      assert(reg_size == 4);
      brw_store_register_mem32(brw, bo, reg, offset);
   }
}

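/* Note (illustrative): pipeline statistics registers are 64 bits wide, so
 * the perf code snapshots them through this helper with reg_size == 8,
 * while plain 32-bit registers use reg_size == 4.
 */
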
typedef void (*store_register_mem_t)(void *ctx, void *bo,
                                     uint32_t reg, uint32_t reg_size,
                                     uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);

static unsigned
brw_init_perf_query_info(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   struct intel_perf_context *perf_ctx = brw->perf_ctx;
   struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);

   if (perf_cfg)
      return perf_cfg->n_queries;

   if (!oa_metrics_kernel_support(brw->screen->fd, devinfo))
      return 0;

   perf_cfg = intel_perf_new(brw->mem_ctx);

   perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc;
   perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference;
   perf_cfg->vtbl.bo_map = (bo_map_t)brw_bo_map;
   perf_cfg->vtbl.bo_unmap = (bo_unmap_t)brw_bo_unmap;
   perf_cfg->vtbl.emit_stall_at_pixel_scoreboard =
      (emit_mi_flush_t)brw_oa_emit_stall_at_pixel_scoreboard;
   perf_cfg->vtbl.emit_mi_report_perf_count =
      (emit_mi_report_t)brw_oa_emit_mi_report_perf_count;
   perf_cfg->vtbl.batchbuffer_flush = brw_oa_batchbuffer_flush;
   perf_cfg->vtbl.store_register_mem =
      (store_register_mem_t) brw_perf_store_register;
   perf_cfg->vtbl.batch_references = (batch_references_t)brw_batch_references;
   perf_cfg->vtbl.bo_wait_rendering = (bo_wait_rendering_t)brw_bo_wait_rendering;
   perf_cfg->vtbl.bo_busy = (bo_busy_t)brw_bo_busy;

   intel_perf_init_metrics(perf_cfg, devinfo, brw->screen->fd,
                           true /* pipeline stats */,
                           true /* register snapshots */);
   intel_perf_init_context(perf_ctx, perf_cfg, brw->mem_ctx, brw, brw->bufmgr,
                           devinfo, brw->hw_ctx, brw->screen->fd);

   return perf_cfg->n_queries;
}
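
/* Design note: the casts above adapt the driver's BO and batch helpers to
 * the type-erased vtbl consumed by the shared code in src/intel/perf, so
 * the same query logic can run on top of different drivers.
 */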

void
brw_init_performance_queries(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   ctx->Driver.InitPerfQueryInfo = brw_init_perf_query_info;
   ctx->Driver.GetPerfQueryInfo = brw_get_perf_query_info;
   ctx->Driver.GetPerfCounterInfo = brw_get_perf_counter_info;
   ctx->Driver.NewPerfQueryObject = brw_new_perf_query_object;
   ctx->Driver.DeletePerfQuery = brw_delete_perf_query;
   ctx->Driver.BeginPerfQuery = brw_begin_perf_query;
   ctx->Driver.EndPerfQuery = brw_end_perf_query;
   ctx->Driver.WaitPerfQuery = brw_wait_perf_query;
   ctx->Driver.IsPerfQueryReady = brw_is_perf_query_ready;
   ctx->Driver.GetPerfQueryData = brw_get_perf_query_data;
}

@@ -1,454 +0,0 @@
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_batch.h"
#include "brw_fbo.h"

/**
 * Emit a PIPE_CONTROL with various flushing flags.
 *
 * The caller is responsible for deciding what flags are appropriate for the
 * given generation.
 */
void
brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->ver >= 6 &&
       (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
       (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
      /* A pipe control command with flush and invalidate bits set
       * simultaneously is an inherently racy operation on Gfx6+ if the
       * contents of the flushed caches were intended to become visible from
       * any of the invalidated caches. Split it into two PIPE_CONTROLs; the
       * first one should stall the pipeline to make sure that the flushed R/W
       * caches are coherent with memory once the specified R/O caches are
       * invalidated. On pre-Gfx6 hardware the (implicit) R/O cache
       * invalidation seems to happen at the bottom of the pipeline together
       * with any write cache flush, so this shouldn't be a concern. In order
       * to ensure a full stall, we do an end-of-pipe sync.
       */
      brw_emit_end_of_pipe_sync(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS));
      flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
   }

   brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0);
}
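
/* Illustrative call (assumed flags): requesting
 * PIPE_CONTROL_RENDER_TARGET_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
 * on Gfx6+ is emitted as an end-of-pipe sync performing the flush, followed
 * by a second PIPE_CONTROL carrying only the texture cache invalidation.
 */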

/**
 * Emit a PIPE_CONTROL that writes to a buffer object.
 *
 * \p flags should contain one of the following items:
 *  - PIPE_CONTROL_WRITE_IMMEDIATE
 *  - PIPE_CONTROL_WRITE_TIMESTAMP
 *  - PIPE_CONTROL_WRITE_DEPTH_COUNT
 */
void
brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
                            struct brw_bo *bo, uint32_t offset,
                            uint64_t imm)
{
   brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm);
}

/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
brw_emit_depth_stall_flushes(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   assert(devinfo->ver >= 6);

   /* Starting on BDW, these pipe controls are unnecessary.
    *
    * WM HW will internally manage the draining pipe and flushing of the caches
    * when this command is issued. The PIPE_CONTROL restrictions are removed.
    */
   if (devinfo->ver >= 8)
      return;

   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
}

/**
 * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
 *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
 *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
 *  3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs
 *  to be sent before any combination of VS associated 3DSTATE."
 */
void
gfx7_emit_vs_workaround_flush(struct brw_context *brw)
{
   ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;

   assert(devinfo->ver == 7);
   brw_emit_pipe_control_write(brw,
                               PIPE_CONTROL_WRITE_IMMEDIATE
                               | PIPE_CONTROL_DEPTH_STALL,
                               brw->workaround_bo,
                               brw->workaround_bo_offset, 0);
}

/**
 * From the PRM, Volume 2a:
 *
 * "Indirect State Pointers Disable
 *
 * At the completion of the post-sync operation associated with this pipe
 * control packet, the indirect state pointers in the hardware are
 * considered invalid; the indirect pointers are not saved in the context.
 * If any new indirect state commands are executed in the command stream
 * while the pipe control is pending, the new indirect state commands are
 * preserved.
 *
 * [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context
 * restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant
 * commands are only considered as Indirect State Pointers. Once ISP is
 * issued in a context, SW must initialize by programming push constant
 * commands for all the shaders (at least to zero length) before attempting
 * any rendering operation for the same context."
 *
 * 3DSTATE_CONSTANT_* packets are restored during a context restore,
 * even though they point to a BO that has been already unreferenced at
 * the end of the previous batch buffer. This has been fine so far since
 * we are protected by the scratch page (every address not covered by
 * a BO should be pointing to the scratch page). But on CNL, it is
 * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_*
 * instruction.
 *
 * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the
 * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a
 * context restore, so the mentioned hang doesn't happen. However,
 * software must program push constant commands for all stages prior to
 * rendering anything, so we flag them as dirty.
 *
 * Finally, we also stall at the pixel scoreboard to make sure the
 * constants have been loaded into the EUs prior to disabling the push
 * constants, so that it doesn't hang a previous 3DPRIMITIVE.
 */
void
gfx7_emit_isp_disable(struct brw_context *brw)
{
   brw->vtbl.emit_raw_pipe_control(brw,
                                   PIPE_CONTROL_STALL_AT_SCOREBOARD |
                                   PIPE_CONTROL_CS_STALL,
                                   NULL, 0, 0);
   brw->vtbl.emit_raw_pipe_control(brw,
                                   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
                                   PIPE_CONTROL_CS_STALL,
                                   NULL, 0, 0);

   brw->vs.base.push_constants_dirty = true;
   brw->tcs.base.push_constants_dirty = true;
   brw->tes.base.push_constants_dirty = true;
   brw->gs.base.push_constants_dirty = true;
   brw->wm.base.push_constants_dirty = true;
}

/**
 * Emit a PIPE_CONTROL command for gfx7 with the CS Stall bit set.
 */
void
gfx7_emit_cs_stall_flush(struct brw_context *brw)
{
   brw_emit_pipe_control_write(brw,
                               PIPE_CONTROL_CS_STALL
                               | PIPE_CONTROL_WRITE_IMMEDIATE,
                               brw->workaround_bo,
                               brw->workaround_bo_offset, 0);
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gfx6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
void
brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
{
   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_CS_STALL |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD);

   brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
                               brw->workaround_bo,
                               brw->workaround_bo_offset, 0);
}

/*
 * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
 *
 *  Write synchronization is a special case of end-of-pipe
 *  synchronization that requires that the render cache and/or depth
 *  related caches are flushed to memory, where the data will become
 *  globally visible. This type of synchronization is required prior to
 *  SW (CPU) actually reading the result data from memory, or initiating
 *  an operation that will use as a read surface (such as a texture
 *  surface) a previous render target and/or depth/stencil buffer
 *
 *
 * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
 *
 *  Exercising the write cache flush bits (Render Target Cache Flush
 *  Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
 *  ensures the write caches are flushed and doesn't guarantee the data
 *  is globally visible.
 *
 *  SW can track the completion of the end-of-pipe-synchronization by
 *  using "Notify Enable" and "PostSync Operation - Write Immediate
 *  Data" in the PIPE_CONTROL command.
 */
void
brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->ver >= 6) {
      /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
       *
       *    "The most common action to perform upon reaching a synchronization
       *     point is to write a value out to memory. An immediate value
       *     (included with the synchronization command) may be written."
       *
       * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
       *
       *    "In case the data flushed out by the render engine is to be read
       *     back in to the render engine in coherent manner, then the render
       *     engine has to wait for the fence completion before accessing the
       *     flushed data. This can be achieved by following means on various
       *     products: PIPE_CONTROL command with CS Stall and the required
       *     write caches flushed with Post-Sync-Operation as Write Immediate
       *     Data.
       *
       *     Example:
       *        - Workload-1 (3D/GPGPU/MEDIA)
       *        - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
       *          Data, Required Write Cache Flush bits set)
       *        - Workload-2 (Can use the data produce or output by Workload-1)
       */
      brw_emit_pipe_control_write(brw,
                                  flags | PIPE_CONTROL_CS_STALL |
                                  PIPE_CONTROL_WRITE_IMMEDIATE,
                                  brw->workaround_bo,
                                  brw->workaround_bo_offset, 0);

      if (devinfo->platform == INTEL_PLATFORM_HSW) {
         /* Haswell needs additional workarounds:
          *
          * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
          *
          *    Option 1:
          *    PIPE_CONTROL command with the CS Stall and the required write
          *    caches flushed with Post-SyncOperation as Write Immediate Data
          *    followed by eight dummy MI_STORE_DATA_IMM (write to scratch
          *    space) commands.
          *
          *    Example:
          *       - Workload-1
          *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
          *         Immediate Data, Required Write Cache Flush bits set)
          *       - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
          *       - Workload-2 (Can use the data produce or output by
          *         Workload-1)
          *
          * Unfortunately, both the PRMs and the internal docs are a bit
          * out-of-date in this regard. What the Windows driver does (and
          * this appears to actually work) is to emit a register read from the
          * memory address written by the pipe control above.
          *
          * What register we load into doesn't matter. We choose an indirect
          * rendering register because we know it always exists and it's one
          * of the first registers the command parser allows us to write. If
          * you don't have command parser support in your kernel (pre-4.2),
          * this will get turned into MI_NOOP and you won't get the
          * workaround. Unfortunately, there's just not much we can do in
          * that case. This register is perfectly safe to write since we
          * always re-load all of the indirect draw registers right before
          * 3DPRIMITIVE when needed anyway.
          */
         brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE,
                               brw->workaround_bo, brw->workaround_bo_offset);
      }
   } else {
      /* On gfx4-5, a regular pipe control seems to suffice. */
      brw_emit_pipe_control_flush(brw, flags);
   }
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
brw_emit_mi_flush(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
   if (devinfo->ver >= 6) {
      flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
               PIPE_CONTROL_CONST_CACHE_INVALIDATE |
               PIPE_CONTROL_DATA_CACHE_FLUSH |
               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
               PIPE_CONTROL_VF_CACHE_INVALIDATE |
               PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_CS_STALL;
   }
   brw_emit_pipe_control_flush(brw, flags);
}

static bool
init_identifier_bo(struct brw_context *brw)
{
   void *bo_map;

   if (!can_do_exec_capture(brw->screen))
      return true;

   bo_map = brw_bo_map(NULL, brw->workaround_bo, MAP_READ | MAP_WRITE);
   if (!bo_map)
      return false;

   brw->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
   brw->workaround_bo_offset =
      ALIGN(intel_debug_write_identifiers(bo_map, 4096, "i965") + 8, 8);

   brw_bo_unmap(brw->workaround_bo);

   return true;
}

int
brw_init_pipe_control(struct brw_context *brw,
                      const struct intel_device_info *devinfo)
{
   switch (devinfo->ver) {
   case 11:
      brw->vtbl.emit_raw_pipe_control = gfx11_emit_raw_pipe_control;
      break;
   case 9:
      brw->vtbl.emit_raw_pipe_control = gfx9_emit_raw_pipe_control;
      break;
   case 8:
      brw->vtbl.emit_raw_pipe_control = gfx8_emit_raw_pipe_control;
      break;
   case 7:
      brw->vtbl.emit_raw_pipe_control =
         devinfo->verx10 == 75 ?
         gfx75_emit_raw_pipe_control : gfx7_emit_raw_pipe_control;
      break;
   case 6:
      brw->vtbl.emit_raw_pipe_control = gfx6_emit_raw_pipe_control;
      break;
   case 5:
      brw->vtbl.emit_raw_pipe_control = gfx5_emit_raw_pipe_control;
      break;
   case 4:
      brw->vtbl.emit_raw_pipe_control =
         devinfo->verx10 == 45 ?
         gfx45_emit_raw_pipe_control : gfx4_emit_raw_pipe_control;
      break;
   default:
      unreachable("Unhandled Gen.");
   }

   if (devinfo->ver < 6)
      return 0;

   /* We can't just use brw_state_batch to get a chunk of space for
    * the gfx6 workaround because it involves actually writing to
    * the buffer, and the kernel doesn't let us write to the batch.
    */
   brw->workaround_bo = brw_bo_alloc(brw->bufmgr, "workaround", 4096,
                                     BRW_MEMZONE_OTHER);
   if (brw->workaround_bo == NULL)
      return -ENOMEM;

   if (!init_identifier_bo(brw))
      return -ENOMEM; /* Couldn't map workaround_bo?? */

   brw->workaround_bo_offset = 0;
   brw->pipe_controls_since_last_cs_stall = 0;

   return 0;
}

void
brw_fini_pipe_control(struct brw_context *brw)
{
   brw_bo_unreference(brw->workaround_bo);
}

@@ -1,95 +0,0 @@
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_PIPE_CONTROL_DOT_H
#define BRW_PIPE_CONTROL_DOT_H

struct brw_context;
struct intel_device_info;
struct brw_bo;

/** @{
 *
 * PIPE_CONTROL operation, a combination of MI_FLUSH and a register write
 * with additional flushing control.
 *
 * The bits here are not the actual hardware values. The actual values
 * shift around a bit per-generation, so we just have flags for each
 * potential operation, and use genxml to encode the actual packet.
 */
enum pipe_control_flags
{
   PIPE_CONTROL_FLUSH_LLC                       = (1 << 1),
   PIPE_CONTROL_LRI_POST_SYNC_OP                = (1 << 2),
   PIPE_CONTROL_STORE_DATA_INDEX                = (1 << 3),
   PIPE_CONTROL_CS_STALL                        = (1 << 4),
   PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     = (1 << 5),
   PIPE_CONTROL_SYNC_GFDT                       = (1 << 6),
   PIPE_CONTROL_TLB_INVALIDATE                  = (1 << 7),
   PIPE_CONTROL_MEDIA_STATE_CLEAR               = (1 << 8),
   PIPE_CONTROL_WRITE_IMMEDIATE                 = (1 << 9),
   PIPE_CONTROL_WRITE_DEPTH_COUNT               = (1 << 10),
   PIPE_CONTROL_WRITE_TIMESTAMP                 = (1 << 11),
   PIPE_CONTROL_DEPTH_STALL                     = (1 << 12),
   PIPE_CONTROL_RENDER_TARGET_FLUSH             = (1 << 13),
   PIPE_CONTROL_INSTRUCTION_INVALIDATE          = (1 << 14),
   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        = (1 << 15),
   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
   PIPE_CONTROL_NOTIFY_ENABLE                   = (1 << 17),
   PIPE_CONTROL_FLUSH_ENABLE                    = (1 << 18),
   PIPE_CONTROL_DATA_CACHE_FLUSH                = (1 << 19),
   PIPE_CONTROL_VF_CACHE_INVALIDATE             = (1 << 20),
   PIPE_CONTROL_CONST_CACHE_INVALIDATE          = (1 << 21),
   PIPE_CONTROL_STATE_CACHE_INVALIDATE          = (1 << 22),
   PIPE_CONTROL_STALL_AT_SCOREBOARD             = (1 << 23),
   PIPE_CONTROL_DEPTH_CACHE_FLUSH               = (1 << 24),
};

#define PIPE_CONTROL_CACHE_FLUSH_BITS \
   (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
    PIPE_CONTROL_RENDER_TARGET_FLUSH)

#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
   (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
    PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
    PIPE_CONTROL_INSTRUCTION_INVALIDATE)
|
||||
/** @} */
|
||||
|
||||
int brw_init_pipe_control(struct brw_context *brw,
|
||||
const struct intel_device_info *info);
|
||||
void brw_fini_pipe_control(struct brw_context *brw);
|
||||
|
||||
void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
|
||||
void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags);
|
||||
void brw_emit_mi_flush(struct brw_context *brw);
|
||||
void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
|
||||
void brw_emit_depth_stall_flushes(struct brw_context *brw);
|
||||
void gfx7_emit_vs_workaround_flush(struct brw_context *brw);
|
||||
void gfx7_emit_cs_stall_flush(struct brw_context *brw);
|
||||
void gfx7_emit_isp_disable(struct brw_context *brw);
|
||||
|
||||
#endif
|
||||
|
|
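The flag bits above are purely a driver-side vocabulary; genxml translates them into the per-generation packet layout when the PIPE_CONTROL is actually encoded. A standalone sketch of how such a mask composes, with flag names and values copied from the enum above and a hypothetical stand-in for the real encoder:

#include <stdint.h>
#include <stdio.h>

/* Subset of the driver-side flag vocabulary from brw_pipe_control.h. */
enum pipe_control_flags {
   PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13),
   PIPE_CONTROL_DATA_CACHE_FLUSH    = (1 << 19),
   PIPE_CONTROL_DEPTH_CACHE_FLUSH   = (1 << 24),
};

/* Hypothetical stand-in for the real emitter: a full driver would map
 * each bit onto the generation-specific packet field here. */
static void emit_raw_pipe_control(uint32_t flags)
{
   printf("PIPE_CONTROL flags = 0x%08x\n", flags);
}

int main(void)
{
   /* A "flush all write caches" request, as PIPE_CONTROL_CACHE_FLUSH_BITS
    * composes it in the header above. */
   uint32_t flags = PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                    PIPE_CONTROL_DATA_CACHE_FLUSH |
                    PIPE_CONTROL_RENDER_TARGET_FLUSH;
   emit_raw_pipe_control(flags);
   return 0;
}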
@ -1,133 +0,0 @@
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/accum.h"
#include "main/enums.h"
#include "main/state.h"
#include "main/stencil.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "swrast/swrast.h"

#include "brw_context.h"
#include "brw_pixel.h"

#define FILE_DEBUG_FLAG DEBUG_PIXEL

static GLenum
effective_func(GLenum func, bool src_alpha_is_one)
{
   if (src_alpha_is_one) {
      if (func == GL_SRC_ALPHA)
         return GL_ONE;
      if (func == GL_ONE_MINUS_SRC_ALPHA)
         return GL_ZERO;
   }

   return func;
}

/**
 * Check if any fragment operations are in effect which might affect
 * glDraw/CopyPixels.
 */
bool
brw_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one)
{
   if (ctx->NewState)
      _mesa_update_state(ctx);

   if (_mesa_arb_fragment_program_enabled(ctx)) {
      DBG("fallback due to fragment program\n");
      return false;
   }

   if (ctx->Color.BlendEnabled &&
       (effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE ||
        effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO ||
        ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD ||
        effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE ||
        effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO ||
        ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) {
      DBG("fallback due to blend\n");
      return false;
   }

   if (ctx->Texture._MaxEnabledTexImageUnit != -1) {
      DBG("fallback due to texturing\n");
      return false;
   }

   if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) {
      DBG("fallback due to color masking\n");
      return false;
   }

   if (ctx->Color.AlphaEnabled) {
      DBG("fallback due to alpha\n");
      return false;
   }

   if (ctx->Depth.Test) {
      DBG("fallback due to depth test\n");
      return false;
   }

   if (ctx->Fog.Enabled) {
      DBG("fallback due to fog\n");
      return false;
   }

   if (ctx->_ImageTransferState) {
      DBG("fallback due to image transfer\n");
      return false;
   }

   if (_mesa_stencil_is_enabled(ctx)) {
      DBG("fallback due to stencil\n");
      return false;
   }

   if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) {
      DBG("fallback due to pixel zoom\n");
      return false;
   }

   if (ctx->RenderMode != GL_RENDER) {
      DBG("fallback due to render mode\n");
      return false;
   }

   return true;
}

void
brw_init_pixel_functions(struct dd_function_table *functions)
{
   functions->Bitmap = brw_bitmap;
   functions->CopyPixels = brw_copypixels;
   functions->DrawPixels = brw_drawpixels;
   functions->ReadPixels = brw_readpixels;
}
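The effective_func() reduction is what lets an otherwise blended DrawPixels still take the blit path: with source alpha known to be 1.0, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA collapse to GL_ONE and GL_ZERO, i.e. a straight copy. A standalone sketch of that reduction; the GL enum values are hard-coded here only so the example runs without GL headers:

#include <stdbool.h>
#include <stdio.h>

/* Values as in the standard GL headers. */
#define GL_ZERO                0x0000
#define GL_ONE                 0x0001
#define GL_SRC_ALPHA           0x0302
#define GL_ONE_MINUS_SRC_ALPHA 0x0303

/* Same reduction as effective_func() above: with src alpha known to be
 * 1.0, alpha-based blend factors degenerate to constants. */
static unsigned effective_func(unsigned func, bool src_alpha_is_one)
{
   if (src_alpha_is_one) {
      if (func == GL_SRC_ALPHA)
         return GL_ONE;
      if (func == GL_ONE_MINUS_SRC_ALPHA)
         return GL_ZERO;
   }
   return func;
}

int main(void)
{
   /* SRC_ALPHA/ONE_MINUS_SRC_ALPHA blending of opaque pixels is
    * equivalent to ONE/ZERO, so the blend check above passes. */
   printf("src: %#x -> %#x\n", GL_SRC_ALPHA,
          effective_func(GL_SRC_ALPHA, true));
   printf("dst: %#x -> %#x\n", GL_ONE_MINUS_SRC_ALPHA,
          effective_func(GL_ONE_MINUS_SRC_ALPHA, true));
   return 0;
}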
@ -1,61 +0,0 @@
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef BRW_PIXEL_H
#define BRW_PIXEL_H

#include "main/mtypes.h"

void brw_init_pixel_functions(struct dd_function_table *functions);
bool brw_check_blit_fragment_ops(struct gl_context *ctx,
                                 bool src_alpha_is_one);

void brw_readpixels(struct gl_context *ctx,
                    GLint x, GLint y,
                    GLsizei width, GLsizei height,
                    GLenum format, GLenum type,
                    const struct gl_pixelstore_attrib *pack,
                    GLvoid *pixels);

void brw_drawpixels(struct gl_context *ctx,
                    GLint x, GLint y,
                    GLsizei width, GLsizei height,
                    GLenum format,
                    GLenum type,
                    const struct gl_pixelstore_attrib *unpack,
                    const GLvoid *pixels);

void brw_copypixels(struct gl_context *ctx,
                    GLint srcx, GLint srcy,
                    GLsizei width, GLsizei height,
                    GLint destx, GLint desty, GLenum type);

void brw_bitmap(struct gl_context *ctx,
                GLint x, GLint y,
                GLsizei width, GLsizei height,
                const struct gl_pixelstore_attrib *unpack,
                const GLubyte *pixels);

#endif
@ -1,363 +0,0 @@
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/blend.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/colormac.h"
#include "main/condrender.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/pbo.h"
#include "main/bufferobj.h"
#include "main/state.h"
#include "main/texobj.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"

#include "brw_context.h"
#include "brw_screen.h"
#include "brw_batch.h"
#include "brw_blit.h"
#include "brw_fbo.h"
#include "brw_image.h"
#include "brw_buffers.h"
#include "brw_pixel.h"

#define FILE_DEBUG_FLAG DEBUG_PIXEL

/* Unlike the other intel_pixel_* functions, the expectation here is
 * that the incoming data is not in a PBO.  With the XY_TEXT blit
 * method, there's no benefit having it in a PBO, but we could
 * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
 * PBO bitmaps.  I think they are probably pretty rare though - I
 * wonder if Xgl uses them?
 */
static const GLubyte *
map_pbo(struct gl_context *ctx,
        GLsizei width, GLsizei height,
        const struct gl_pixelstore_attrib *unpack,
        const GLubyte *bitmap)
{
   GLubyte *buf;

   if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
                                  GL_COLOR_INDEX, GL_BITMAP,
                                  INT_MAX, (const GLvoid *) bitmap)) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(invalid PBO access)");
      return NULL;
   }

   buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
                                                GL_MAP_READ_BIT,
                                                unpack->BufferObj,
                                                MAP_INTERNAL);
   if (!buf) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
      return NULL;
   }

   return ADD_POINTERS(buf, bitmap);
}

static bool test_bit( const GLubyte *src, GLuint bit )
{
   return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
}

static void set_bit( GLubyte *dest, GLuint bit )
{
   dest[bit/8] |= 1 << (bit % 8);
}

/* Extract a rectangle's worth of data from the bitmap.  Called
 * per chunk of HW-sized bitmap.
 */
static GLuint
get_bitmap_rect(GLsizei width, GLsizei height,
                const struct gl_pixelstore_attrib *unpack,
                const GLubyte *bitmap,
                GLuint x, GLuint y,
                GLuint w, GLuint h,
                GLubyte *dest,
                GLuint row_align,
                bool invert)
{
   GLuint src_offset = (x + unpack->SkipPixels) & 0x7;
   GLuint mask = unpack->LsbFirst ? 0 : 7;
   GLuint bit = 0;
   GLint row, col;
   GLint first, last;
   GLint incr;
   GLuint count = 0;

   DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
       __func__, x, y, w, h, width, height, unpack->SkipPixels,
       src_offset, mask);

   if (invert) {
      first = h-1;
      last = 0;
      incr = -1;
   }
   else {
      first = 0;
      last = h-1;
      incr = 1;
   }

   /* Require that dest be pre-zero'd.
    */
   for (row = first; row != (last+incr); row += incr) {
      const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap,
                                                    width, height,
                                                    GL_COLOR_INDEX, GL_BITMAP,
                                                    y + row, x);

      for (col = 0; col < w; col++, bit++) {
         if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
            set_bit(dest, bit ^ 7);
            count++;
         }
      }

      if (row_align)
         bit = ALIGN(bit, row_align);
   }

   return count;
}

/**
 * Returns the low Y value of the vertical range given, flipped according to
 * whether the framebuffer is flipped or not.
 */
static inline int
y_flip(struct gl_framebuffer *fb, int y, int height)
{
   if (fb->FlipY)
      return fb->Height - y - height;
   else
      return y;
}

/*
 * Render a bitmap.
 */
static bool
do_blit_bitmap(struct gl_context *ctx,
               GLint dstx, GLint dsty,
               GLsizei width, GLsizei height,
               const struct gl_pixelstore_attrib *unpack,
               const GLubyte *bitmap)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct brw_renderbuffer *irb;
   GLfloat tmpColor[4];
   GLubyte ubcolor[4];
   GLuint color;
   GLsizei bitmap_width = width;
   GLsizei bitmap_height = height;
   GLint px, py;
   GLuint stipple[32];
   GLint orig_dstx = dstx;
   GLint orig_dsty = dsty;

   /* Update draw buffer bounds */
   _mesa_update_state(ctx);

   if (ctx->Depth.Test) {
      /* The blit path produces incorrect results when depth testing is on.
       * It seems the blit Z coord is always 1.0 (the far plane) so fragments
       * will likely be obscured by other, closer geometry.
       */
      return false;
   }

   brw_prepare_render(brw);

   if (fb->_NumColorDrawBuffers != 1) {
      perf_debug("accelerated glBitmap() only supports rendering to a "
                 "single color buffer\n");
      return false;
   }

   irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]);

   if (unpack->BufferObj) {
      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
      if (bitmap == NULL)
         return true; /* even though this is an error, we're done */
   }

   COPY_4V(tmpColor, ctx->Current.RasterColor);

   if (_mesa_need_secondary_color(ctx)) {
      ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
   }

   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]);
   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]);
   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);

   switch (_mesa_get_render_format(ctx, brw_rb_format(irb))) {
   case MESA_FORMAT_B8G8R8A8_UNORM:
   case MESA_FORMAT_B8G8R8X8_UNORM:
      color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]);
      break;
   case MESA_FORMAT_B5G6R5_UNORM:
      color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
      break;
   default:
      perf_debug("Unsupported format %s in accelerated glBitmap()\n",
                 _mesa_get_format_name(irb->mt->format));
      return false;
   }

   if (!brw_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
      return false;

   /* Clip to buffer bounds and scissor. */
   if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
                             fb->_Xmax, fb->_Ymax,
                             &dstx, &dsty, &width, &height))
      goto out;

   dsty = y_flip(fb, dsty, height);

#define DY 32
#define DX 32

   /* The blitter has no idea about fast color clears, so we need to resolve
    * the miptree before we do anything.
    */
   brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, true);

   /* Chop it all into chunks that can be digested by hardware: */
   for (py = 0; py < height; py += DY) {
      for (px = 0; px < width; px += DX) {
         int h = MIN2(DY, height - py);
         int w = MIN2(DX, width - px);
         GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
         const enum gl_logicop_mode logic_op = ctx->Color.ColorLogicOpEnabled ?
            ctx->Color._LogicOp : COLOR_LOGICOP_COPY;

         assert(sz <= sizeof(stipple));
         memset(stipple, 0, sz);

         /* May need to adjust this when padding has been introduced in
          * sz above:
          *
          * Have to translate destination coordinates back into source
          * coordinates.
          */
         int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack,
                                     bitmap,
                                     -orig_dstx + (dstx + px),
                                     -orig_dsty + y_flip(fb, dsty + py, h),
                                     w, h,
                                     (GLubyte *)stipple,
                                     8,
                                     fb->FlipY);
         if (count == 0)
            continue;

         if (!brw_emit_immediate_color_expand_blit(brw,
                                                   irb->mt->cpp,
                                                   (GLubyte *)stipple,
                                                   sz,
                                                   color,
                                                   irb->mt->surf.row_pitch_B,
                                                   irb->mt->bo,
                                                   irb->mt->offset,
                                                   irb->mt->surf.tiling,
                                                   dstx + px,
                                                   dsty + py,
                                                   w, h,
                                                   logic_op)) {
            return false;
         }

         if (ctx->Query.CurrentOcclusionObject)
            ctx->Query.CurrentOcclusionObject->Result += count;
      }
   }
out:

   if (INTEL_DEBUG(DEBUG_SYNC))
      brw_batch_flush(brw);

   if (unpack->BufferObj) {
      /* done with PBO so unmap it now */
      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj, MAP_INTERNAL);
   }

   return true;
}


/* There are a large number of possible ways to implement bitmap on
 * this hardware; most of them have some sort of drawback.  Here are a
 * few that spring to mind:
 *
 * Blit:
 *    - XY_MONO_SRC_BLT_CMD
 *         - use XY_SETUP_CLIP_BLT for cliprect clipping.
 *    - XY_TEXT_BLT
 *    - XY_TEXT_IMMEDIATE_BLT
 *         - blit per cliprect, subject to maximum immediate data size.
 *    - XY_COLOR_BLT
 *         - per pixel or run of pixels
 *    - XY_PIXEL_BLT
 *         - good for sparse bitmaps
 *
 * 3D engine:
 *    - Point per pixel
 *    - Translate bitmap to an alpha texture and render as a quad
 *    - Chop bitmap up into 32x32 squares and render w/polygon stipple.
 */
void
brw_bitmap(struct gl_context * ctx,
           GLint x, GLint y,
           GLsizei width, GLsizei height,
           const struct gl_pixelstore_attrib *unpack,
           const GLubyte * pixels)
{
   struct brw_context *brw = brw_context(ctx);

   if (!_mesa_check_conditional_render(ctx))
      return;

   if (brw->screen->devinfo.ver < 6 &&
       do_blit_bitmap(ctx, x, y, width, height, unpack, pixels))
      return;

   _mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels);
}
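The stipple chunks above are built by get_bitmap_rect(), which walks a packed 1-bpp source and repacks the bits MSB-first for the blitter via the XOR-by-7 trick in test_bit()/set_bit(). A simplified standalone sketch of the same repacking, assuming a tightly packed MSB-first source with no SkipPixels or row alignment (unlike the full driver version):

#include <stdint.h>
#include <stdio.h>

/* Bit helpers as in the driver: byte-granular addressing, with an
 * XOR-by-7 at the call sites to flip between LSB-first storage order
 * and the MSB-first order GL bitmaps and the blitter expect. */
static int test_bit(const uint8_t *src, unsigned bit)
{
   return (src[bit / 8] >> (bit % 8)) & 1;
}

static void set_bit(uint8_t *dst, unsigned bit)
{
   dst[bit / 8] |= 1u << (bit % 8);
}

/* Repack a w x h subrectangle at (x, y) of a tightly packed MSB-first
 * bitmap into a zeroed destination, returning the number of set bits,
 * as get_bitmap_rect() does for each hardware-sized chunk. */
static unsigned get_rect(const uint8_t *bitmap, unsigned stride_bits,
                         unsigned x, unsigned y,
                         unsigned w, unsigned h, uint8_t *dst)
{
   unsigned count = 0, bit = 0;

   for (unsigned row = 0; row < h; row++) {
      const uint8_t *rowsrc = bitmap + ((y + row) * stride_bits) / 8;
      for (unsigned col = 0; col < w; col++, bit++) {
         if (test_bit(rowsrc, (x + col) ^ 7)) {  /* MSB-first source */
            set_bit(dst, bit ^ 7);               /* MSB-first dest */
            count++;
         }
      }
   }
   return count;
}

int main(void)
{
   /* 16x4 bitmap, two bytes per row. */
   uint8_t bitmap[8] = { 0xF0, 0x0F, 0xFF, 0x00, 0xAA, 0x55, 0x00, 0xFF };
   uint8_t rect[4] = { 0 };

   unsigned set = get_rect(bitmap, 16, 4, 0, 8, 2, rect);
   printf("%u bits set; packed: %02x %02x\n", set, rect[0], rect[1]);
   return 0;
}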
@ -1,212 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/image.h"
#include "main/state.h"
#include "main/stencil.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "main/fbobject.h"
#include "drivers/common/meta.h"

#include "brw_context.h"
#include "brw_buffers.h"
#include "brw_mipmap_tree.h"
#include "brw_pixel.h"
#include "brw_fbo.h"
#include "brw_blit.h"
#include "brw_batch.h"

#define FILE_DEBUG_FLAG DEBUG_PIXEL

/**
 * CopyPixels with the blitter.  Don't support zooming, pixel transfer, etc.
 */
static bool
do_blit_copypixels(struct gl_context * ctx,
                   GLint srcx, GLint srcy,
                   GLsizei width, GLsizei height,
                   GLint dstx, GLint dsty, GLenum type)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct gl_framebuffer *read_fb = ctx->ReadBuffer;
   GLint orig_dstx;
   GLint orig_dsty;
   GLint orig_srcx;
   GLint orig_srcy;
   struct brw_renderbuffer *draw_irb = NULL;
   struct brw_renderbuffer *read_irb = NULL;

   /* Update draw buffer bounds */
   _mesa_update_state(ctx);

   brw_prepare_render(brw);

   switch (type) {
   case GL_COLOR:
      if (fb->_NumColorDrawBuffers != 1) {
         perf_debug("glCopyPixels() fallback: MRT\n");
         return false;
      }

      draw_irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]);
      read_irb = brw_renderbuffer(read_fb->_ColorReadBuffer);
      break;
   case GL_DEPTH_STENCIL_EXT:
      draw_irb = brw_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer);
      read_irb =
         brw_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
      break;
   case GL_DEPTH:
      perf_debug("glCopyPixels() fallback: GL_DEPTH\n");
      return false;
   case GL_STENCIL:
      perf_debug("glCopyPixels() fallback: GL_STENCIL\n");
      return false;
   default:
      perf_debug("glCopyPixels(): Unknown type\n");
      return false;
   }

   if (!draw_irb) {
      perf_debug("glCopyPixels() fallback: missing draw buffer\n");
      return false;
   }

   if (!read_irb) {
      perf_debug("glCopyPixels() fallback: missing read buffer\n");
      return false;
   }

   if (draw_irb->mt->surf.samples > 1 || read_irb->mt->surf.samples > 1) {
      perf_debug("glCopyPixels() fallback: multisampled buffers\n");
      return false;
   }

   if (ctx->_ImageTransferState) {
      perf_debug("glCopyPixels(): Unsupported image transfer state\n");
      return false;
   }

   if (ctx->Depth.Test) {
      perf_debug("glCopyPixels(): Unsupported depth test state\n");
      return false;
   }

   if (brw->stencil_enabled) {
      perf_debug("glCopyPixels(): Unsupported stencil test state\n");
      return false;
   }

   if (ctx->Fog.Enabled ||
       ctx->Texture._MaxEnabledTexImageUnit != -1 ||
       _mesa_arb_fragment_program_enabled(ctx)) {
      perf_debug("glCopyPixels(): Unsupported fragment shader state\n");
      return false;
   }

   if (ctx->Color.AlphaEnabled ||
       ctx->Color.BlendEnabled) {
      perf_debug("glCopyPixels(): Unsupported blend state\n");
      return false;
   }

   if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) {
      perf_debug("glCopyPixels(): Unsupported color mask state\n");
      return false;
   }

   if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) {
      perf_debug("glCopyPixels(): Unsupported pixel zoom\n");
      return false;
   }

   brw_batch_flush(brw);

   /* Clip to destination buffer. */
   orig_dstx = dstx;
   orig_dsty = dsty;
   if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
                             fb->_Xmax, fb->_Ymax,
                             &dstx, &dsty, &width, &height))
      goto out;
   /* Adjust src coords for our post-clipped destination origin */
   srcx += dstx - orig_dstx;
   srcy += dsty - orig_dsty;

   /* Clip to source buffer. */
   orig_srcx = srcx;
   orig_srcy = srcy;
   if (!_mesa_clip_to_region(0, 0,
                             read_fb->Width, read_fb->Height,
                             &srcx, &srcy, &width, &height))
      goto out;
   /* Adjust dst coords for our post-clipped source origin */
   dstx += srcx - orig_srcx;
   dsty += srcy - orig_srcy;

   if (!brw_miptree_blit(brw,
                         read_irb->mt, read_irb->mt_level, read_irb->mt_layer,
                         srcx, srcy, read_fb->FlipY,
                         draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer,
                         dstx, dsty, fb->FlipY,
                         width, height,
                         (ctx->Color.ColorLogicOpEnabled ?
                          ctx->Color._LogicOp : COLOR_LOGICOP_COPY))) {
      DBG("%s: blit failure\n", __func__);
      return false;
   }

   if (ctx->Query.CurrentOcclusionObject)
      ctx->Query.CurrentOcclusionObject->Result += width * height;

out:

   DBG("%s: success\n", __func__);
   return true;
}


void
brw_copypixels(struct gl_context *ctx,
               GLint srcx, GLint srcy,
               GLsizei width, GLsizei height,
               GLint destx, GLint desty, GLenum type)
{
   struct brw_context *brw = brw_context(ctx);

   DBG("%s\n", __func__);

   if (!_mesa_check_conditional_render(ctx))
      return;

   if (brw->screen->devinfo.ver < 6 &&
       do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
      return;

   /* this will use swrast if needed */
   _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
}
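The double clip above (clip against the destination, shift the source by the same amount, then clip against the source and shift the destination back) keeps the two rectangles in lockstep. A standalone sketch of that dance, with a minimal clip_to_region() modeled on Mesa's _mesa_clip_to_region():

#include <stdbool.h>
#include <stdio.h>

/* Clamp a w x h rectangle at (*x, *y) to [xmin, xmax) x [ymin, ymax);
 * returns false if nothing is left. */
static bool clip_to_region(int xmin, int ymin, int xmax, int ymax,
                           int *x, int *y, int *w, int *h)
{
   if (*x < xmin) { *w -= xmin - *x; *x = xmin; }
   if (*y < ymin) { *h -= ymin - *y; *y = ymin; }
   if (*x + *w > xmax) *w = xmax - *x;
   if (*y + *h > ymax) *h = ymax - *y;
   return *w > 0 && *h > 0;
}

int main(void)
{
   int srcx = -4, srcy = 2, dstx = 10, dsty = -3, w = 32, h = 32;

   /* Clip to a 20x20 destination, then shift the source to match. */
   int odx = dstx, ody = dsty;
   if (!clip_to_region(0, 0, 20, 20, &dstx, &dsty, &w, &h))
      return 0;
   srcx += dstx - odx;
   srcy += dsty - ody;

   /* Clip to a 24x24 source, then shift the destination to match. */
   int osx = srcx, osy = srcy;
   if (!clip_to_region(0, 0, 24, 24, &srcx, &srcy, &w, &h))
      return 0;
   dstx += srcx - osx;
   dsty += srcy - osy;

   printf("src (%d,%d) dst (%d,%d) %dx%d\n", srcx, srcy, dstx, dsty, w, h);
   return 0;
}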
@ -1,178 +0,0 @@
/*
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/enums.h"
#include "main/image.h"
#include "main/glformats.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "main/fbobject.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/texstate.h"
#include "main/bufferobj.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"

#include "brw_context.h"
#include "brw_screen.h"
#include "brw_blit.h"
#include "brw_buffers.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"
#include "brw_pixel.h"
#include "brw_buffer_objects.h"

#define FILE_DEBUG_FLAG DEBUG_PIXEL

static bool
do_blit_drawpixels(struct gl_context * ctx,
                   GLint x, GLint y, GLsizei width, GLsizei height,
                   GLenum format, GLenum type,
                   const struct gl_pixelstore_attrib *unpack,
                   const GLvoid * pixels)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_buffer_object *src = brw_buffer_object(unpack->BufferObj);
   GLuint src_offset;
   struct brw_bo *src_buffer;

   DBG("%s\n", __func__);

   if (!brw_check_blit_fragment_ops(ctx, false))
      return false;

   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
      DBG("%s: fallback due to MRT\n", __func__);
      return false;
   }

   brw_prepare_render(brw);

   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
   struct brw_renderbuffer *irb = brw_renderbuffer(rb);

   mesa_format src_format = _mesa_format_from_format_and_type(format, type);
   if (_mesa_format_is_mesa_array_format(src_format))
      src_format = _mesa_format_from_array_format(src_format);
   mesa_format dst_format = irb->mt->format;

   /* We can safely discard sRGB encode/decode for the DrawPixels interface */
   src_format = _mesa_get_srgb_format_linear(src_format);
   dst_format = _mesa_get_srgb_format_linear(dst_format);

   if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) {
      DBG("%s: bad format for blit\n", __func__);
      return false;
   }

   if (unpack->SwapBytes || unpack->LsbFirst ||
       unpack->SkipPixels || unpack->SkipRows) {
      DBG("%s: bad packing params\n", __func__);
      return false;
   }

   int src_stride = _mesa_image_row_stride(unpack, width, format, type);
   bool src_flip = false;
   /* Mesa flips the src_stride for unpack->Invert, but we want our mt to have
    * a normal src_stride.
    */
   if (unpack->Invert) {
      src_stride = -src_stride;
      src_flip = true;
   }

   src_offset = (GLintptr)pixels;
   src_offset += _mesa_image_offset(2, unpack, width, height,
                                    format, type, 0, 0, 0);

   src_buffer = brw_bufferobj_buffer(brw, src, src_offset,
                                     height * src_stride, false);

   struct brw_mipmap_tree *pbo_mt =
      brw_miptree_create_for_bo(brw,
                                src_buffer,
                                irb->mt->format,
                                src_offset,
                                width, height, 1,
                                src_stride,
                                ISL_TILING_LINEAR,
                                MIPTREE_CREATE_DEFAULT);
   if (!pbo_mt)
      return false;

   if (!brw_miptree_blit(brw,
                         pbo_mt, 0, 0,
                         0, 0, src_flip,
                         irb->mt, irb->mt_level, irb->mt_layer,
                         x, y, ctx->DrawBuffer->FlipY,
                         width, height, COLOR_LOGICOP_COPY)) {
      DBG("%s: blit failed\n", __func__);
      brw_miptree_release(&pbo_mt);
      return false;
   }

   brw_miptree_release(&pbo_mt);

   if (ctx->Query.CurrentOcclusionObject)
      ctx->Query.CurrentOcclusionObject->Result += width * height;

   DBG("%s: success\n", __func__);
   return true;
}

void
brw_drawpixels(struct gl_context *ctx,
               GLint x, GLint y,
               GLsizei width, GLsizei height,
               GLenum format,
               GLenum type,
               const struct gl_pixelstore_attrib *unpack,
               const GLvoid *pixels)
{
   struct brw_context *brw = brw_context(ctx);

   if (!_mesa_check_conditional_render(ctx))
      return;

   if (format == GL_STENCIL_INDEX) {
      _swrast_DrawPixels(ctx, x, y, width, height, format, type,
                         unpack, pixels);
      return;
   }

   if (brw->screen->devinfo.ver < 6 &&
       unpack->BufferObj) {
      if (do_blit_drawpixels(ctx, x, y, width, height, format, type, unpack,
                             pixels)) {
         return;
      }

      perf_debug("%s: fallback to generic code in PBO case\n", __func__);
   }

   _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
                         unpack, pixels);
}
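The PBO path treats the client pointer as a byte offset into the buffer object and relies on the unpack state to locate row zero. A rough standalone model of the stride arithmetic involved, assuming tightly described 2D images with no SKIP_* state (which this path rejects anyway); this simplification of _mesa_image_row_stride() ignores ROW_LENGTH overrides:

#include <stdio.h>

/* Rough model of _mesa_image_row_stride() for a simple 2D image:
 * bytes per row, padded to the unpack alignment. */
static int row_stride(int width, int bytes_per_pixel, int alignment)
{
   int stride = width * bytes_per_pixel;
   return (stride + alignment - 1) / alignment * alignment;
}

int main(void)
{
   /* GL_RGB/GL_UNSIGNED_BYTE rows are 3*width bytes, padded to the
    * default 4-byte unpack alignment: 75 bytes round up to 76. */
   int stride = row_stride(25, 3, 4);
   printf("row stride: %d bytes\n", stride);

   /* With unpack->Invert the driver negates the stride, so a forward
    * walk through the miptree reads the client rows bottom-up. */
   printf("inverted stride: %d bytes\n", -stride);
   return 0;
}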
@ -1,300 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/enums.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/bufferobj.h"
#include "main/readpix.h"
#include "main/state.h"
#include "main/glformats.h"
#include "program/prog_instruction.h"
#include "drivers/common/meta.h"

#include "brw_context.h"
#include "brw_blorp.h"
#include "brw_screen.h"
#include "brw_batch.h"
#include "brw_buffers.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"
#include "brw_pixel.h"
#include "brw_buffer_objects.h"

#define FILE_DEBUG_FLAG DEBUG_PIXEL

/**
 * \brief A fast path for glReadPixels
 *
 * This fast path is taken when the source format is BGRA, RGBA,
 * A or L and when the texture memory is X- or Y-tiled.  It downloads
 * the source data by directly mapping the memory without a GTT fence.
 * This then needs to be de-tiled on the CPU before presenting the data to
 * the user in the linear fashion.
 *
 * This is a performance win over the conventional texture download path.
 * In the conventional texture download path, the texture is either mapped
 * through the GTT or copied to a linear buffer with the blitter before
 * handing off to a software path.  This allows us to avoid round-tripping
 * through the GPU (in the case where we would be blitting) and do only a
 * single copy operation.
 */
static bool
brw_readpixels_tiled_memcpy(struct gl_context *ctx,
                            GLint xoffset, GLint yoffset,
                            GLsizei width, GLsizei height,
                            GLenum format, GLenum type,
                            GLvoid *pixels,
                            const struct gl_pixelstore_attrib *pack)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   /* This path supports reading from color buffers only */
   if (rb == NULL)
      return false;

   struct brw_renderbuffer *irb = brw_renderbuffer(rb);
   int dst_pitch;

   /* The miptree's buffer. */
   struct brw_bo *bo;

   uint32_t cpp;
   isl_memcpy_type copy_type;

   /* This fastpath is restricted to specific renderbuffer types:
    * a 2D BGRA, RGBA, L8 or A8 texture.  It could be generalized to support
    * more types.
    */
   if (!devinfo->has_llc ||
       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
       pixels == NULL ||
       pack->BufferObj ||
       pack->Alignment > 4 ||
       pack->SkipPixels > 0 ||
       pack->SkipRows > 0 ||
       (pack->RowLength != 0 && pack->RowLength != width) ||
       pack->SwapBytes ||
       pack->LsbFirst ||
       pack->Invert)
      return false;

   /* Only a simple blit, no scale, bias or other mapping. */
   if (ctx->_ImageTransferState)
      return false;

   /* It is possible that the renderbuffer (or underlying texture) is
    * multisampled.  Since ReadPixels from a multisampled buffer requires a
    * multisample resolve, we can't handle this here
    */
   if (rb->NumSamples > 1)
      return false;

   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
    * function doesn't set the last channel to 1. Note this checks BaseFormat
    * rather than TexFormat in case the RGBX format is being simulated with an
    * RGBA format.
    */
   if (rb->_BaseFormat == GL_RGB)
      return false;

   copy_type = brw_miptree_get_memcpy_type(rb->Format, format, type, &cpp);
   if (copy_type == ISL_MEMCPY_INVALID)
      return false;

   if (!irb->mt ||
       (irb->mt->surf.tiling != ISL_TILING_X &&
        irb->mt->surf.tiling != ISL_TILING_Y0)) {
      /* The algorithm is written only for X- or Y-tiled memory. */
      return false;
   }

   /* tiled_to_linear() assumes that if the object is swizzled, it is using
    * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.  This is only
    * true on gfx5 and above.
    *
    * The killer on top is that some gfx4 have an L-shaped swizzle mode, where
    * parts of the memory aren't swizzled at all. Userspace just can't handle
    * that.
    */
   if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
      return false;

   /* Since we are going to read raw data to the miptree, we need to resolve
    * any pending fast color clears before we start.
    */
   brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false);

   bo = irb->mt->bo;

   if (brw_batch_references(&brw->batch, bo)) {
      perf_debug("Flushing before mapping a referenced bo.\n");
      brw_batch_flush(brw);
   }

   void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW);
   if (map == NULL) {
      DBG("%s: failed to map bo\n", __func__);
      return false;
   }

   unsigned slice_offset_x, slice_offset_y;
   brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer,
                                &slice_offset_x, &slice_offset_y);
   xoffset += slice_offset_x;
   yoffset += slice_offset_y;

   dst_pitch = _mesa_image_row_stride(pack, width, format, type);

   /* For a window-system renderbuffer, the buffer is actually flipped
    * vertically, so we need to handle that.  Since the detiling function
    * can only really work in the forwards direction, we have to be a
    * little creative.  First, we compute the Y-offset of the first row of
    * the renderbuffer (in renderbuffer coordinates).  We then match that
    * with the last row of the client's data.  Finally, we give
    * tiled_to_linear a negative pitch so that it walks through the
    * client's data backwards as it walks through the renderbuffer forwards.
    */
   if (ctx->ReadBuffer->FlipY) {
      yoffset = rb->Height - yoffset - height;
      pixels += (ptrdiff_t) (height - 1) * dst_pitch;
      dst_pitch = -dst_pitch;
   }

   /* We postponed printing this message until having committed to executing
    * the function.
    */
   DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
       "mesa_format=0x%x tiling=%d "
       "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
       __func__, xoffset, yoffset, width, height,
       format, type, rb->Format, irb->mt->surf.tiling,
       pack->Alignment, pack->RowLength, pack->SkipPixels,
       pack->SkipRows);

   isl_memcpy_tiled_to_linear(
      xoffset * cpp, (xoffset + width) * cpp,
      yoffset, yoffset + height,
      pixels,
      map + irb->mt->offset,
      dst_pitch, irb->mt->surf.row_pitch_B,
      devinfo->has_bit6_swizzle,
      irb->mt->surf.tiling,
      copy_type
   );

   brw_bo_unmap(bo);
   return true;
}

static bool
brw_readpixels_blorp(struct gl_context *ctx,
                     unsigned x, unsigned y,
                     unsigned w, unsigned h,
                     GLenum format, GLenum type, const void *pixels,
                     const struct gl_pixelstore_attrib *packing)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
   if (!rb)
      return false;

   struct brw_renderbuffer *irb = brw_renderbuffer(rb);

   /* _mesa_get_readpixels_transfer_ops() includes the cases of read
    * color clamping along with the ctx->_ImageTransferState.
    */
   if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
                                         type, GL_FALSE))
      return false;

   GLenum dst_base_format = _mesa_unpack_format_to_base_format(format);
   if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat,
                                              dst_base_format))
      return false;

   unsigned swizzle;
   if (irb->Base.Base._BaseFormat == GL_RGB) {
      swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
   } else {
      swizzle = SWIZZLE_XYZW;
   }

   return brw_blorp_download_miptree(brw, irb->mt, rb->Format, swizzle,
                                     irb->mt_level, x, y, irb->mt_layer,
                                     w, h, 1, GL_TEXTURE_2D, format, type,
                                     ctx->ReadBuffer->FlipY, pixels, packing);
}

void
brw_readpixels(struct gl_context *ctx,
               GLint x, GLint y, GLsizei width, GLsizei height,
               GLenum format, GLenum type,
               const struct gl_pixelstore_attrib *pack, GLvoid *pixels)
{
   bool ok;

   struct brw_context *brw = brw_context(ctx);
   bool dirty;

   DBG("%s\n", __func__);

   /* Reading pixels won't dirty the front buffer, so reset the dirty
    * flag after calling brw_prepare_render().
    */
   dirty = brw->front_buffer_dirty;
   brw_prepare_render(brw);
   brw->front_buffer_dirty = dirty;

   if (pack->BufferObj) {
      if (brw_readpixels_blorp(ctx, x, y, width, height,
                               format, type, pixels, pack))
         return;

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
   }

   ok = brw_readpixels_tiled_memcpy(ctx, x, y, width, height,
                                    format, type, pixels, pack);
   if (ok)
      return;

   /* Update Mesa state before calling _mesa_readpixels().
    * XXX this may not be needed since ReadPixels no longer uses the
    * span code.
    */

   if (ctx->NewState)
      _mesa_update_state(ctx);

   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);

   /* There's a brw_prepare_render() call in intelSpanRenderStart(). */
   brw->front_buffer_dirty = dirty;
}
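The y-flip trick in the fast path above is worth isolating: point the destination at its last row, negate the pitch, and a strictly forward-walking copy writes the image bottom-up. A standalone sketch with a plain linear row copier standing in for the detiler:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Forward-only row copier, standing in for tiled_to_linear(): walks the
 * source top to bottom and advances dst by dst_pitch, which may be
 * negative. */
static void copy_rows(const uint8_t *src, int src_pitch,
                      uint8_t *dst, int dst_pitch,
                      int row_bytes, int height)
{
   for (int y = 0; y < height; y++)
      memcpy(dst + (ptrdiff_t)y * dst_pitch,
             src + (ptrdiff_t)y * src_pitch, row_bytes);
}

int main(void)
{
   /* 4 rows of 4 bytes, row i filled with value i. */
   uint8_t src[16], dst[16];
   for (int y = 0; y < 4; y++)
      memset(src + y * 4, y, 4);

   /* Flip: aim dst at its last row and hand the copier a negative pitch,
    * exactly as the FlipY branch above does with the client's pixels. */
   int dst_pitch = 4;
   uint8_t *dst_start = dst + (ptrdiff_t)(4 - 1) * dst_pitch;
   copy_rows(src, 4, dst_start, -dst_pitch, 4, 4);

   for (int y = 0; y < 4; y++)
      printf("dst row %d = %d\n", y, dst[y * 4]);  /* prints 3,2,1,0 */
   return 0;
}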
@ -1,462 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jordan Justen <jordan.l.justen@intel.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include "main/bufferobj.h"
|
||||
#include "main/varray.h"
|
||||
#include "vbo/vbo.h"
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_defines.h"
|
||||
#include "brw_draw.h"
|
||||
|
||||
#include "brw_batch.h"
|
||||
|
||||
|
||||
#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b))
|
||||
#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b))
|
||||
|
||||
/*
|
||||
* Notes on primitive restart:
|
||||
* The code below is used when the driver does not fully support primitive
|
||||
* restart (for example, if it only does restart index of ~0).
|
||||
*
|
||||
* We map the index buffer, find the restart indexes, unmap
|
||||
* the index buffer then draw the sub-primitives delineated by the restarts.
|
||||
*
|
||||
* A couple possible optimizations:
|
||||
* 1. Save the list of sub-primitive (start, count) values in a list attached
|
||||
* to the index buffer for re-use in subsequent draws. The list would be
|
||||
* invalidated when the contents of the buffer changed.
|
||||
* 2. If drawing triangle strips or quad strips, create a new index buffer
|
||||
* that uses duplicated vertices to render the disjoint strips as one
|
||||
* long strip. We'd have to be careful to avoid using too much memory
|
||||
* for this.
|
||||
*
|
||||
* Finally, some apps might perform better if they don't use primitive restart
|
||||
* at all rather than this fallback path. Set MESA_EXTENSION_OVERRIDE to
|
||||
* "-GL_NV_primitive_restart" to test that.
|
||||
*/
|
||||
|
||||
|
||||
struct sub_primitive
|
||||
{
|
||||
GLuint start;
|
||||
GLuint count;
|
||||
GLuint min_index;
|
||||
GLuint max_index;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Scan the elements array to find restart indexes. Return an array
|
||||
* of struct sub_primitive to indicate how to draw the sub-primitives
|
||||
* are delineated by the restart index.
|
||||
*/
|
||||
static struct sub_primitive *
|
||||
find_sub_primitives(const void *elements, unsigned element_size,
|
||||
unsigned start, unsigned end, unsigned restart_index,
|
||||
unsigned *num_sub_prims)
|
||||
{
|
||||
const unsigned max_prims = end - start;
|
||||
struct sub_primitive *sub_prims;
|
||||
unsigned i, cur_start, cur_count;
|
||||
GLuint scan_index;
|
||||
unsigned scan_num;
|
||||
|
||||
sub_prims =
|
||||
malloc(max_prims * sizeof(struct sub_primitive));
|
||||
|
||||
if (!sub_prims) {
|
||||
*num_sub_prims = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cur_start = start;
|
||||
cur_count = 0;
|
||||
scan_num = 0;
|
||||
|
||||
#define IB_INDEX_READ(TYPE, INDEX) (((const GL##TYPE *) elements)[INDEX])
|
||||
|
||||
#define SCAN_ELEMENTS(TYPE) \
|
||||
sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \
|
||||
sub_prims[scan_num].max_index = 0; \
|
||||
for (i = start; i < end; i++) { \
|
||||
scan_index = IB_INDEX_READ(TYPE, i); \
|
||||
if (scan_index == restart_index) { \
|
||||
if (cur_count > 0) { \
|
||||
assert(scan_num < max_prims); \
|
||||
sub_prims[scan_num].start = cur_start; \
|
||||
sub_prims[scan_num].count = cur_count; \
|
||||
scan_num++; \
|
||||
sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \
|
||||
sub_prims[scan_num].max_index = 0; \
|
||||
} \
|
||||
cur_start = i + 1; \
|
||||
cur_count = 0; \
|
||||
} \
|
||||
else { \
|
||||
UPDATE_MIN2(sub_prims[scan_num].min_index, scan_index); \
|
||||
UPDATE_MAX2(sub_prims[scan_num].max_index, scan_index); \
|
||||
cur_count++; \
|
||||
} \
|
||||
} \
|
||||
if (cur_count > 0) { \
|
||||
assert(scan_num < max_prims); \
|
||||
sub_prims[scan_num].start = cur_start; \
|
||||
sub_prims[scan_num].count = cur_count; \
|
||||
scan_num++; \
|
||||
}
|
||||
|
||||
switch (element_size) {
|
||||
case 1:
|
||||
SCAN_ELEMENTS(ubyte);
|
||||
break;
|
||||
case 2:
|
||||
SCAN_ELEMENTS(ushort);
|
||||
break;
|
||||
case 4:
|
||||
SCAN_ELEMENTS(uint);
|
||||
break;
|
||||
default:
|
||||
assert(0 && "bad index_size in find_sub_primitives()");
|
||||
}
|
||||
|
||||
#undef SCAN_ELEMENTS
|
||||
|
||||
*num_sub_prims = scan_num;
|
||||
|
||||
return sub_prims;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Handle primitive restart in software.
|
||||
*
|
||||
* This function breaks up calls into the driver so primitive restart
|
||||
* support is not required in the driver.
|
||||
*/
|
||||
static void
|
||||
vbo_sw_primitive_restart_common_start(struct gl_context *ctx,
|
||||
const struct _mesa_prim *prims,
|
||||
GLuint nr_prims,
|
||||
const struct _mesa_index_buffer *ib,
|
||||
GLuint num_instances,
|
||||
GLuint base_instance,
|
||||
struct gl_buffer_object *indirect,
|
||||
GLsizeiptr indirect_offset,
|
||||
bool primitive_restart,
|
||||
unsigned restart_index)
|
||||
{
|
||||
GLuint prim_num;
|
||||
struct _mesa_prim new_prim;
|
||||
struct _mesa_index_buffer new_ib;
|
||||
struct sub_primitive *sub_prims;
|
||||
struct sub_primitive *sub_prim;
|
||||
GLuint num_sub_prims;
|
||||
GLuint sub_prim_num;
|
||||
GLuint end_index;
|
||||
GLuint sub_end_index;
|
||||
struct _mesa_prim temp_prim;
|
||||
GLboolean map_ib = ib->obj && !ib->obj->Mappings[MAP_INTERNAL].Pointer;
|
||||
const void *ptr;
|
||||
|
||||
/* If there is an indirect buffer, map it and extract the draw params */
|
||||
if (indirect) {
|
||||
const uint32_t *indirect_params;
|
||||
if (!ctx->Driver.MapBufferRange(ctx, 0, indirect->Size, GL_MAP_READ_BIT,
|
||||
indirect, MAP_INTERNAL)) {
|
||||
|
||||
/* something went wrong with mapping, give up */
|
||||
_mesa_error(ctx, GL_OUT_OF_MEMORY,
|
||||
"failed to map indirect buffer for sw primitive restart");
|
||||
return;
|
||||
}
|
||||
|
||||
assert(nr_prims == 1);
|
||||
new_prim = prims[0];
|
||||
indirect_params = (const uint32_t *)
|
||||
ADD_POINTERS(indirect->Mappings[MAP_INTERNAL].Pointer,
|
||||
indirect_offset);
|
||||
|
||||
new_prim.count = indirect_params[0];
|
||||
new_prim.start = indirect_params[2];
|
||||
new_prim.basevertex = indirect_params[3];
|
||||
|
||||
num_instances = indirect_params[1];
|
||||
base_instance = indirect_params[4];
|
||||
|
||||
new_ib = *ib;
|
||||
new_ib.count = new_prim.count;
|
||||
|
||||
prims = &new_prim;
|
||||
ib = &new_ib;
|
||||
|
||||
ctx->Driver.UnmapBuffer(ctx, indirect, MAP_INTERNAL);
|
||||
}
|
||||
|
||||
/* Find the sub-primitives. These are regions in the index buffer which
|
||||
* are split based on the primitive restart index value.
|
||||
*/
|
||||
if (map_ib) {
|
||||
ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
|
||||
ib->obj, MAP_INTERNAL);
|
||||
}
|
||||
|
||||
if (ib->obj)
|
||||
ptr = ADD_POINTERS(ib->obj->Mappings[MAP_INTERNAL].Pointer, ib->ptr);
|
||||
else
|
||||
ptr = ib->ptr;
|
||||
|
||||
sub_prims = find_sub_primitives(ptr, 1 << ib->index_size_shift,
|
||||
prims[0].start, prims[0].start + ib->count,
|
||||
restart_index, &num_sub_prims);
|
||||
|
||||
if (map_ib) {
|
||||
ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
|
||||
}
|
||||
|
||||
/* Loop over the primitives, and use the located sub-primitives to draw
|
||||
* each primitive with a break to implement each primitive restart.
|
||||
*/
|
||||
for (prim_num = 0; prim_num < nr_prims; prim_num++) {
|
||||
end_index = prims[prim_num].start + prims[prim_num].count;
|
||||
memcpy(&temp_prim, &prims[prim_num], sizeof (temp_prim));
|
||||
/* Loop over the sub-primitives drawing sub-ranges of the primitive. */
|
||||
for (sub_prim_num = 0; sub_prim_num < num_sub_prims; sub_prim_num++) {
|
||||
sub_prim = &sub_prims[sub_prim_num];
|
||||
sub_end_index = sub_prim->start + sub_prim->count;
|
||||
if (prims[prim_num].start <= sub_prim->start) {
|
||||
temp_prim.start = MAX2(prims[prim_num].start, sub_prim->start);
|
||||
temp_prim.count = MIN2(sub_end_index, end_index) - temp_prim.start;
|
||||
if ((temp_prim.start == sub_prim->start) &&
|
||||
(temp_prim.count == sub_prim->count)) {
|
||||
ctx->Driver.Draw(ctx, &temp_prim, 1, ib, true, false, 0,
|
||||
sub_prim->min_index, sub_prim->max_index,
|
||||
num_instances, base_instance);
|
||||
} else {
|
||||
ctx->Driver.Draw(ctx, &temp_prim, 1, ib,
|
||||
false, false, 0, -1, -1,
|
||||
num_instances, base_instance);
|
||||
}
|
||||
}
|
||||
if (sub_end_index >= end_index) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(sub_prims);
|
||||
}
|
||||
|
||||
static void
|
||||
vbo_sw_primitive_restart(struct gl_context *ctx,
|
||||
const struct _mesa_prim *prims,
|
||||
GLuint nr_prims,
|
||||
const struct _mesa_index_buffer *ib,
|
||||
GLuint num_instances,
|
||||
GLuint base_instance,
|
||||
struct gl_buffer_object *indirect,
|
||||
GLsizeiptr indirect_offset,
|
||||
bool primitive_restart,
|
||||
unsigned restart_index)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 1; i < nr_prims; i++) {
|
||||
if (prims[i].start != prims[0].start)
|
||||
break;
|
||||
}
|
||||
|
||||
vbo_sw_primitive_restart_common_start(ctx, &prims[0], i, ib,
|
||||
num_instances, base_instance,
|
||||
indirect, indirect_offset,
|
||||
primitive_restart,
|
||||
restart_index);
|
||||
if (i != nr_prims) {
|
||||
vbo_sw_primitive_restart(ctx, &prims[i], nr_prims - i, ib,
|
||||
num_instances, base_instance,
|
||||
indirect, indirect_offset,
|
||||
primitive_restart,
|
||||
restart_index);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the hardware's cut index support can handle the primitive
|
||||
* restart index value (pre-Haswell only).
|
||||
*/
|
||||
static bool
|
||||
can_cut_index_handle_restart_index(struct gl_context *ctx,
|
||||
const struct _mesa_index_buffer *ib,
|
||||
unsigned restart_index)
|
||||
{
|
||||
/* The FixedIndex variant means 0xFF, 0xFFFF, or 0xFFFFFFFF based on
|
||||
* the index buffer type, which corresponds exactly to the hardware.
|
||||
*/
|
||||
if (ctx->Array.PrimitiveRestartFixedIndex)
|
||||
return true;
|
||||
|
||||
bool cut_index_will_work;
|
||||
|
||||
switch (ib->index_size_shift) {
|
||||
case 0:
|
||||
cut_index_will_work = restart_index == 0xff;
|
||||
break;
|
||||
case 1:
|
||||
cut_index_will_work = restart_index == 0xffff;
|
||||
break;
|
||||
case 2:
|
||||
cut_index_will_work = restart_index == 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
||||
return cut_index_will_work;
|
||||
}

/**
 * Check if the hardware's cut index support can handle the primitive
 * restart case.
 */
static bool
can_cut_index_handle_prims(struct gl_context *ctx,
                           const struct _mesa_prim *prim,
                           GLuint nr_prims,
                           const struct _mesa_index_buffer *ib,
                           unsigned restart_index)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   /* Haswell can do it all. */
   if (devinfo->verx10 >= 75)
      return true;

   if (!can_cut_index_handle_restart_index(ctx, ib, restart_index)) {
      /* The primitive restart index can't be handled, so take the
       * software path.
       */
      return false;
   }

   for (unsigned i = 0; i < nr_prims; i++) {
      switch (prim[i].mode) {
      case GL_POINTS:
      case GL_LINES:
      case GL_LINE_STRIP:
      case GL_TRIANGLES:
      case GL_TRIANGLE_STRIP:
      case GL_LINES_ADJACENCY:
      case GL_LINE_STRIP_ADJACENCY:
      case GL_TRIANGLES_ADJACENCY:
      case GL_TRIANGLE_STRIP_ADJACENCY:
         /* Cut index supports these primitive types. */
         break;
      default:
         /* Cut index does not support the remaining primitive types:
          * GL_LINE_LOOP, GL_TRIANGLE_FAN, GL_QUADS, GL_QUAD_STRIP and
          * GL_POLYGON.
          */
         return false;
      }
   }

   return true;
}

/**
 * Check if primitive restart is enabled, and if so, handle it properly.
 *
 * In some cases the support will be handled in software.  When available,
 * hardware will handle primitive restart.
 */
GLboolean
brw_handle_primitive_restart(struct gl_context *ctx,
                             const struct _mesa_prim *prims,
                             GLuint nr_prims,
                             const struct _mesa_index_buffer *ib,
                             GLuint num_instances, GLuint base_instance,
                             bool primitive_restart,
                             unsigned restart_index)
{
   struct brw_context *brw = brw_context(ctx);

   /* We only need to handle cases where there is an index buffer. */
   if (ib == NULL) {
      return GL_FALSE;
   }

   /* If we have set the in_progress flag, then we are in the middle
    * of handling the primitive restart draw.
    */
   if (brw->prim_restart.in_progress) {
      return GL_FALSE;
   }

   /* If PrimitiveRestart is not enabled, then we aren't concerned about
    * handling this draw.
    */
   if (!primitive_restart) {
      return GL_FALSE;
   }

   /* Signal that we are in the process of handling the
    * primitive restart draw.
    */
   brw->prim_restart.in_progress = true;

   if (can_cut_index_handle_prims(ctx, prims, nr_prims, ib, restart_index)) {
      /* Cut index should work for primitive restart, so use it. */
      brw->prim_restart.enable_cut_index = true;
      brw->prim_restart.restart_index = restart_index;
      brw_draw_prims(ctx, prims, nr_prims, ib, false, primitive_restart,
                     restart_index, -1, -1,
                     num_instances, base_instance);
      brw->prim_restart.enable_cut_index = false;
   } else {
      /* Not all the primitive draw modes are supported by the cut index,
       * so take the software path.
       */
      struct gl_buffer_object *indirect_data = brw->draw.draw_indirect_data;

      /* Clear this to make the draw direct. */
      brw->draw.draw_indirect_data = NULL;

      vbo_sw_primitive_restart(ctx, prims, nr_prims, ib, num_instances,
                               base_instance, indirect_data,
                               brw->draw.draw_indirect_offset,
                               primitive_restart, restart_index);
   }

   brw->prim_restart.in_progress = false;

   /* The primitive restart draw was completed, so return true. */
   return GL_TRUE;
}
@ -1,888 +0,0 @@
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include <pthread.h>
#include "main/glspirv.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
#include "util/ralloc.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/program.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/glsl_to_nir.h"

#include "brw_program.h"
#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_defines.h"
#include "brw_batch.h"

#include "brw_cs.h"
#include "brw_gs.h"
#include "brw_vs.h"
#include "brw_wm.h"
#include "brw_state.h"

#include "main/shaderapi.h"
#include "main/shaderobj.h"

static bool
brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
{
   if (is_scalar) {
      nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
                               type_size_scalar_bytes);
      return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
   } else {
      nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
                               type_size_vec4_bytes);
      return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
   }
}

static struct gl_program *brw_new_program(struct gl_context *ctx,
                                          gl_shader_stage stage,
                                          GLuint id, bool is_arb_asm);

nir_shader *
brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
               struct gl_program *prog,
               gl_shader_stage stage,
               bool is_scalar)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   const nir_shader_compiler_options *options =
      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
   nir_shader *nir;

   /* First, lower the GLSL/Mesa IR or SPIR-V to NIR. */
   if (shader_prog) {
      if (shader_prog->data->spirv) {
         nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
      } else {
         nir = glsl_to_nir(ctx, shader_prog, stage, options);

         /* Remap the locations to slots so those requiring two slots will
          * occupy two locations.  For instance, if we have in the IR code a
          * dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR
          * attr0 will use locations/slots 0 and 1, and attr1 will use
          * location/slot 2.
          */
         if (nir->info.stage == MESA_SHADER_VERTEX)
            nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
      }
      assert(nir);

      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
                                NULL);
      nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
   } else {
      nir = prog_to_nir(prog, options);
      NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   }
   nir_validate_shader(nir, "before brw_preprocess_nir");

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (!ctx->SoftFP64 && ((nir->info.bit_sizes_int | nir->info.bit_sizes_float) & 64) &&
       (options->lower_doubles_options & nir_lower_fp64_full_software)) {
      ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
   }

   brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);

   if (stage == MESA_SHADER_TESS_CTRL) {
      /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gfx8+. */
      static const gl_state_index16 tokens[STATE_LENGTH] =
         { STATE_TCS_PATCH_VERTICES_IN };
      nir_lower_patch_vertices(nir, 0, devinfo->ver >= 8 ? tokens : NULL);
   }

   if (stage == MESA_SHADER_TESS_EVAL) {
      /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or
       * a uniform if we don't.
       */
      struct gl_linked_shader *tcs =
         shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
      uint32_t static_patch_vertices =
         tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0;
      static const gl_state_index16 tokens[STATE_LENGTH] =
         { STATE_TES_PATCH_VERTICES_IN };
      nir_lower_patch_vertices(nir, static_patch_vertices, tokens);
   }

   if (stage == MESA_SHADER_FRAGMENT) {
      static const struct nir_lower_wpos_ytransform_options wpos_options = {
         .state_tokens = {STATE_FB_WPOS_Y_TRANSFORM, 0, 0},
         .fs_coord_pixel_center_integer = 1,
         .fs_coord_origin_upper_left = 1,
      };

      bool progress = false;
      NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
      if (progress) {
         _mesa_add_state_reference(prog->Parameters,
                                   wpos_options.state_tokens);
      }
   }

   return nir;
}

static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length,
   *align = comp_size * (length == 3 ? 4 : length);
}
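
/* Worked example (illustrative, not in the original source): for a bvec3,
 * booleans are stored as 32-bit values, so comp_size is 4 and
 * shared_type_info() reports size == 4 * 3 == 12 with align == 4 * 4 == 16,
 * since a three-component vector is padded to vec4 alignment.
 */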

void
brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
                        struct gl_program *prog,
                        const struct intel_device_info *devinfo)
{
   NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
   NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
   BITSET_COPY(prog->info.textures_used, prog->nir->info.textures_used);
   BITSET_COPY(prog->info.textures_used_by_txf, prog->nir->info.textures_used_by_txf);

   NIR_PASS_V(prog->nir, brw_nir_lower_storage_image, devinfo);

   if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
       shader_prog->data->spirv) {
      NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
                 nir_var_mem_shared, shared_type_info);
      NIR_PASS_V(prog->nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);
   }

   NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
   /* Do a round of constant folding to clean up address calculations. */
   NIR_PASS_V(prog->nir, nir_opt_constant_folding);
}

void
brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
{
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* Copy the info we just generated back into the gl_program. */
   const char *prog_name = prog->info.name;
   const char *prog_label = prog->info.label;
   prog->info = nir->info;
   prog->info.name = prog_name;
   prog->info.label = prog_label;
}

static unsigned
get_new_program_id(struct brw_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

static struct gl_program *
brw_new_program(struct gl_context *ctx,
                gl_shader_stage stage,
                GLuint id, bool is_arb_asm)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_program *prog = rzalloc(NULL, struct brw_program);

   if (prog) {
      prog->id = get_new_program_id(brw->screen);

      return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
   }

   return NULL;
}

static void
brw_delete_program(struct gl_context *ctx, struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);

   /* Beware!  prog's refcount has reached zero, and it's about to be freed.
    *
    * In brw_upload_pipeline_state(), we compare brw->programs[i] to
    * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
    * pointer has changed.
    *
    * We cannot leave brw->programs[i] as a dangling pointer to the dead
    * program.  malloc() may allocate the same memory for a new gl_program,
    * causing us to see matching pointers...but totally different programs.
    *
    * We cannot set brw->programs[i] to NULL, either.  If we've deleted the
    * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
    * would cause us to see matching pointers (NULL == NULL), and fail to
    * detect that a program has changed since our last draw.
    *
    * So, set it to a bogus gl_program pointer that will never match,
    * causing us to properly reevaluate the state on our next draw.
    *
    * Getting this wrong causes heisenbugs which are very hard to catch,
    * as you need a very specific allocation pattern to hit the problem.
    */
   static const struct gl_program deleted_program;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (brw->programs[i] == prog)
         brw->programs[i] = (struct gl_program *) &deleted_program;
   }

   _mesa_delete_program(ctx, prog);
}
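
/* The sentinel trick above generalizes to any cache that detects state
 * changes by comparing raw pointers.  A minimal standalone sketch of the
 * pattern, with hypothetical names (not part of the original driver):
 */
#if 0
static const struct object invalid_object;   /* never matches a live object */

static void
on_object_deleted(struct tracker *t, struct object *obj)
{
   for (int i = 0; i < TRACKED_SLOTS; i++) {
      if (t->last_seen[i] == obj)
         t->last_seen[i] = (struct object *) &invalid_object;
   }
}
#endif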


static GLboolean
brw_program_string_notify(struct gl_context *ctx,
                          GLenum target,
                          struct gl_program *prog)
{
   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);

   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct brw_program *newFP = brw_program(prog);
      const struct brw_program *curFP =
         brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]);

      if (newFP == curFP)
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      _mesa_program_fragment_position_to_sysval(&newFP->program);
      newFP->id = get_new_program_id(brw->screen);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);

      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);

      brw_shader_gather_info(prog->nir, prog);

      brw_fs_precompile(ctx, prog);
      break;
   }
   case GL_VERTEX_PROGRAM_ARB: {
      struct brw_program *newVP = brw_program(prog);
      const struct brw_program *curVP =
         brw_program_const(brw->programs[MESA_SHADER_VERTEX]);

      if (newVP == curVP)
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.arb.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = get_new_program_id(brw->screen);

      /* Also tell tnl about it. */
      _tnl_program_string(ctx, target, prog);

      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);

      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);

      brw_shader_gather_info(prog->nir, prog);

      brw_vs_precompile(ctx, prog);
      break;
   }
   default:
      /*
       * driver->ProgramStringNotify is only called for ARB programs, fixed
       * function vertex programs, and ir_to_mesa (which isn't used by the
       * i965 back-end).  Therefore, even after geometry shaders are added,
       * this function should only ever be called with a target of
       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
       */
      unreachable("Unexpected target in brwProgramStringNotify");
   }

   return true;
}

static void
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
   assert(devinfo->ver >= 7 && devinfo->ver <= 11);

   if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                   GL_ELEMENT_ARRAY_BARRIER_BIT |
                   GL_COMMAND_BARRIER_BIT))
      bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;

   if (barriers & GL_UNIFORM_BARRIER_BIT)
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_CONST_CACHE_INVALIDATE);

   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

   if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT |
                   GL_PIXEL_BUFFER_BARRIER_BIT))
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_RENDER_TARGET_FLUSH);

   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_RENDER_TARGET_FLUSH);

   /* Typed surface messages are handled by the render cache on IVB, so we
    * need to flush it too.
    */
   if (devinfo->verx10 == 70)
      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;

   brw_emit_pipe_control_flush(brw, bits);
}

static void
brw_framebuffer_fetch_barrier(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
      if (devinfo->ver >= 6) {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_CS_STALL);
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
      } else {
         brw_emit_pipe_control_flush(brw,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH);
      }
   }
}

void
brw_get_scratch_bo(struct brw_context *brw,
                   struct brw_bo **scratch_bo, int size)
{
   struct brw_bo *old_bo = *scratch_bo;

   if (old_bo && old_bo->size < size) {
      brw_bo_unreference(old_bo);
      old_bo = NULL;
   }

   if (!old_bo) {
      *scratch_bo =
         brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH);
   }
}

/**
 * Reserve enough scratch space for the given stage to hold \p per_thread_size
 * bytes times the device's maximum number of scratch IDs for that stage.
 */
void
brw_alloc_stage_scratch(struct brw_context *brw,
                        struct brw_stage_state *stage_state,
                        unsigned per_thread_size)
{
   if (stage_state->per_thread_scratch >= per_thread_size)
      return;

   stage_state->per_thread_scratch = per_thread_size;

   if (stage_state->scratch_bo)
      brw_bo_unreference(stage_state->scratch_bo);

   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   assert(stage_state->stage < ARRAY_SIZE(devinfo->max_scratch_ids));
   unsigned max_ids = devinfo->max_scratch_ids[stage_state->stage];
   stage_state->scratch_bo =
      brw_bo_alloc(brw->bufmgr, "shader scratch space",
                   per_thread_size * max_ids, BRW_MEMZONE_SCRATCH);
}
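
/* Worked example (illustrative; the max_scratch_ids value is assumed): if
 * the compiler asks for 1024 bytes of scratch per thread and the device
 * reports max_scratch_ids == 2048 for the stage, the BO above is sized
 * 1024 * 2048 = 2 MiB.  Since per_thread_scratch only ever grows, later
 * requests for smaller sizes reuse the existing BO.
 */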

void
brw_init_frag_prog_functions(struct dd_function_table *functions)
{
   assert(functions->ProgramStringNotify == _tnl_program_string);

   functions->NewProgram = brw_new_program;
   functions->DeleteProgram = brw_delete_program;
   functions->ProgramStringNotify = brw_program_string_notify;

   functions->LinkShader = brw_link_shader;

   functions->MemoryBarrier = brw_memory_barrier;
   functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
}

struct shader_times {
   uint64_t time;
   uint64_t written;
   uint64_t reset;
};

void
brw_init_shader_time(struct brw_context *brw)
{
   const int max_entries = 2048;
   brw->shader_time.bo =
      brw_bo_alloc(brw->bufmgr, "shader time",
                   max_entries * BRW_SHADER_TIME_STRIDE * 3,
                   BRW_MEMZONE_OTHER);
   brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
   brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
   brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                          max_entries);
   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                               max_entries);
   brw->shader_time.max_entries = max_entries;
}

static int
compare_time(const void *a, const void *b)
{
   uint64_t * const *a_val = a;
   uint64_t * const *b_val = b;

   /* We don't just subtract because we're turning the value to an int. */
   if (**a_val < **b_val)
      return -1;
   else if (**a_val == **b_val)
      return 0;
   else
      return 1;
}
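
/* Illustrative usage (not part of the original file): compare_time() is
 * meant for qsort()ing an array of pointers into a time array, so the sort
 * reorders the pointers while the times themselves stay put:
 */
#if 0
uint64_t scaled[4] = { 30, 10, 40, 20 };
uint64_t *sorted[4] = { &scaled[0], &scaled[1], &scaled[2], &scaled[3] };
qsort(sorted, 4, sizeof(sorted[0]), compare_time);
/* sorted[] now points at 10, 20, 30, 40; sorted[s] - scaled recovers the
 * original entry index, exactly as brw_report_shader_time() does below.
 */
#endif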

static void
print_shader_time_line(const char *stage, const char *name,
                       int shader_num, uint64_t time, uint64_t total)
{
   fprintf(stderr, "%-6s%-18s", stage, name);

   if (shader_num != 0)
      fprintf(stderr, "%4d: ", shader_num);
   else
      fprintf(stderr, "    : ");

   fprintf(stderr, "%16lld (%7.2f Gcycles) %4.1f%%\n",
           (long long)time,
           (double)time / 1000000000.0,
           (double)time / total * 100.0);
}

static void
brw_report_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo || !brw->shader_time.num_entries)
      return;

   uint64_t scaled[brw->shader_time.num_entries];
   uint64_t *sorted[brw->shader_time.num_entries];
   uint64_t total_by_type[ST_CS + 1];
   memset(total_by_type, 0, sizeof(total_by_type));
   double total = 0;
   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint64_t written = 0, reset = 0;
      enum shader_time_shader_type type = brw->shader_time.types[i];

      sorted[i] = &scaled[i];

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_FS32:
      case ST_CS:
         written = brw->shader_time.cumulative[i].written;
         reset = brw->shader_time.cumulative[i].reset;
         break;

      default:
         /* I sometimes want to print things that aren't the 3 shader times.
          * Just print the sum in that case.
          */
         written = 1;
         reset = 0;
         break;
      }

      uint64_t time = brw->shader_time.cumulative[i].time;
      if (written) {
         scaled[i] = time / written * (written + reset);
      } else {
         scaled[i] = time;
      }

      switch (type) {
      case ST_VS:
      case ST_TCS:
      case ST_TES:
      case ST_GS:
      case ST_FS8:
      case ST_FS16:
      case ST_FS32:
      case ST_CS:
         total_by_type[type] += scaled[i];
         break;
      default:
         break;
      }

      total += scaled[i];
   }

   if (total == 0) {
      fprintf(stderr, "No shader time collected yet\n");
      return;
   }

   qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);

   fprintf(stderr, "\n");
   fprintf(stderr, "type ID cycles spent %% of total\n");
   for (int s = 0; s < brw->shader_time.num_entries; s++) {
      const char *stage;
      /* Work back from the sorted pointers times to a time to print. */
      int i = sorted[s] - scaled;

      if (scaled[i] == 0)
         continue;

      int shader_num = brw->shader_time.ids[i];
      const char *shader_name = brw->shader_time.names[i];

      switch (brw->shader_time.types[i]) {
      case ST_VS:
         stage = "vs";
         break;
      case ST_TCS:
         stage = "tcs";
         break;
      case ST_TES:
         stage = "tes";
         break;
      case ST_GS:
         stage = "gs";
         break;
      case ST_FS8:
         stage = "fs8";
         break;
      case ST_FS16:
         stage = "fs16";
         break;
      case ST_FS32:
         stage = "fs32";
         break;
      case ST_CS:
         stage = "cs";
         break;
      default:
         stage = "other";
         break;
      }

      print_shader_time_line(stage, shader_name, shader_num,
                             scaled[i], total);
   }

   fprintf(stderr, "\n");
   print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
   print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
   print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
   print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
   print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total);
   print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
}
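
/* Worked example (illustrative) of the scaling in the report above: with
 * cumulative time == 900, written == 3, and reset == 1, the loop computes
 * scaled = 900 / 3 * (3 + 1) = 1200, i.e. the average cost of a report that
 * was actually written is extrapolated over the reports that were reset
 * before they could be written.
 */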

static void
brw_collect_shader_time(struct brw_context *brw)
{
   if (!brw->shader_time.bo)
      return;

   /* This probably stalls on the last rendering.  We could fix that by
    * delaying reading the reports, but it doesn't look like it's a big
    * overhead compared to the cost of tracking the time in the first place.
    */
   void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);

   for (int i = 0; i < brw->shader_time.num_entries; i++) {
      uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;

      brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
      brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
      brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
   }

   /* Zero the BO to clear it for our next collection. */
   memset(bo_map, 0, brw->shader_time.bo->size);
   brw_bo_unmap(brw->shader_time.bo);
}

void
brw_collect_and_report_shader_time(struct brw_context *brw)
{
   brw_collect_shader_time(brw);

   if (brw->shader_time.report_time == 0 ||
       get_time() - brw->shader_time.report_time >= 1.0) {
      brw_report_shader_time(brw);
      brw->shader_time.report_time = get_time();
   }
}

/**
 * Chooses an index in the shader_time buffer and sets up tracking information
 * for our printouts.
 *
 * Note that this holds on to references to the underlying programs, which may
 * change their lifetimes compared to normal operation.
 */
int
brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
                          enum shader_time_shader_type type, bool is_glsl_sh)
{
   int shader_time_index = brw->shader_time.num_entries++;
   assert(shader_time_index < brw->shader_time.max_entries);
   brw->shader_time.types[shader_time_index] = type;

   const char *name;
   if (prog->Id == 0) {
      name = "ff";
   } else if (is_glsl_sh) {
      name = prog->info.label ?
         ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
   } else {
      name = "prog";
   }

   brw->shader_time.names[shader_time_index] = name;
   brw->shader_time.ids[shader_time_index] = prog->Id;

   return shader_time_index;
}

void
brw_destroy_shader_time(struct brw_context *brw)
{
   brw_bo_unreference(brw->shader_time.bo);
   brw->shader_time.bo = NULL;
}

void
brw_stage_prog_data_free(const void *p)
{
   struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;

   ralloc_free(prog_data->param);
   ralloc_free(prog_data->pull_param);
}

void
brw_dump_arb_asm(const char *stage, struct gl_program *prog)
{
   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
           stage, prog->Id, stage);
   _mesa_print_program(prog);
}

void
brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
                             struct brw_sampler_prog_key_data *tex,
                             const struct gl_program *prog)
{
   const bool has_shader_channel_select = devinfo->verx10 >= 75;
   unsigned sampler_count = util_last_bit(prog->SamplersUsed);
   for (unsigned i = 0; i < sampler_count; i++) {
      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
         tex->swizzles[i] =
            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
      } else {
         /* Color sampler: assume no swizzling. */
         tex->swizzles[i] = SWIZZLE_XYZW;
      }
   }
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused, and also to make sure that adding small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
uint32_t
brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
                                        const struct gl_program *prog,
                                        struct brw_stage_prog_data *stage_prog_data,
                                        uint32_t next_binding_table_offset)
{
   int num_textures = util_last_bit(prog->SamplersUsed);

   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   if (prog->info.num_ubos) {
      assert(prog->info.num_ubos <= BRW_MAX_UBO);
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_ubos;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (prog->info.num_ssbos || prog->info.num_abos) {
      assert(prog->info.num_abos <= BRW_MAX_ABO);
      assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
      stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
   } else {
      stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
   }

   if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->info.uses_texture_gather) {
      if (devinfo->ver >= 8) {
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (prog->info.num_images) {
      stage_prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += prog->info.num_images;
   } else {
      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   /* Plane 0 is just the regular texture section. */
   stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;

   stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* Set the binding table size.  Some callers may append new entries
    * and increase this accordingly.
    */
   stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
   return next_binding_table_offset;
}
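
/* Worked example (illustrative): for a program with 2 textures, 1 UBO, no
 * SSBOs/atomics/images, no gather, and shader time disabled, starting from
 * offset 0 the function above assigns:
 *
 *    texture_start        = 0           (entries 0-1)
 *    ubo_start            = 2           (entry 2)
 *    ssbo_start           = 0xd0d0d0d0  (unused)
 *    shader_time_start    = 0xd0d0d0d0  (unused)
 *    gather_texture_start = 0xd0d0d0d0  (unused)
 *    image_start          = 0xd0d0d0d0  (unused)
 *    pull_constants_start = 3
 *    plane_start[0] = 0, plane_start[1] = 4, plane_start[2] = 6
 *
 * and returns 8, with size_bytes == 8 * 4 == 32.
 */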

void
brw_populate_default_key(const struct brw_compiler *compiler,
                         union brw_any_prog_key *prog_key,
                         struct gl_shader_program *sh_prog,
                         struct gl_program *prog)
{
   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
      break;
   case MESA_SHADER_TESS_CTRL:
      brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
      break;
   case MESA_SHADER_TESS_EVAL:
      brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
      break;
   case MESA_SHADER_GEOMETRY:
      brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
      break;
   case MESA_SHADER_FRAGMENT:
      brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
      break;
   case MESA_SHADER_COMPUTE:
      brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
      break;
   default:
      unreachable("Unsupported stage!");
   }
}

void
brw_debug_recompile(struct brw_context *brw,
                    gl_shader_stage stage,
                    unsigned api_id,
                    struct brw_base_prog_key *key)
{
   const struct brw_compiler *compiler = brw->screen->compiler;
   enum brw_cache_id cache_id = brw_stage_cache_id(stage);

   brw_shader_perf_log(compiler, brw, "Recompiling %s shader for program %d\n",
                       _mesa_shader_stage_to_string(stage), api_id);

   const void *old_key =
      brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id);

   brw_debug_key_recompile(compiler, brw, stage, old_key, key);
}
@ -1,145 +0,0 @@
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_PROGRAM_H
#define BRW_PROGRAM_H

#include "compiler/brw_compiler.h"
#include "nir.h"

#ifdef __cplusplus
extern "C" {
#endif

struct brw_context;
struct blob;
struct blob_reader;

enum brw_param_domain {
   BRW_PARAM_DOMAIN_BUILTIN = 0,
   BRW_PARAM_DOMAIN_PARAMETER,
   BRW_PARAM_DOMAIN_UNIFORM,
   BRW_PARAM_DOMAIN_IMAGE,
};

#define BRW_PARAM(domain, val)   (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param)  ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param)   ((uint32_t)(param) & 0x00ffffff)

#define BRW_PARAM_PARAMETER(idx, comp) \
   BRW_PARAM(PARAMETER, ((idx) << 2) | (comp))
#define BRW_PARAM_PARAMETER_IDX(param)  (BRW_PARAM_VALUE(param) >> 2)
#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3)

#define BRW_PARAM_UNIFORM(idx)        BRW_PARAM(UNIFORM, (idx))
#define BRW_PARAM_UNIFORM_IDX(param)  BRW_PARAM_VALUE(param)

#define BRW_PARAM_IMAGE(idx, offset)  BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value)    (BRW_PARAM_VALUE(value) >> 8)
#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf)
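
/* Illustrative expansion (not part of the original header): each param
 * handle packs a domain tag in the top byte and a domain-specific value in
 * the low 24 bits.  For example, with BRW_PARAM_DOMAIN_PARAMETER == 1:
 *
 *    BRW_PARAM_PARAMETER(5, 2) == 1 << 24 | (5 << 2 | 2) == 0x01000016
 *
 *    BRW_PARAM_DOMAIN(0x01000016)         == BRW_PARAM_DOMAIN_PARAMETER
 *    BRW_PARAM_PARAMETER_IDX(0x01000016)  == 5
 *    BRW_PARAM_PARAMETER_COMP(0x01000016) == 2
 */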

struct nir_shader *brw_create_nir(struct brw_context *brw,
                                  const struct gl_shader_program *shader_prog,
                                  struct gl_program *prog,
                                  gl_shader_stage stage,
                                  bool is_scalar);

void brw_nir_lower_resources(nir_shader *nir,
                             struct gl_shader_program *shader_prog,
                             struct gl_program *prog,
                             const struct intel_device_info *devinfo);

void brw_shader_gather_info(nir_shader *nir, struct gl_program *prog);

void brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
                                  struct brw_sampler_prog_key_data *tex,
                                  const struct gl_program *prog);

void brw_populate_base_prog_key(struct gl_context *ctx,
                                const struct brw_program *prog,
                                struct brw_base_prog_key *key);
void brw_populate_default_base_prog_key(const struct intel_device_info *devinfo,
                                        const struct brw_program *prog,
                                        struct brw_base_prog_key *key);
void brw_debug_recompile(struct brw_context *brw, gl_shader_stage stage,
                         unsigned api_id, struct brw_base_prog_key *key);

uint32_t
brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
                                        const struct gl_program *prog,
                                        struct brw_stage_prog_data *stage_prog_data,
                                        uint32_t next_binding_table_offset);

void
brw_populate_default_key(const struct brw_compiler *compiler,
                         union brw_any_prog_key *prog_key,
                         struct gl_shader_program *sh_prog,
                         struct gl_program *prog);

void
brw_stage_prog_data_free(const void *prog_data);

void
brw_dump_arb_asm(const char *stage, struct gl_program *prog);

bool brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog);
bool brw_tcs_precompile(struct gl_context *ctx,
                        struct gl_shader_program *shader_prog,
                        struct gl_program *prog);
bool brw_tes_precompile(struct gl_context *ctx,
                        struct gl_shader_program *shader_prog,
                        struct gl_program *prog);
bool brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog);
bool brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog);
bool brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog);

GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);

void brw_upload_tcs_prog(struct brw_context *brw);
void brw_tcs_populate_key(struct brw_context *brw,
                          struct brw_tcs_prog_key *key);
void brw_tcs_populate_default_key(const struct brw_compiler *compiler,
                                  struct brw_tcs_prog_key *key,
                                  struct gl_shader_program *sh_prog,
                                  struct gl_program *prog);
void brw_upload_tes_prog(struct brw_context *brw);
void brw_tes_populate_key(struct brw_context *brw,
                          struct brw_tes_prog_key *key);
void brw_tes_populate_default_key(const struct brw_compiler *compiler,
                                  struct brw_tes_prog_key *key,
                                  struct gl_shader_program *sh_prog,
                                  struct gl_program *prog);

void brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage,
                                 const void *program,
                                 struct brw_stage_prog_data *prog_data);
bool brw_read_blob_program_data(struct blob_reader *binary,
                                struct gl_program *prog, gl_shader_stage stage,
                                const uint8_t **program,
                                struct brw_stage_prog_data *prog_data);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif
@ -1,353 +0,0 @@
/*
 * Copyright (c) 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>

#include "compiler/nir/nir_serialize.h"
#include "util/build_id.h"
#include "util/mesa-sha1.h"

#include "brw_context.h"
#include "brw_program.h"
#include "brw_state.h"

static uint8_t driver_sha1[20];

void
brw_program_binary_init(unsigned device_id)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(brw_program_binary_init);
   assert(note);

   /**
    * With Mesa's megadrivers, taking the sha1 of i965_dri.so may not be
    * unique.  Therefore, we make a sha1 of the "i965" string and the sha1
    * build id from i965_dri.so.
    */
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   char renderer[10];
   assert(device_id < 0x10000);
   int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id);
   assert(len == sizeof(renderer) - 1);
   _mesa_sha1_update(&ctx, renderer, len);
   _mesa_sha1_update(&ctx, build_id_data(note), build_id_length(note));
   _mesa_sha1_final(&ctx, driver_sha1);
}

void
brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1)
{
   memcpy(sha1, driver_sha1, sizeof(uint8_t) * 20);
}

enum driver_cache_blob_part {
   END_PART,
   INTEL_PART,
   NIR_PART,
};

static bool
blob_parts_valid(void *blob, uint32_t size)
{
   struct blob_reader reader;
   blob_reader_init(&reader, blob, size);

   do {
      uint32_t part_type = blob_read_uint32(&reader);
      if (reader.overrun)
         return false;
      if (part_type == END_PART)
         return reader.current == reader.end;
      switch ((enum driver_cache_blob_part)part_type) {
      case INTEL_PART:
      case NIR_PART:
         /* Read the uint32_t part-size and skip over it. */
         blob_skip_bytes(&reader, blob_read_uint32(&reader));
         if (reader.overrun)
            return false;
         break;
      default:
         return false;
      }
   } while (true);
}

static bool
blob_has_part(void *blob, uint32_t size, enum driver_cache_blob_part part)
{
   struct blob_reader reader;
   blob_reader_init(&reader, blob, size);

   assert(blob_parts_valid(blob, size));
   do {
      uint32_t part_type = blob_read_uint32(&reader);
      if (part_type == END_PART)
         return false;
      if (part_type == part)
         return true;
      blob_skip_bytes(&reader, blob_read_uint32(&reader));
   } while (true);
}

static bool
driver_blob_is_ready(void *blob, uint32_t size, bool with_intel_program)
{
   if (!blob) {
      return false;
   } else if (!blob_parts_valid(blob, size)) {
      unreachable("Driver blob format is bad!");
      return false;
   } else if (blob_has_part(blob, size, INTEL_PART) == with_intel_program) {
      return true;
   } else {
      return false;
   }
}

static void
serialize_nir_part(struct blob *writer, struct gl_program *prog)
{
   blob_write_uint32(writer, NIR_PART);
   intptr_t size_offset = blob_reserve_uint32(writer);
   size_t nir_start = writer->size;
   nir_serialize(writer, prog->nir, false);
   blob_overwrite_uint32(writer, size_offset, writer->size - nir_start);
}
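
/* Illustrative layout (not spelled out in the original file): each part
 * written this way is a tagged, length-prefixed record, so a finished
 * driver blob looks like:
 *
 *    u32 NIR_PART   | u32 nir_size | nir_size bytes of serialized NIR
 *  [ u32 INTEL_PART | u32 gen_size | key + prog_data + program ]  (optional)
 *    u32 END_PART
 *
 * which is exactly the framing that blob_parts_valid() walks above.
 */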

void
brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog)
{
   if (driver_blob_is_ready(prog->driver_cache_blob,
                            prog->driver_cache_blob_size, false))
      return;

   if (prog->driver_cache_blob)
      ralloc_free(prog->driver_cache_blob);

   struct blob writer;
   blob_init(&writer);
   serialize_nir_part(&writer, prog);
   blob_write_uint32(&writer, END_PART);
   prog->driver_cache_blob = ralloc_size(NULL, writer.size);
   memcpy(prog->driver_cache_blob, writer.data, writer.size);
   prog->driver_cache_blob_size = writer.size;
   blob_finish(&writer);
}

static bool
deserialize_intel_program(struct blob_reader *reader, struct gl_context *ctx,
                          struct gl_program *prog, gl_shader_stage stage)
{
   struct brw_context *brw = brw_context(ctx);

   union brw_any_prog_key prog_key;
   blob_copy_bytes(reader, &prog_key, brw_prog_key_size(stage));
   prog_key.base.program_string_id = brw_program(prog)->id;

   enum brw_cache_id cache_id = brw_stage_cache_id(stage);

   const uint8_t *program;
   struct brw_stage_prog_data *prog_data =
      ralloc_size(NULL, sizeof(union brw_any_prog_data));

   if (!brw_read_blob_program_data(reader, prog, stage, &program, prog_data)) {
      ralloc_free(prog_data);
      return false;
   }

   uint32_t offset;
   void *out_prog_data;
   brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
                    program, prog_data->program_size, prog_data,
                    brw_prog_data_size(stage), &offset, &out_prog_data);

   ralloc_free(prog_data);

   return true;
}

void
brw_program_deserialize_driver_blob(struct gl_context *ctx,
                                    struct gl_program *prog,
                                    gl_shader_stage stage)
{
   if (!prog->driver_cache_blob)
      return;

   struct blob_reader reader;
   blob_reader_init(&reader, prog->driver_cache_blob,
                    prog->driver_cache_blob_size);

   do {
      uint32_t part_type = blob_read_uint32(&reader);
      if ((enum driver_cache_blob_part)part_type == END_PART)
         break;
      switch ((enum driver_cache_blob_part)part_type) {
      case INTEL_PART: {
         ASSERTED uint32_t gen_size = blob_read_uint32(&reader);
         assert(!reader.overrun &&
                (uintptr_t)(reader.end - reader.current) > gen_size);
         deserialize_intel_program(&reader, ctx, prog, stage);
         break;
      }
      case NIR_PART: {
         ASSERTED uint32_t nir_size = blob_read_uint32(&reader);
         assert(!reader.overrun &&
                (uintptr_t)(reader.end - reader.current) > nir_size);
         const struct nir_shader_compiler_options *options =
            ctx->Const.ShaderCompilerOptions[stage].NirOptions;
         prog->nir = nir_deserialize(NULL, options, &reader);
         break;
      }
      default:
         unreachable("Unsupported blob part type!");
         break;
      }
   } while (true);

   ralloc_free(prog->driver_cache_blob);
   prog->driver_cache_blob = NULL;
   prog->driver_cache_blob_size = 0;
}

/* This is just a wrapper around brw_program_deserialize_driver_blob() as
 * i965 doesn't need gl_shader_program like other drivers do.
 */
void
brw_deserialize_program_binary(struct gl_context *ctx,
                               struct gl_shader_program *shProg,
                               struct gl_program *prog)
{
   brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
}

static void
serialize_intel_part(struct blob *writer, struct gl_context *ctx,
                     struct gl_shader_program *sh_prog,
                     struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);

   union brw_any_prog_key key;
   brw_populate_default_key(brw->screen->compiler, &key, sh_prog, prog);

   const gl_shader_stage stage = prog->info.stage;
   uint32_t offset = 0;
   void *prog_data = NULL;
   if (brw_search_cache(&brw->cache, brw_stage_cache_id(stage), &key,
                        brw_prog_key_size(stage), &offset, &prog_data,
                        false)) {
      const void *program_map = brw->cache.map + offset;
      /* TODO: Improve perf for non-LLC.  It would be best to save it at
       * program generation time when the program is in normal memory
       * accessible with cache to the CPU.  Another easier change would be to
       * use _mesa_streaming_load_memcpy to read from the program mapped
       * memory.
       */
      blob_write_uint32(writer, INTEL_PART);
      intptr_t size_offset = blob_reserve_uint32(writer);
      size_t gen_start = writer->size;
      blob_write_bytes(writer, &key, brw_prog_key_size(stage));
      brw_write_blob_program_data(writer, stage, program_map, prog_data);
      blob_overwrite_uint32(writer, size_offset, writer->size - gen_start);
   }
}

void
brw_serialize_program_binary(struct gl_context *ctx,
                             struct gl_shader_program *sh_prog,
                             struct gl_program *prog)
{
   if (driver_blob_is_ready(prog->driver_cache_blob,
                            prog->driver_cache_blob_size, true))
      return;

   if (prog->driver_cache_blob) {
      if (!prog->nir) {
         /* If we loaded from the disk shader cache, then the nir might not
          * have been deserialized yet.
          */
         brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
      }
      ralloc_free(prog->driver_cache_blob);
   }

   struct blob writer;
   blob_init(&writer);
   serialize_nir_part(&writer, prog);
   serialize_intel_part(&writer, ctx, sh_prog, prog);
   blob_write_uint32(&writer, END_PART);
   prog->driver_cache_blob = ralloc_size(NULL, writer.size);
   memcpy(prog->driver_cache_blob, writer.data, writer.size);
   prog->driver_cache_blob_size = writer.size;
   blob_finish(&writer);
}

void
brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage,
                            const void *program,
                            struct brw_stage_prog_data *prog_data)
{
   /* Write prog_data to blob. */
   blob_write_bytes(binary, prog_data, brw_prog_data_size(stage));

   /* Write program to blob. */
   blob_write_bytes(binary, program, prog_data->program_size);

   /* Write push params. */
   blob_write_bytes(binary, prog_data->param,
                    sizeof(uint32_t) * prog_data->nr_params);

   /* Write pull params. */
   blob_write_bytes(binary, prog_data->pull_param,
                    sizeof(uint32_t) * prog_data->nr_pull_params);
}

bool
brw_read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
                           gl_shader_stage stage, const uint8_t **program,
                           struct brw_stage_prog_data *prog_data)
{
   /* Read shader prog_data from blob. */
   blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage));
   if (binary->overrun)
      return false;

   /* Read shader program from blob. */
   *program = blob_read_bytes(binary, prog_data->program_size);

   /* Read push params. */
   prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params);
   blob_copy_bytes(binary, prog_data->param,
                   sizeof(uint32_t) * prog_data->nr_params);

   /* Read pull params. */
   prog_data->pull_param = rzalloc_array(NULL, uint32_t,
                                         prog_data->nr_pull_params);
   blob_copy_bytes(binary, prog_data->pull_param,
                   sizeof(uint32_t) * prog_data->nr_pull_params);

   return !binary->overrun;
}
@ -1,523 +0,0 @@
/*
|
||||
Copyright (C) Intel Corp. 2006. All Rights Reserved.
|
||||
Intel funded Tungsten Graphics to
|
||||
develop this 3D driver.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the
|
||||
next paragraph) shall be included in all copies or substantial
|
||||
portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
**********************************************************************/
|
||||
/*
|
||||
* Authors:
|
||||
* Keith Whitwell <keithw@vmware.com>
|
||||
*/
|
||||
|
||||
/** @file brw_program_cache.c
|
||||
*
|
||||
* This file implements a simple program cache for 965. The consumers can
|
||||
* query the hash table of programs using a cache_id and program key, and
|
||||
* receive the corresponding program buffer object (plus associated auxiliary
|
||||
* data) in return. Objects in the cache may not have relocations
|
||||
* (pointers to other BOs) in them.
|
||||
*
|
||||
* The inner workings are a simple hash table based on a FNV-1a of the
|
||||
* key data.
|
||||
*
|
||||
* Replacement is not implemented. Instead, when the cache gets too
|
||||
* big we throw out all of the cache data and let it get regenerated.
|
||||
*/

#include "main/streaming-load-memcpy.h"
#include "x86/common_x86_asm.h"
#include "brw_batch.h"
#include "brw_state.h"
#include "brw_wm.h"
#include "brw_gs.h"
#include "brw_cs.h"
#include "brw_program.h"
#include "compiler/brw_eu.h"
#include "util/u_memory.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

#define FILE_DEBUG_FLAG DEBUG_STATE

struct brw_cache_item {
   /**
    * Effectively part of the key, cache_id identifies what kind of state
    * buffer is involved, and also which dirty flag should be set.
    */
   enum brw_cache_id cache_id;

   /** 32-bit hash of the key data */
   GLuint hash;

   /** for variable-sized keys */
   GLuint key_size;
   GLuint prog_data_size;
   const struct brw_base_prog_key *key;

   uint32_t offset;
   uint32_t size;

   struct brw_cache_item *next;
};

enum brw_cache_id
brw_stage_cache_id(gl_shader_stage stage)
{
   static const enum brw_cache_id stage_ids[] = {
      BRW_CACHE_VS_PROG,
      BRW_CACHE_TCS_PROG,
      BRW_CACHE_TES_PROG,
      BRW_CACHE_GS_PROG,
      BRW_CACHE_FS_PROG,
      BRW_CACHE_CS_PROG,
   };
   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_ids));
   return stage_ids[stage];
}

static GLuint
hash_key(struct brw_cache_item *item)
{
   uint32_t hash = 0;
   hash = XXH32(&item->cache_id, sizeof(item->cache_id), hash);
   hash = XXH32(item->key, item->key_size, hash);

   return hash;
}

static int
brw_cache_item_equals(const struct brw_cache_item *a,
                      const struct brw_cache_item *b)
{
   return a->cache_id == b->cache_id &&
          a->hash == b->hash &&
          a->key_size == b->key_size &&
          (memcmp(a->key, b->key, a->key_size) == 0);
}

static struct brw_cache_item *
search_cache(struct brw_cache *cache, GLuint hash,
             struct brw_cache_item *lookup)
{
   struct brw_cache_item *c;

#if 0
   int bucketcount = 0;

   for (c = cache->items[hash % cache->size]; c; c = c->next)
      bucketcount++;

   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
           cache->size, bucketcount, cache->n_items);
#endif

   for (c = cache->items[hash % cache->size]; c; c = c->next) {
      if (brw_cache_item_equals(lookup, c))
         return c;
   }

   return NULL;
}


static void
rehash(struct brw_cache *cache)
{
   struct brw_cache_item **items;
   struct brw_cache_item *c, *next;
   GLuint size, i;

   size = cache->size * 3;
   items = calloc(size, sizeof(*items));

   for (i = 0; i < cache->size; i++)
      for (c = cache->items[i]; c; c = next) {
         next = c->next;
         c->next = items[c->hash % size];
         items[c->hash % size] = c;
      }

   free(cache->items);
   cache->items = items;
   cache->size = size;
}
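
/* [Editor's sketch, not from the driver.] rehash() grows the table by
 * relinking every item into a bucket array three times as large; nothing
 * is copied or re-hashed, because each item caches its 32-bit hash.  A
 * self-contained miniature of the same idea, with invented names (the
 * multiplicative constant stands in for XXH32):
 */
#include <stdio.h>
#include <stdlib.h>

struct node { unsigned hash; int value; struct node *next; };

struct table { struct node **buckets; unsigned size, n_items; };

static void grow(struct table *t)
{
   unsigned new_size = t->size * 3;
   struct node **b = calloc(new_size, sizeof(*b));

   for (unsigned i = 0; i < t->size; i++) {
      for (struct node *n = t->buckets[i], *next; n; n = next) {
         next = n->next;
         n->next = b[n->hash % new_size];   /* relink, don't re-hash */
         b[n->hash % new_size] = n;
      }
   }
   free(t->buckets);
   t->buckets = b;
   t->size = new_size;
}

int main(void)
{
   struct table t = { calloc(7, sizeof(struct node *)), 7, 0 };

   for (int v = 0; v < 20; v++) {
      struct node *n = malloc(sizeof(*n));
      n->hash = (unsigned)v * 2654435761u;   /* stand-in for XXH32 */
      n->value = v;
      n->next = t.buckets[n->hash % t.size];
      t.buckets[n->hash % t.size] = n;
      if (++t.n_items > t.size * 3 / 2)      /* same 1.5x load trigger */
         grow(&t);
   }
   printf("%u buckets for %u items\n", t.size, t.n_items);
   return 0;
}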


/**
 * Searches the cache for a program matching cache_id and key.  On a hit,
 * updates *inout_offset and *inout_prog_data and returns true; returns
 * false on a miss.
 */
bool
brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
                 const void *key, GLuint key_size, uint32_t *inout_offset,
                 void *inout_prog_data, bool flag_state)
{
   struct brw_cache_item *item;
   struct brw_cache_item lookup;
   GLuint hash;

   lookup.cache_id = cache_id;
   lookup.key = key;
   lookup.key_size = key_size;
   hash = hash_key(&lookup);
   lookup.hash = hash;

   item = search_cache(cache, hash, &lookup);

   if (item == NULL)
      return false;

   void *prog_data = ((char *) item->key) + item->key_size;

   if (item->offset != *inout_offset ||
       prog_data != *((void **) inout_prog_data)) {
      if (likely(flag_state))
         cache->brw->ctx.NewDriverState |= (1 << cache_id);
      *inout_offset = item->offset;
      *((void **) inout_prog_data) = prog_data;
   }

   return true;
}

static void
brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
{
   struct brw_context *brw = cache->brw;
   struct brw_bo *new_bo;

   perf_debug("Copying to larger program cache: %u kB -> %u kB\n",
              (unsigned) cache->bo->size / 1024, new_size / 1024);

   new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size,
                         BRW_MEMZONE_SHADER);
   if (can_do_exec_capture(brw->screen))
      new_bo->kflags |= EXEC_OBJECT_CAPTURE;

   void *map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE |
                                       MAP_ASYNC | MAP_PERSISTENT);

   /* Copy any existing data that needs to be saved. */
   if (cache->next_offset != 0) {
#ifdef USE_SSE41
      if (!cache->bo->cache_coherent && cpu_has_sse4_1)
         _mesa_streaming_load_memcpy(map, cache->map, cache->next_offset);
      else
#endif
         memcpy(map, cache->map, cache->next_offset);
   }

   brw_bo_unmap(cache->bo);
   brw_bo_unreference(cache->bo);
   cache->bo = new_bo;
   cache->map = map;

   /* Since we have a new BO in place, we need to signal the units
    * that depend on it (state base address on gfx5+, or unit state before).
    */
   brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
   brw->batch.state_base_address_emitted = false;
}

/**
 * Attempts to find an item in the cache with identical data.
 */
static const struct brw_cache_item *
brw_lookup_prog(const struct brw_cache *cache,
                enum brw_cache_id cache_id,
                const void *data, unsigned data_size)
{
   unsigned i;
   const struct brw_cache_item *item;

   for (i = 0; i < cache->size; i++) {
      for (item = cache->items[i]; item; item = item->next) {
         if (item->cache_id != cache_id || item->size != data_size ||
             memcmp(cache->map + item->offset, data, item->size) != 0)
            continue;

         return item;
      }
   }

   return NULL;
}

static uint32_t
brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
{
   uint32_t offset;

   /* Allocate space in the cache BO for our new program. */
   if (cache->next_offset + size > cache->bo->size) {
      uint32_t new_size = cache->bo->size * 2;

      while (cache->next_offset + size > new_size)
         new_size *= 2;

      brw_cache_new_bo(cache, new_size);
   }

   offset = cache->next_offset;

   /* Programs are always 64-byte aligned, so set up the next one now */
   cache->next_offset = ALIGN(offset + size, 64);

   return offset;
}
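
/* [Editor's sketch, not from the driver.] Two details above are worth
 * seeing in isolation: the BO doubles until the request fits, and each
 * allocation is rounded up so the next program starts 64-byte aligned.
 * ALIGN here is the usual power-of-two round-up; the name matches Mesa's
 * macro, but this definition is local to the example.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

int main(void)
{
   uint32_t bo_size = 16384, next_offset = 16000;
   uint32_t size = 5000;                     /* incoming program */

   /* Grow the buffer exactly as brw_alloc_item_data() does. */
   if (next_offset + size > bo_size) {
      uint32_t new_size = bo_size * 2;
      while (next_offset + size > new_size)
         new_size *= 2;
      bo_size = new_size;                    /* would trigger a BO copy */
   }

   uint32_t offset = next_offset;
   next_offset = ALIGN(offset + size, 64);

   assert(bo_size == 32768);
   assert(next_offset % 64 == 0);
   printf("program at %u, next free offset %u\n", offset, next_offset);
   return 0;
}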

const void *
brw_find_previous_compile(struct brw_cache *cache,
                          enum brw_cache_id cache_id,
                          unsigned program_string_id)
{
   for (unsigned i = 0; i < cache->size; i++) {
      for (struct brw_cache_item *c = cache->items[i]; c; c = c->next) {
         if (c->cache_id == cache_id &&
             c->key->program_string_id == program_string_id) {
            return c->key;
         }
      }
   }

   return NULL;
}

void
brw_upload_cache(struct brw_cache *cache,
                 enum brw_cache_id cache_id,
                 const void *key,
                 GLuint key_size,
                 const void *data,
                 GLuint data_size,
                 const void *prog_data,
                 GLuint prog_data_size,
                 uint32_t *out_offset,
                 void *out_prog_data)
{
   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
   const struct brw_cache_item *matching_data =
      brw_lookup_prog(cache, cache_id, data, data_size);
   GLuint hash;
   void *tmp;

   item->cache_id = cache_id;
   item->size = data_size;
   item->key = key;
   item->key_size = key_size;
   item->prog_data_size = prog_data_size;
   hash = hash_key(item);
   item->hash = hash;

   /* If we can find a matching prog in the cache already, then reuse the
    * existing stuff without creating a new copy in the underlying buffer
    * object.  This is notably useful for programs generating shaders at
    * runtime, where multiple shaders may compile to the same thing in our
    * backend.
    */
   if (matching_data) {
      item->offset = matching_data->offset;
   } else {
      item->offset = brw_alloc_item_data(cache, data_size);

      /* Copy data to the buffer */
      memcpy(cache->map + item->offset, data, data_size);
   }

   /* Set up the memory containing the key and prog_data */
   tmp = malloc(key_size + prog_data_size);

   memcpy(tmp, key, key_size);
   memcpy(tmp + key_size, prog_data, prog_data_size);

   item->key = tmp;

   if (cache->n_items > cache->size * 1.5f)
      rehash(cache);

   hash %= cache->size;
   item->next = cache->items[hash];
   cache->items[hash] = item;
   cache->n_items++;

   *out_offset = item->offset;
   *(void **)out_prog_data = (void *)((char *)item->key + item->key_size);
   cache->brw->ctx.NewDriverState |= 1 << cache_id;
}
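
/* [Editor's sketch, not from the driver.] brw_upload_cache() dedups at
 * the level of the compiled bytes: if another cache item already holds an
 * identical program, the new item just points at the old offset.  A
 * minimal standalone version of that idea, with invented names:
 */
#include <stdio.h>
#include <string.h>

static unsigned char store[1024];   /* stand-in for the cache BO map */
static unsigned store_used;

struct entry { unsigned offset, size; };

static unsigned
append_dedup(const void *data, unsigned size,
             const struct entry *entries, unsigned n_entries)
{
   /* Reuse any existing run of identical bytes. */
   for (unsigned i = 0; i < n_entries; i++) {
      if (entries[i].size == size &&
          memcmp(store + entries[i].offset, data, size) == 0)
         return entries[i].offset;
   }
   unsigned offset = store_used;
   memcpy(store + offset, data, size);
   store_used += size;
   return offset;
}

int main(void)
{
   struct entry entries[2];
   entries[0] = (struct entry){ append_dedup("KERNEL-A", 8, entries, 0), 8 };
   entries[1] = (struct entry){ append_dedup("KERNEL-A", 8, entries, 1), 8 };
   /* The second upload reuses the first program's bytes. */
   printf("offsets: %u and %u, bytes used: %u\n",
          entries[0].offset, entries[1].offset, store_used);
   return 0;
}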

void
brw_init_caches(struct brw_context *brw)
{
   struct brw_cache *cache = &brw->cache;

   cache->brw = brw;

   cache->size = 7;
   cache->n_items = 0;
   cache->items =
      calloc(cache->size, sizeof(struct brw_cache_item *));

   cache->bo = brw_bo_alloc(brw->bufmgr, "program cache", 16384,
                            BRW_MEMZONE_SHADER);
   if (can_do_exec_capture(brw->screen))
      cache->bo->kflags |= EXEC_OBJECT_CAPTURE;

   cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE |
                                           MAP_ASYNC | MAP_PERSISTENT);
}

static void
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
   struct brw_cache_item *c, *next;
   GLuint i;

   DBG("%s\n", __func__);

   for (i = 0; i < cache->size; i++) {
      for (c = cache->items[i]; c; c = next) {
         next = c->next;
         if (c->cache_id == BRW_CACHE_VS_PROG ||
             c->cache_id == BRW_CACHE_TCS_PROG ||
             c->cache_id == BRW_CACHE_TES_PROG ||
             c->cache_id == BRW_CACHE_GS_PROG ||
             c->cache_id == BRW_CACHE_FS_PROG ||
             c->cache_id == BRW_CACHE_CS_PROG) {
            const void *item_prog_data = ((char *)c->key) + c->key_size;
            brw_stage_prog_data_free(item_prog_data);
         }
         free((void *)c->key);
         free(c);
      }
      cache->items[i] = NULL;
   }

   cache->n_items = 0;

   /* Start putting programs into the start of the BO again, since
    * we'll never find the old results.
    */
   cache->next_offset = 0;

   /* We need to make sure that the programs get regenerated, since
    * any offsets leftover in brw_context will no longer be valid.
    */
   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;
   brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
   brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
   brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
   brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;

   /* Also, NULL out any stale program pointers. */
   brw->vs.base.prog_data = NULL;
   brw->tcs.base.prog_data = NULL;
   brw->tes.base.prog_data = NULL;
   brw->gs.base.prog_data = NULL;
   brw->wm.base.prog_data = NULL;
   brw->cs.base.prog_data = NULL;

   brw_batch_flush(brw);
}

void
brw_program_cache_check_size(struct brw_context *brw)
{
   /* An un-tuned guess.  Each object is generally a page, so 2000 of them
    * is 8 MB of state cache.
    */
   if (brw->cache.n_items > 2000) {
      perf_debug("Exceeded state cache size limit.  Clearing the set "
                 "of compiled programs, which will trigger recompiles\n");
      brw_clear_cache(brw, &brw->cache);
      brw_cache_new_bo(&brw->cache, brw->cache.bo->size);
   }
}


static void
brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{

   DBG("%s\n", __func__);

   /* This can be NULL if context creation failed early on */
   if (cache->bo) {
      brw_bo_unmap(cache->bo);
      brw_bo_unreference(cache->bo);
      cache->bo = NULL;
      cache->map = NULL;
   }
   brw_clear_cache(brw, cache);
   free(cache->items);
   cache->items = NULL;
   cache->size = 0;
}


void
brw_destroy_caches(struct brw_context *brw)
{
   brw_destroy_cache(brw, &brw->cache);
}

static const char *
cache_name(enum brw_cache_id cache_id)
{
   switch (cache_id) {
   case BRW_CACHE_VS_PROG:
      return "VS kernel";
   case BRW_CACHE_TCS_PROG:
      return "TCS kernel";
   case BRW_CACHE_TES_PROG:
      return "TES kernel";
   case BRW_CACHE_FF_GS_PROG:
      return "Fixed-function GS kernel";
   case BRW_CACHE_GS_PROG:
      return "GS kernel";
   case BRW_CACHE_CLIP_PROG:
      return "CLIP kernel";
   case BRW_CACHE_SF_PROG:
      return "SF kernel";
   case BRW_CACHE_FS_PROG:
      return "FS kernel";
   case BRW_CACHE_CS_PROG:
      return "CS kernel";
   default:
      return "unknown";
   }
}

void
brw_print_program_cache(struct brw_context *brw)
{
   const struct brw_cache *cache = &brw->cache;
   struct brw_cache_item *item;

   for (unsigned i = 0; i < cache->size; i++) {
      for (item = cache->items[i]; item; item = item->next) {
         fprintf(stderr, "%s:\n", cache_name(item->cache_id));
         brw_disassemble_with_labels(&brw->screen->devinfo, cache->map,
                                     item->offset, item->size, stderr);
      }
   }
}
@@ -1,621 +0,0 @@
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

/** @file brw_queryobj.c
 *
 * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
 * GL_EXT_transform_feedback, and friends).
 *
 * The hardware provides a PIPE_CONTROL command that can report the number of
 * fragments that passed the depth test, or the hardware timer.  They are
 * appropriately synced with the stage of the pipeline for our extensions'
 * needs.
 */
#include "main/queryobj.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_batch.h"

/* As best we know currently, the Gen HW timestamps are 36bits across
 * all platforms, which we need to account for when calculating a
 * delta to measure elapsed time.
 *
 * The timestamps read via glGetTimestamp() / brw_get_timestamp() sometimes
 * only have 32bits due to a kernel bug, and so in that case we make sure to
 * treat all raw timestamps as 32bits so they overflow consistently and remain
 * comparable.  (Note: the timestamps being passed here are not from the
 * kernel, so we don't need to take the upper 32bits in that buggy-kernel
 * case; we are just clipping to 32bits here for consistency.)
 */
uint64_t
brw_raw_timestamp_delta(struct brw_context *brw, uint64_t time0, uint64_t time1)
{
   if (brw->screen->hw_has_timestamp == 2) {
      /* Kernel clips timestamps to 32bits in this case, so we also clip
       * PIPE_CONTROL timestamps for consistency.
       */
      return (uint32_t)time1 - (uint32_t)time0;
   } else {
      if (time0 > time1) {
         return (1ULL << 36) + time1 - time0;
      } else {
         return time1 - time0;
      }
   }
}
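
/* [Editor's sketch, not from the driver.] The 36-bit wraparound case
 * above is easy to get wrong, so here it is in a standalone, checkable
 * form: when the raw counter wraps between two samples, time0 is larger
 * than time1 and the delta must add back one full 2^36 period.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t raw_delta_36(uint64_t time0, uint64_t time1)
{
   if (time0 > time1)
      return (1ULL << 36) + time1 - time0;   /* wrapped between samples */
   else
      return time1 - time0;
}

int main(void)
{
   /* Sample near the top of the 36-bit range, then just after the wrap. */
   uint64_t before = (1ULL << 36) - 100;
   uint64_t after  = 50;

   assert(raw_delta_36(before, after) == 150);
   assert(raw_delta_36(1000, 4000) == 3000);  /* no wrap: plain subtraction */
   return 0;
}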

/**
 * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
 */
void
brw_write_timestamp(struct brw_context *brw, struct brw_bo *query_bo, int idx)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->ver == 6) {
      /* Emit Sandybridge workaround flush: */
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_CS_STALL |
                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);
   }

   uint32_t flags = PIPE_CONTROL_WRITE_TIMESTAMP;

   if (devinfo->ver == 9 && devinfo->gt == 4)
      flags |= PIPE_CONTROL_CS_STALL;

   brw_emit_pipe_control_write(brw, flags,
                               query_bo, idx * sizeof(uint64_t), 0);
}

/**
 * Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
 */
void
brw_write_depth_count(struct brw_context *brw, struct brw_bo *query_bo, int idx)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   uint32_t flags = PIPE_CONTROL_WRITE_DEPTH_COUNT | PIPE_CONTROL_DEPTH_STALL;

   if (devinfo->ver == 9 && devinfo->gt == 4)
      flags |= PIPE_CONTROL_CS_STALL;

   if (devinfo->ver >= 10) {
      /* "Driver must program PIPE_CONTROL with only Depth Stall Enable bit set
       * prior to programming a PIPE_CONTROL with Write PS Depth Count Post sync
       * operation."
       */
      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
   }

   brw_emit_pipe_control_write(brw, flags,
                               query_bo, idx * sizeof(uint64_t), 0);
}

/**
 * Wait on the query object's BO and calculate the final result.
 */
static void
brw_queryobj_get_results(struct gl_context *ctx,
                         struct brw_query_object *query)
{
   struct brw_context *brw = brw_context(ctx);
   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;

   int i;
   uint64_t *results;

   assert(devinfo->ver < 6);

   if (query->bo == NULL)
      return;

   /* If the application has requested the query result, but this batch is
    * still contributing to it, flush it now so the results will be present
    * when mapped.
    */
   if (brw_batch_references(&brw->batch, query->bo))
      brw_batch_flush(brw);

   if (unlikely(brw->perf_debug)) {
      if (brw_bo_busy(query->bo)) {
         perf_debug("Stalling on the GPU waiting for a query object.\n");
      }
   }

   results = brw_bo_map(brw, query->bo, MAP_READ);
   switch (query->Base.Target) {
   case GL_TIME_ELAPSED_EXT:
      /* The query BO contains the starting and ending timestamps.
       * Subtract the two and convert to nanoseconds.
       */
      query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]);
      query->Base.Result = intel_device_info_timebase_scale(devinfo, query->Base.Result);
      break;

   case GL_TIMESTAMP:
      /* The query BO contains a single timestamp value in results[0]. */
      query->Base.Result = intel_device_info_timebase_scale(devinfo, results[0]);

      /* Ensure the scaled timestamp overflows according to
       * GL_QUERY_COUNTER_BITS
       */
      query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
      break;

   case GL_SAMPLES_PASSED_ARB:
      /* Loop over pairs of values from the BO, which are the PS_DEPTH_COUNT
       * value at the start and end of the batchbuffer.  Subtract them to
       * get the number of fragments which passed the depth test in each
       * individual batch, and add those differences up to get the number
       * of fragments for the entire query.
       *
       * Note that query->Base.Result may already be non-zero.  We may have
       * run out of space in the query's BO and allocated a new one.  If so,
       * this function was already called to accumulate the results so far.
       */
      for (i = 0; i < query->last_index; i++) {
         query->Base.Result += results[i * 2 + 1] - results[i * 2];
      }
      break;

   case GL_ANY_SAMPLES_PASSED:
   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
      /* If the starting and ending PS_DEPTH_COUNT from any of the batches
       * differ, then some fragments passed the depth test.
       */
      for (i = 0; i < query->last_index; i++) {
         if (results[i * 2 + 1] != results[i * 2]) {
            query->Base.Result = GL_TRUE;
            break;
         }
      }
      break;

   default:
      unreachable("Unrecognized query target in brw_queryobj_get_results()");
   }
   brw_bo_unmap(query->bo);

   /* Now that we've processed the data stored in the query's buffer object,
    * we can release it.
    */
   brw_bo_unreference(query->bo);
   query->bo = NULL;
}
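
/* [Editor's sketch, not from the driver.] The GL_SAMPLES_PASSED case
 * above reduces an array of (begin, end) PS_DEPTH_COUNT snapshots, one
 * pair per batchbuffer, into a single fragment count.  Standalone:
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* results[2i] is the count at batch start, results[2i+1] at batch end;
    * the absolute values are meaningless (the register is shared), only
    * the per-batch differences belong to this context's query.
    */
   uint64_t results[] = { 100, 140,     /* batch 0: 40 samples passed */
                          500, 500,     /* batch 1: nothing drawn     */
                          900, 925 };   /* batch 2: 25 samples passed */
   int last_index = 3;

   uint64_t samples_passed = 0;
   for (int i = 0; i < last_index; i++)
      samples_passed += results[i * 2 + 1] - results[i * 2];

   assert(samples_passed == 65);
   return 0;
}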

/**
 * The NewQueryObject() driver hook.
 *
 * Allocates and initializes a new query object.
 */
static struct gl_query_object *
brw_new_query_object(struct gl_context *ctx, GLuint id)
{
   struct brw_query_object *query;

   query = calloc(1, sizeof(struct brw_query_object));

   query->Base.Id = id;
   query->Base.Result = 0;
   query->Base.Active = false;
   query->Base.Ready = true;

   return &query->Base;
}

/**
 * The DeleteQuery() driver hook.
 */
static void
brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
{
   struct brw_query_object *query = (struct brw_query_object *)q;

   brw_bo_unreference(query->bo);
   _mesa_delete_query(ctx, q);
}

/**
 * Gfx4-5 driver hook for glBeginQuery().
 *
 * Initializes driver structures and emits any GPU commands required to begin
 * recording data for the query.
 */
static void
brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_query_object *query = (struct brw_query_object *)q;
   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;

   assert(devinfo->ver < 6);

   switch (query->Base.Target) {
   case GL_TIME_ELAPSED_EXT:
      /* For timestamp queries, we record the starting time right away so that
       * we measure the full time between BeginQuery and EndQuery.  There's
       * some debate about whether this is the right thing to do.  Our decision
       * is based on the following text from the ARB_timer_query extension:
       *
       * "(5) Should the extension measure total time elapsed between the full
       *      completion of the BeginQuery and EndQuery commands, or just time
       *      spent in the graphics library?
       *
       *  RESOLVED:  This extension will measure the total time elapsed
       *  between the full completion of these commands.  Future extensions
       *  may implement a query to determine time elapsed at different stages
       *  of the graphics pipeline."
       *
       * We write a starting timestamp now (at index 0).  At EndQuery() time,
       * we'll write a second timestamp (at index 1), and subtract the two to
       * obtain the time elapsed.  Notably, this includes time elapsed while
       * the system was doing other work, such as running other applications.
       */
      brw_bo_unreference(query->bo);
      query->bo =
         brw_bo_alloc(brw->bufmgr, "timer query", 4096, BRW_MEMZONE_OTHER);
      brw_write_timestamp(brw, query->bo, 0);
      break;

   case GL_ANY_SAMPLES_PASSED:
   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
   case GL_SAMPLES_PASSED_ARB:
      /* For occlusion queries, we delay taking an initial sample until the
       * first drawing occurs in this batch.  See the reasoning in the comments
       * for brw_emit_query_begin() below.
       *
       * Since we're starting a new query, we need to be sure to throw away
       * any previous occlusion query results.
       */
      brw_bo_unreference(query->bo);
      query->bo = NULL;
      query->last_index = -1;

      brw->query.obj = query;

      /* Depth statistics on Gfx4 require strange workarounds, so we try to
       * avoid them when possible.  They're required for occlusion queries,
       * so turn them on now.
       */
      brw->stats_wm++;
      brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
      break;

   default:
      unreachable("Unrecognized query target in brw_begin_query()");
   }
}

/**
 * Gfx4-5 driver hook for glEndQuery().
 *
 * Emits GPU commands to record a final query value, ending any data capturing.
 * However, the final result isn't necessarily available until the GPU processes
 * those commands.  brw_queryobj_get_results() processes the captured data to
 * produce the final result.
 */
static void
brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_query_object *query = (struct brw_query_object *)q;
   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;

   assert(devinfo->ver < 6);

   switch (query->Base.Target) {
   case GL_TIME_ELAPSED_EXT:
      /* Write the final timestamp. */
      brw_write_timestamp(brw, query->bo, 1);
      break;

   case GL_ANY_SAMPLES_PASSED:
   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
   case GL_SAMPLES_PASSED_ARB:

      /* No query->bo means that EndQuery was called after BeginQuery with no
       * intervening drawing.  Rather than doing nothing at all here in this
       * case, we emit the query_begin and query_end state to the
       * hardware.  This is to guarantee that waiting on the result of this
       * empty state will cause all previous queries to complete as well, as
       * required by the OpenGL 4.3 (Core Profile) spec, section 4.2.1:
       *
       *    "It must always be true that if any query object returns
       *     a result available of TRUE, all queries of the same type
       *     issued prior to that query must also return TRUE."
       */
      if (!query->bo) {
         brw_emit_query_begin(brw);
      }

      assert(query->bo);

      brw_emit_query_end(brw);

      brw->query.obj = NULL;

      brw->stats_wm--;
      brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
      break;

   default:
      unreachable("Unrecognized query target in brw_end_query()");
   }
}

/**
 * The Gfx4-5 WaitQuery() driver hook.
 *
 * Wait for a query result to become available and return it.  This is the
 * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
 */
static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q)
{
   struct brw_query_object *query = (struct brw_query_object *)q;
   UNUSED const struct intel_device_info *devinfo =
      &brw_context(ctx)->screen->devinfo;

   assert(devinfo->ver < 6);

   brw_queryobj_get_results(ctx, query);
   query->Base.Ready = true;
}

/**
 * The Gfx4-5 CheckQuery() driver hook.
 *
 * Checks whether a query result is ready yet.  If not, flushes.
 * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
 */
static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_query_object *query = (struct brw_query_object *)q;
   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;

   assert(devinfo->ver < 6);

   /* From the GL_ARB_occlusion_query spec:
    *
    *    "Instead of allowing for an infinite loop, performing a
    *     QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
    *     not ready yet on the first time it is queried.  This ensures that
    *     the async query will return true in finite time."
    */
   if (query->bo && brw_batch_references(&brw->batch, query->bo))
      brw_batch_flush(brw);

   if (query->bo == NULL || !brw_bo_busy(query->bo)) {
      brw_queryobj_get_results(ctx, query);
      query->Base.Ready = true;
   }
}

/**
 * Ensure the query's BO has enough space to store a new pair of values.
 *
 * If not, gather the existing BO's results and create a new buffer of the
 * same size.
 */
static void
ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query)
{
   struct brw_context *brw = brw_context(ctx);
   UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;

   assert(devinfo->ver < 6);

   if (!query->bo || query->last_index * 2 + 1 >= 4096 / sizeof(uint64_t)) {

      if (query->bo != NULL) {
         /* The old query BO did not have enough space, so we allocated a new
          * one.  Gather the results so far (adding up the differences) and
          * release the old BO.
          */
         brw_queryobj_get_results(ctx, query);
      }

      query->bo = brw_bo_alloc(brw->bufmgr, "query", 4096, BRW_MEMZONE_OTHER);
      query->last_index = 0;
   }
}

/**
 * Record the PS_DEPTH_COUNT value (for occlusion queries) just before
 * primitive drawing.
 *
 * In a pre-hardware context world, the single PS_DEPTH_COUNT register is
 * shared among all applications using the GPU.  However, our query value
 * needs to only include fragments generated by our application/GL context.
 *
 * To accommodate this, we record PS_DEPTH_COUNT at the start and end of
 * each batchbuffer (technically, the first primitive drawn and flush time).
 * Subtracting each pair of values calculates the change in PS_DEPTH_COUNT
 * caused by a batchbuffer.  Since there is no preemption inside batches,
 * this is guaranteed to only measure the effects of our current application.
 *
 * Adding each of these differences (in case drawing is done over many batches)
 * produces the final expected value.
 *
 * In a world with hardware contexts, PS_DEPTH_COUNT is saved and restored
 * as part of the context state, so this is unnecessary, and skipped.
 */
void
brw_emit_query_begin(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct brw_query_object *query = brw->query.obj;

   /* Skip if we're not doing any queries, or we've already recorded the
    * initial query value for this batchbuffer.
    */
   if (!query || brw->query.begin_emitted)
      return;

   ensure_bo_has_space(ctx, query);

   brw_write_depth_count(brw, query->bo, query->last_index * 2);

   brw->query.begin_emitted = true;
}

/**
 * Called at batchbuffer flush to get an ending PS_DEPTH_COUNT
 * (for non-hardware context platforms).
 *
 * See the explanation in brw_emit_query_begin().
 */
void
brw_emit_query_end(struct brw_context *brw)
{
   struct brw_query_object *query = brw->query.obj;

   if (!brw->query.begin_emitted)
      return;

   brw_write_depth_count(brw, query->bo, query->last_index * 2 + 1);

   brw->query.begin_emitted = false;
   query->last_index++;
}

/**
 * Driver hook for glQueryCounter().
 *
 * This handles GL_TIMESTAMP queries, which perform a pipelined read of the
 * current GPU time.  This is unlike GL_TIME_ELAPSED, which measures the
 * time while the query is active.
 */
void
brw_query_counter(struct gl_context *ctx, struct gl_query_object *q)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_query_object *query = (struct brw_query_object *) q;

   assert(q->Target == GL_TIMESTAMP);

   brw_bo_unreference(query->bo);
   query->bo =
      brw_bo_alloc(brw->bufmgr, "timestamp query", 4096, BRW_MEMZONE_OTHER);
   brw_write_timestamp(brw, query->bo, 0);

   query->flushed = false;
}

/**
 * Read the TIMESTAMP register immediately (in a non-pipelined fashion).
 *
 * This is used to implement the GetTimestamp() driver hook.
 */
static uint64_t
brw_get_timestamp(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   uint64_t result = 0;

   switch (brw->screen->hw_has_timestamp) {
   case 3: /* New kernel, always full 36bit accuracy */
      brw_reg_read(brw->bufmgr, TIMESTAMP | 1, &result);
      break;
   case 2: /* 64bit kernel, result is left-shifted by 32bits, losing 4bits */
      brw_reg_read(brw->bufmgr, TIMESTAMP, &result);
      result = result >> 32;
      break;
   case 1: /* 32bit kernel, result is 36bit wide but may be inaccurate! */
      brw_reg_read(brw->bufmgr, TIMESTAMP, &result);
      break;
   }

   /* Scale to nanosecond units */
   result = intel_device_info_timebase_scale(devinfo, result);

   /* Ensure the scaled timestamp overflows according to
    * GL_QUERY_COUNTER_BITS.  Technically this isn't required if
    * querying GL_TIMESTAMP via glGetInteger but it seems best to keep
    * QueryObject and GetInteger timestamps consistent.
    */
   result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
   return result;
}

/**
 * Is this type of query written by PIPE_CONTROL?
 */
bool
brw_is_query_pipelined(struct brw_query_object *query)
{
   switch (query->Base.Target) {
   case GL_TIMESTAMP:
   case GL_TIME_ELAPSED:
   case GL_ANY_SAMPLES_PASSED:
   case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
   case GL_SAMPLES_PASSED_ARB:
      return true;

   case GL_PRIMITIVES_GENERATED:
   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
   case GL_VERTICES_SUBMITTED_ARB:
   case GL_PRIMITIVES_SUBMITTED_ARB:
   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
   case GL_GEOMETRY_SHADER_INVOCATIONS:
   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
      return false;

   default:
      unreachable("Unrecognized query target in brw_is_query_pipelined()");
   }
}

/* Initialize query object functions used on all generations. */
void brw_init_common_queryobj_functions(struct dd_function_table *functions)
{
   functions->NewQueryObject = brw_new_query_object;
   functions->DeleteQuery = brw_delete_query;
   functions->GetTimestamp = brw_get_timestamp;
}

/* Initialize Gfx4/5-specific query object functions. */
void gfx4_init_queryobj_functions(struct dd_function_table *functions)
{
   functions->BeginQuery = brw_begin_query;
   functions->EndQuery = brw_end_query;
   functions->CheckQuery = brw_check_query;
   functions->WaitQuery = brw_wait_query;
   functions->QueryCounter = brw_query_counter;
}
@@ -1,86 +0,0 @@
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "main/context.h"

#include <xf86drm.h>
#include "brw_context.h"

/**
 * Query information about GPU resets observed by this context
 *
 * Called via \c dd_function_table::GetGraphicsResetStatus.
 */
GLenum
brw_get_graphics_reset_status(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx };

   /* If hardware contexts are not being used (or
    * DRM_IOCTL_I915_GET_RESET_STATS is not supported), this function should
    * not be accessible.
    */
   assert(brw->hw_ctx != 0);

   /* A reset status other than NO_ERROR was returned last time.  I915 returns
    * nonzero active/pending only if reset has been encountered and completed.
    * Return NO_ERROR from now on.
    */
   if (brw->reset_count != 0)
      return GL_NO_ERROR;

   if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0)
      return GL_NO_ERROR;

   /* A reset was observed while a batch from this context was executing.
    * Assume that this context was at fault.
    */
   if (stats.batch_active != 0) {
      brw->reset_count = stats.reset_count;
      return GL_GUILTY_CONTEXT_RESET_ARB;
   }

   /* A reset was observed while a batch from this context was in progress,
    * but the batch was not executing.  In this case, assume that the context
    * was not at fault.
    */
   if (stats.batch_pending != 0) {
      brw->reset_count = stats.reset_count;
      return GL_INNOCENT_CONTEXT_RESET_ARB;
   }

   return GL_NO_ERROR;
}

void
brw_check_for_reset(struct brw_context *brw)
{
   struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx };

   if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0)
      return;

   if (stats.batch_active > 0 || stats.batch_pending > 0)
      _mesa_set_context_lost_dispatch(&brw->ctx);
}
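
/* [Editor's sketch, not from the driver.] The classification above boils
 * down to: guilty if one of our batches was on the GPU when the hang
 * happened, innocent if one was merely queued.  A standalone restatement
 * of that rule (enum and function names invented for the example):
 */
#include <assert.h>

enum reset_status { STATUS_NO_ERROR, STATUS_GUILTY, STATUS_INNOCENT };

static enum reset_status
classify(unsigned batch_active, unsigned batch_pending)
{
   if (batch_active != 0)
      return STATUS_GUILTY;    /* our batch was executing during the reset */
   if (batch_pending != 0)
      return STATUS_INNOCENT;  /* our batch was queued but not yet running */
   return STATUS_NO_ERROR;     /* the reset didn't involve this context    */
}

int main(void)
{
   assert(classify(1, 0) == STATUS_GUILTY);
   assert(classify(0, 3) == STATUS_INNOCENT);
   assert(classify(0, 0) == STATUS_NO_ERROR);
   return 0;
}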
File diff suppressed because it is too large

@@ -1,173 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef _INTEL_INIT_H_
#define _INTEL_INIT_H_

#include <stdbool.h>
#include <sys/time.h>

#include <GL/internal/dri_interface.h>

#include "isl/isl.h"
#include "dri_util.h"
#include "brw_bufmgr.h"
#include "dev/intel_device_info.h"
#include "drm-uapi/i915_drm.h"
#include "util/xmlconfig.h"

#ifdef __cplusplus
extern "C" {
#endif

struct brw_screen
{
   int deviceID;
   struct intel_device_info devinfo;

   __DRIscreen *driScrnPriv;

   uint64_t max_gtt_map_object_size;

   /** Bytes of aperture usage beyond which execbuf is likely to fail. */
   uint64_t aperture_threshold;

   /** DRM fd associated with this screen.  Not owned by this object.  Do not close. */
   int fd;

   bool has_exec_fence; /**< I915_PARAM_HAS_EXEC_FENCE */

   int hw_has_timestamp;

   struct isl_device isl_dev;

   /**
    * Does the kernel support context reset notifications?
    */
   bool has_context_reset_notification;

   /**
    * Does the kernel support features such as pipelined register access to
    * specific registers?
    */
   unsigned kernel_features;
#define KERNEL_ALLOWS_SOL_OFFSET_WRITES             (1<<0)
#define KERNEL_ALLOWS_PREDICATE_WRITES              (1<<1)
#define KERNEL_ALLOWS_MI_MATH_AND_LRR               (1<<2)
#define KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3 (1<<3)
#define KERNEL_ALLOWS_COMPUTE_DISPATCH              (1<<4)
#define KERNEL_ALLOWS_EXEC_CAPTURE                  (1<<5)
#define KERNEL_ALLOWS_EXEC_BATCH_FIRST              (1<<6)
#define KERNEL_ALLOWS_CONTEXT_ISOLATION             (1<<7)

   struct brw_bufmgr *bufmgr;

   /**
    * A unique ID for shader programs.
    */
   unsigned program_id;

   int winsys_msaa_samples_override;

   struct brw_compiler *compiler;

   /**
    * Configuration cache with default values for all contexts
    */
   driOptionCache optionCache;

   /**
    * Version of the command parser reported by the
    * I915_PARAM_CMD_PARSER_VERSION parameter
    */
   int cmd_parser_version;

   bool mesa_format_supports_texture[MESA_FORMAT_COUNT];
   bool mesa_format_supports_render[MESA_FORMAT_COUNT];
   enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT];

   struct disk_cache *disk_cache;
};

extern void brw_destroy_context(__DRIcontext *driContextPriv);

extern GLboolean brw_unbind_context(__DRIcontext *driContextPriv);

PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void);
extern const __DRI2fenceExtension brwFenceExtension;

extern GLboolean
brw_make_current(__DRIcontext *driContextPriv,
                 __DRIdrawable *driDrawPriv,
                 __DRIdrawable *driReadPriv);

double get_time(void);

const int *
brw_supported_msaa_modes(const struct brw_screen *screen);

static inline bool
can_do_pipelined_register_writes(const struct brw_screen *screen)
{
   return screen->kernel_features & KERNEL_ALLOWS_SOL_OFFSET_WRITES;
}

static inline bool
can_do_hsw_l3_atomics(const struct brw_screen *screen)
{
   return screen->kernel_features & KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3;
}

static inline bool
can_do_mi_math_and_lrr(const struct brw_screen *screen)
{
   return screen->kernel_features & KERNEL_ALLOWS_MI_MATH_AND_LRR;
}

static inline bool
can_do_compute_dispatch(const struct brw_screen *screen)
{
   return screen->kernel_features & KERNEL_ALLOWS_COMPUTE_DISPATCH;
}

static inline bool
can_do_predicate_writes(const struct brw_screen *screen)
{
   return screen->kernel_features & KERNEL_ALLOWS_PREDICATE_WRITES;
}

static inline bool
can_do_exec_capture(const struct brw_screen *screen)
{
   return screen->kernel_features & KERNEL_ALLOWS_EXEC_CAPTURE;
}

#ifdef __cplusplus
}
#endif

#endif
@@ -1,171 +0,0 @@
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "compiler/nir/nir.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/state.h"

#include "brw_batch.h"

#include "brw_defines.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_state.h"
#include "compiler/brw_eu.h"

#include "util/ralloc.h"

static void
compile_sf_prog(struct brw_context *brw, struct brw_sf_prog_key *key)
{
   const unsigned *program;
   void *mem_ctx;
   unsigned program_size;

   mem_ctx = ralloc_context(NULL);

   struct brw_sf_prog_data prog_data;
   program = brw_compile_sf(brw->screen->compiler, mem_ctx, key, &prog_data,
                            &brw->vue_map_geom_out, &program_size);

   brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG,
                    key, sizeof(*key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->sf.prog_offset, &brw->sf.prog_data);
   ralloc_free(mem_ctx);
}

/* Calculate interpolants for triangle and line rasterization.
 */
void
brw_upload_sf_prog(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct brw_sf_prog_key key;

   if (!brw_state_dirty(brw,
                        _NEW_BUFFERS |
                        _NEW_HINT |
                        _NEW_LIGHT |
                        _NEW_POINT |
                        _NEW_POLYGON |
                        _NEW_PROGRAM |
                        _NEW_TRANSFORM,
                        BRW_NEW_BLORP |
                        BRW_NEW_FS_PROG_DATA |
                        BRW_NEW_REDUCED_PRIMITIVE |
                        BRW_NEW_VUE_MAP_GEOM_OUT))
      return;

   /* _NEW_BUFFERS */
   bool flip_y = ctx->DrawBuffer->FlipY;

   memset(&key, 0, sizeof(key));

   /* Populate the key, noting state dependencies:
    */
   /* BRW_NEW_VUE_MAP_GEOM_OUT */
   key.attrs = brw->vue_map_geom_out.slots_valid;

   /* BRW_NEW_REDUCED_PRIMITIVE */
   switch (brw->reduced_primitive) {
   case GL_TRIANGLES:
      /* NOTE: We just use the edgeflag attribute as an indicator that
       * unfilled triangles are active.  We don't actually do the
       * edgeflag testing here, it is already done in the clip
       * program.
       */
      if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE))
         key.primitive = BRW_SF_PRIM_UNFILLED_TRIS;
      else
         key.primitive = BRW_SF_PRIM_TRIANGLES;
      break;
   case GL_LINES:
      key.primitive = BRW_SF_PRIM_LINES;
      break;
   case GL_POINTS:
      key.primitive = BRW_SF_PRIM_POINTS;
      break;
   }

   /* _NEW_TRANSFORM */
   key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);

   /* _NEW_POINT */
   key.do_point_sprite = ctx->Point.PointSprite;
   if (key.do_point_sprite) {
      key.point_sprite_coord_replace = ctx->Point.CoordReplace & 0xff;
   }
   if (brw->programs[MESA_SHADER_FRAGMENT]->info.inputs_read &
       BITFIELD64_BIT(VARYING_SLOT_PNTC)) {
      key.do_point_coord = 1;
   }

   /*
    * Window coordinates in a FBO are inverted, which means point
    * sprite origin must be inverted, too.
    */
   if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y)
      key.sprite_origin_lower_left = true;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);
   if (wm_prog_data) {
      key.contains_flat_varying = wm_prog_data->contains_flat_varying;

      STATIC_ASSERT(sizeof(key.interp_mode) ==
                    sizeof(wm_prog_data->interp_mode));
      memcpy(key.interp_mode, wm_prog_data->interp_mode,
             sizeof(key.interp_mode));
   }

   /* _NEW_LIGHT | _NEW_PROGRAM */
   key.do_twoside_color = _mesa_vertex_program_two_side_enabled(ctx);

   /* _NEW_POLYGON */
   if (key.do_twoside_color) {
      /* If we're rendering to a FBO, we have to invert the polygon
       * face orientation, just as we invert the viewport in
       * sf_unit_create_from_key().
       */
      key.frontface_ccw = brw->polygon_front_bit != flip_y;
   }

   if (!brw_search_cache(&brw->cache, BRW_CACHE_SF_PROG, &key, sizeof(key),
                         &brw->sf.prog_offset, &brw->sf.prog_data, true)) {
      compile_sf_prog(brw, &key);
   }
}
@@ -1,119 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/dd.h"

#include "brw_screen.h"
#include "brw_context.h"
#include "brw_defines.h"

int
brw_translate_shadow_compare_func(GLenum func)
{
   /* GL specifies the result of shadow comparisons as:
    *    1 if ref <op> texel,
    *    0 otherwise.
    *
    * The hardware does:
    *    0 if texel <op> ref,
    *    1 otherwise.
    *
    * So, these look a bit strange because there's both a negation
    * and swapping of the arguments involved.
    */
   switch (func) {
   case GL_NEVER:
      return BRW_COMPAREFUNCTION_ALWAYS;
   case GL_LESS:
      return BRW_COMPAREFUNCTION_LEQUAL;
   case GL_LEQUAL:
      return BRW_COMPAREFUNCTION_LESS;
   case GL_GREATER:
      return BRW_COMPAREFUNCTION_GEQUAL;
   case GL_GEQUAL:
      return BRW_COMPAREFUNCTION_GREATER;
   case GL_NOTEQUAL:
      return BRW_COMPAREFUNCTION_EQUAL;
   case GL_EQUAL:
      return BRW_COMPAREFUNCTION_NOTEQUAL;
   case GL_ALWAYS:
      return BRW_COMPAREFUNCTION_NEVER;
   }

   unreachable("Invalid shadow comparison function.");
}
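
/* [Editor's sketch, not from the driver.] The negate-and-swap mapping
 * above can be sanity-checked exhaustively: for every (ref, texel) pair,
 * the GL rule "1 if ref <op> texel" must match the hardware rule "0 if
 * texel <hw_op> ref, 1 otherwise".  One representative case, GL_LESS
 * mapping to hardware LEQUAL, checked over a small integer domain:
 */
#include <assert.h>

static int gl_less(int ref, int texel)   { return ref < texel; }
static int hw_lequal(int texel, int ref) { return !(texel <= ref); }

int main(void)
{
   for (int ref = -2; ref <= 2; ref++)
      for (int texel = -2; texel <= 2; texel++)
         assert(gl_less(ref, texel) == hw_lequal(texel, ref));
   return 0;
}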

int
brw_translate_compare_func(GLenum func)
{
   switch (func) {
   case GL_NEVER:
      return BRW_COMPAREFUNCTION_NEVER;
   case GL_LESS:
      return BRW_COMPAREFUNCTION_LESS;
   case GL_LEQUAL:
      return BRW_COMPAREFUNCTION_LEQUAL;
   case GL_GREATER:
      return BRW_COMPAREFUNCTION_GREATER;
   case GL_GEQUAL:
      return BRW_COMPAREFUNCTION_GEQUAL;
   case GL_NOTEQUAL:
      return BRW_COMPAREFUNCTION_NOTEQUAL;
   case GL_EQUAL:
      return BRW_COMPAREFUNCTION_EQUAL;
   case GL_ALWAYS:
      return BRW_COMPAREFUNCTION_ALWAYS;
   }

   unreachable("Invalid comparison function.");
}

int
brw_translate_stencil_op(GLenum op)
{
   switch (op) {
   case GL_KEEP:
      return BRW_STENCILOP_KEEP;
   case GL_ZERO:
      return BRW_STENCILOP_ZERO;
   case GL_REPLACE:
      return BRW_STENCILOP_REPLACE;
   case GL_INCR:
      return BRW_STENCILOP_INCRSAT;
   case GL_DECR:
      return BRW_STENCILOP_DECRSAT;
   case GL_INCR_WRAP:
      return BRW_STENCILOP_INCR;
   case GL_DECR_WRAP:
      return BRW_STENCILOP_DECR;
   case GL_INVERT:
      return BRW_STENCILOP_INVERT;
   default:
      return BRW_STENCILOP_ZERO;
   }
}
@@ -1,370 +0,0 @@
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#ifndef BRW_STATE_H
#define BRW_STATE_H

#include "brw_context.h"

#ifdef __cplusplus
extern "C" {
#endif

enum intel_msaa_layout;

extern const struct brw_tracked_state brw_blend_constant_color;
extern const struct brw_tracked_state brw_clip_unit;
extern const struct brw_tracked_state brw_vs_pull_constants;
extern const struct brw_tracked_state brw_tcs_pull_constants;
extern const struct brw_tracked_state brw_tes_pull_constants;
extern const struct brw_tracked_state brw_gs_pull_constants;
extern const struct brw_tracked_state brw_wm_pull_constants;
extern const struct brw_tracked_state brw_cs_pull_constants;
extern const struct brw_tracked_state brw_constant_buffer;
extern const struct brw_tracked_state brw_curbe_offsets;
extern const struct brw_tracked_state brw_binding_table_pointers;
extern const struct brw_tracked_state brw_depthbuffer;
extern const struct brw_tracked_state brw_recalculate_urb_fence;
extern const struct brw_tracked_state brw_sf_vp;
extern const struct brw_tracked_state brw_cs_texture_surfaces;
extern const struct brw_tracked_state brw_vs_ubo_surfaces;
extern const struct brw_tracked_state brw_vs_image_surfaces;
extern const struct brw_tracked_state brw_tcs_ubo_surfaces;
extern const struct brw_tracked_state brw_tcs_image_surfaces;
extern const struct brw_tracked_state brw_tes_ubo_surfaces;
extern const struct brw_tracked_state brw_tes_image_surfaces;
extern const struct brw_tracked_state brw_gs_ubo_surfaces;
extern const struct brw_tracked_state brw_gs_image_surfaces;
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
extern const struct brw_tracked_state brw_renderbuffer_read_surfaces;
extern const struct brw_tracked_state brw_texture_surfaces;
extern const struct brw_tracked_state brw_wm_binding_table;
extern const struct brw_tracked_state brw_gs_binding_table;
extern const struct brw_tracked_state brw_tes_binding_table;
extern const struct brw_tracked_state brw_tcs_binding_table;
extern const struct brw_tracked_state brw_vs_binding_table;
extern const struct brw_tracked_state brw_wm_ubo_surfaces;
|
||||
extern const struct brw_tracked_state brw_wm_image_surfaces;
|
||||
extern const struct brw_tracked_state brw_cs_ubo_surfaces;
|
||||
extern const struct brw_tracked_state brw_cs_image_surfaces;
|
||||
|
||||
extern const struct brw_tracked_state brw_psp_urb_cbs;
|
||||
|
||||
extern const struct brw_tracked_state brw_indices;
|
||||
extern const struct brw_tracked_state brw_index_buffer;
|
||||
extern const struct brw_tracked_state gfx7_cs_push_constants;
|
||||
extern const struct brw_tracked_state gfx6_binding_table_pointers;
|
||||
extern const struct brw_tracked_state gfx6_gs_binding_table;
|
||||
extern const struct brw_tracked_state gfx6_renderbuffer_surfaces;
|
||||
extern const struct brw_tracked_state gfx6_sampler_state;
|
||||
extern const struct brw_tracked_state gfx6_sol_surface;
|
||||
extern const struct brw_tracked_state gfx6_sf_vp;
|
||||
extern const struct brw_tracked_state gfx6_urb;
|
||||
extern const struct brw_tracked_state gfx7_l3_state;
|
||||
extern const struct brw_tracked_state gfx7_push_constant_space;
|
||||
extern const struct brw_tracked_state gfx7_urb;
|
||||
extern const struct brw_tracked_state gfx8_pma_fix;
|
||||
extern const struct brw_tracked_state brw_cs_work_groups_surface;
|
||||
|
||||
void gfx4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void gfx45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void gfx5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void gfx6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void gfx7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void gfx75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void gfx8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void gfx9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void gfx11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
|
||||
static inline bool
|
||||
brw_state_dirty(const struct brw_context *brw,
|
||||
GLuint mesa_flags, uint64_t brw_flags)
|
||||
{
|
||||
return ((brw->NewGLState & mesa_flags) |
|
||||
(brw->ctx.NewDriverState & brw_flags)) != 0;
|
||||
}
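
/* Editor's sketch, not part of the original header: a typical guard in a
 * state atom's emit path, checking the same flags the atom declares in its
 * .dirty sets. The flag names are real; the function itself is hypothetical.
 */
static inline void
example_emit_guarded(struct brw_context *brw)
{
   if (!brw_state_dirty(brw, _NEW_BUFFERS, BRW_NEW_BATCH))
      return;   /* nothing this atom depends on has changed */
   /* ... emit state packets here ... */
}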

/* brw_binding_tables.c */
void brw_upload_binding_table(struct brw_context *brw,
                              uint32_t packet_name,
                              const struct brw_stage_prog_data *prog_data,
                              struct brw_stage_state *stage_state);

/* brw_misc_state.c */
void brw_upload_invariant_state(struct brw_context *brw);
uint32_t
brw_depthbuffer_format(struct brw_context *brw);

/* gfx8_depth_state.c */
void gfx8_write_pma_stall_bits(struct brw_context *brw,
                               uint32_t pma_stall_bits);

/* brw_disk_cache.c */
void brw_disk_cache_init(struct brw_screen *screen);
bool brw_disk_cache_upload_program(struct brw_context *brw,
                                   gl_shader_stage stage);
void brw_disk_cache_write_compute_program(struct brw_context *brw);
void brw_disk_cache_write_render_programs(struct brw_context *brw);

/***********************************************************************
 * brw_state_upload.c
 */
void brw_upload_render_state(struct brw_context *brw);
void brw_render_state_finished(struct brw_context *brw);
void brw_upload_compute_state(struct brw_context *brw);
void brw_compute_state_finished(struct brw_context *brw);
void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);
void brw_emit_select_pipeline(struct brw_context *brw,
                              enum brw_pipeline pipeline);
void brw_enable_obj_preemption(struct brw_context *brw, bool enable);

static inline void
brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
   if (unlikely(brw->last_pipeline != pipeline)) {
      assert(pipeline < BRW_NUM_PIPELINES);
      brw_emit_select_pipeline(brw, pipeline);
      brw->last_pipeline = pipeline;
   }
}

/***********************************************************************
 * brw_program_cache.c
 */

void brw_upload_cache(struct brw_cache *cache,
                      enum brw_cache_id cache_id,
                      const void *key,
                      GLuint key_sz,
                      const void *data,
                      GLuint data_sz,
                      const void *aux,
                      GLuint aux_sz,
                      uint32_t *out_offset, void *out_aux);

bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
                      const void *key, GLuint key_size, uint32_t *inout_offset,
                      void *inout_aux, bool flag_state);

const void *brw_find_previous_compile(struct brw_cache *cache,
                                      enum brw_cache_id cache_id,
                                      unsigned program_string_id);

void brw_program_cache_check_size(struct brw_context *brw);

void brw_init_caches( struct brw_context *brw );
void brw_destroy_caches( struct brw_context *brw );

void brw_print_program_cache(struct brw_context *brw);

enum brw_cache_id brw_stage_cache_id(gl_shader_stage stage);

/* brw_batch.c */
void brw_require_statebuffer_space(struct brw_context *brw, int size);
void *brw_state_batch(struct brw_context *brw,
                      int size, int alignment, uint32_t *out_offset);

/* brw_wm_surface_state.c */
uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
enum isl_format brw_isl_format_for_mesa_format(mesa_format mesa_format);

GLuint translate_tex_target(GLenum target);

enum isl_format translate_tex_format(struct brw_context *brw,
                                     mesa_format mesa_format,
                                     GLenum srgb_decode);

int brw_get_texture_swizzle(const struct gl_context *ctx,
                            const struct gl_texture_object *t);

void brw_emit_buffer_surface_state(struct brw_context *brw,
                                   uint32_t *out_offset,
                                   struct brw_bo *bo,
                                   unsigned buffer_offset,
                                   unsigned surface_format,
                                   unsigned buffer_size,
                                   unsigned pitch,
                                   unsigned reloc_flags);

/* brw_sampler_state.c */
void brw_emit_sampler_state(struct brw_context *brw,
                            uint32_t *sampler_state,
                            uint32_t batch_offset_for_sampler_state,
                            unsigned min_filter,
                            unsigned mag_filter,
                            unsigned mip_filter,
                            unsigned max_anisotropy,
                            unsigned address_rounding,
                            unsigned wrap_s,
                            unsigned wrap_t,
                            unsigned wrap_r,
                            unsigned base_level,
                            unsigned min_lod,
                            unsigned max_lod,
                            int lod_bias,
                            unsigned shadow_function,
                            bool non_normalized_coordinates,
                            uint32_t border_color_offset);

/* gfx6_constant_state.c */
void
brw_populate_constant_data(struct brw_context *brw,
                           const struct gl_program *prog,
                           const struct brw_stage_state *stage_state,
                           void *dst,
                           const uint32_t *param,
                           unsigned nr_params);
void
brw_upload_pull_constants(struct brw_context *brw,
                          GLbitfield64 brw_new_constbuf,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          const struct brw_stage_prog_data *prog_data);
void
brw_upload_cs_push_constants(struct brw_context *brw,
                             const struct gl_program *prog,
                             const struct brw_cs_prog_data *cs_prog_data,
                             struct brw_stage_state *stage_state);

/* gfx7_vs_state.c */
void
gfx7_upload_constant_state(struct brw_context *brw,
                           const struct brw_stage_state *stage_state,
                           bool active, unsigned opcode);

/* brw_clip.c */
void brw_upload_clip_prog(struct brw_context *brw);

/* brw_sf.c */
void brw_upload_sf_prog(struct brw_context *brw);

bool brw_is_drawing_points(const struct brw_context *brw);
bool brw_is_drawing_lines(const struct brw_context *brw);

/* gfx7_l3_state.c */
void
gfx7_restore_default_l3_config(struct brw_context *brw);

static inline bool
use_state_point_size(const struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* Section 14.4 (Points) of the OpenGL 4.5 specification says:
    *
    *    "If program point size mode is enabled, the derived point size is
    *     taken from the (potentially clipped) shader built-in gl_PointSize
    *     written by:
    *
    *        * the geometry shader, if active;
    *        * the tessellation evaluation shader, if active and no
    *          geometry shader is active;
    *        * the vertex shader, otherwise
    *
    *    and clamped to the implementation-dependent point size range.  If
    *    the value written to gl_PointSize is less than or equal to zero,
    *    or if no value was written to gl_PointSize, results are undefined.
    *    If program point size mode is disabled, the derived point size is
    *    specified with the command
    *
    *       void PointSize(float size);
    *
    *    size specifies the requested size of a point.  The default value
    *    is 1.0."
    *
    * The rules for GLES come from the ES 3.2, OES_geometry_point_size, and
    * OES_tessellation_point_size specifications.  To summarize: if the last
    * stage before rasterization is a GS or TES, then use gl_PointSize from
    * the shader if written.  Otherwise, use 1.0.  If the last stage is a
    * vertex shader, use gl_PointSize, or it is undefined.
    *
    * We can combine these rules into a single condition for both APIs.
    * Using the state point size when the last shader stage doesn't write
    * gl_PointSize satisfies GL's requirements, as it's undefined.  Because
    * ES doesn't have a PointSize() command, the state point size will
    * remain 1.0, satisfying the ES default value in the GS/TES case, and
    * the VS case (1.0 works for "undefined").  Mesa sets the program point
    * mode flag to always-enabled in ES, so we can safely check that, and
    * it'll be ignored for ES.
    *
    * _NEW_PROGRAM | _NEW_POINT
    * BRW_NEW_VUE_MAP_GEOM_OUT
    */
   return (!ctx->VertexProgram.PointSizeEnabled && !ctx->Point._Attenuated) ||
          (brw->vue_map_geom_out.slots_valid & VARYING_BIT_PSIZ) == 0;
}
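
/* Editor's sketch, not part of the original header: the combined condition
 * above with the brw state abstracted into plain booleans. For example, an
 * ES context (where Mesa reports program point size mode always enabled)
 * whose last pre-rasterization stage does not write gl_PointSize takes the
 * second clause, so the 1.0 state point size matches the ES default.
 */
static inline bool
example_state_point_size(bool point_size_enabled, bool attenuated,
                         bool shader_writes_psiz)
{
   return (!point_size_enabled && !attenuated) || !shader_writes_psiz;
}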

void brw_copy_pipeline_atoms(struct brw_context *brw,
                             enum brw_pipeline pipeline,
                             const struct brw_tracked_state **atoms,
                             int num_atoms);
void gfx4_init_atoms(struct brw_context *brw);
void gfx45_init_atoms(struct brw_context *brw);
void gfx5_init_atoms(struct brw_context *brw);
void gfx6_init_atoms(struct brw_context *brw);
void gfx7_init_atoms(struct brw_context *brw);
void gfx75_init_atoms(struct brw_context *brw);
void gfx8_init_atoms(struct brw_context *brw);
void gfx9_init_atoms(struct brw_context *brw);
void gfx11_init_atoms(struct brw_context *brw);

static inline uint32_t
brw_mocs(const struct isl_device *dev, struct brw_bo *bo)
{
   return isl_mocs(dev, 0, bo && bo->external);
}

#ifdef __cplusplus
}
#endif

#endif

@@ -1,789 +0,0 @@

/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "brw_batch.h"
#include "brw_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "genxml/genX_bits.h"
#include "main/framebuffer.h"

void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
   ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
   assert(devinfo->ver >= 9);

   if (enable == brw->object_preemption)
      return;

   /* A fixed function pipe flush is required before modifying this field */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   bool replay_mode = enable ?
      GFX9_REPLAY_MODE_MIDOBJECT : GFX9_REPLAY_MODE_MIDBUFFER;

   /* enable object level preemption */
   brw_load_register_imm32(brw, CS_CHICKEN1,
                           replay_mode | GFX9_REPLAY_MODE_MASK);

   brw->object_preemption = enable;
}

static void
brw_upload_gfx11_slice_hashing_state(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   int subslices_delta =
      devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
   if (subslices_delta == 0)
      return;

   unsigned size = GFX11_SLICE_HASH_TABLE_length * 4;
   uint32_t hash_address;

   uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);

   unsigned idx = 0;

   unsigned sl_small = 0;
   unsigned sl_big = 1;
   if (subslices_delta > 0) {
      sl_small = 1;
      sl_big = 0;
   }

   /**
    * Create a 16x16 slice hashing table like the following one:
    *
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    *
    * The table above is used when the pixel pipe 0 has less subslices than
    * pixel pipe 1. When pixel pipe 0 has more subslices, then a similar table
    * with 0's and 1's inverted is used.
    */
   for (int i = 0; i < GFX11_SLICE_HASH_TABLE_length; i++) {
      uint32_t dw = 0;

      for (int j = 0; j < 8; j++) {
         unsigned slice = idx++ % 3 ? sl_big : sl_small;
         dw |= slice << (j * 4);
      }
      map[i] = dw;
   }

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
   OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
   ADVANCE_BATCH();

   /* From gfx10/gfx11 workaround table in h/w specs:
    *
    *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
    *     a value of 0xFFFF"
    *
    * This means that whenever we update a field with this instruction, we need
    * to update all the others.
    *
    * Since this is the first time we emit this
    * instruction, we are only setting the fSLICE_HASHING_TABLE_ENABLE flag,
    * and leaving everything else at their default state (0).
    */
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
   OUT_BATCH(0xffff0000 | SLICE_HASHING_TABLE_ENABLE);
   ADVANCE_BATCH();
}
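
/* Editor's note, not part of the original file: each 32-bit table entry
 * written above packs eight 4-bit slice IDs, and because idx cycles modulo
 * 3, two of every three nibbles select the pixel pipe with more subslices.
 * A hypothetical helper that decodes one column:
 */
static inline unsigned
example_slice_hash_nibble(uint32_t dw, unsigned column)
{
   return (dw >> (column * 4)) & 0xf;   /* slice ID for this column */
}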

static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->ver == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->ver == 11) {
      /* The default behavior of bit 5 "Headerless Message for Pre-emptable
       * Contexts" in SAMPLER MODE register is set to 0, which means
       * headerless sampler messages are not allowed for pre-emptable
       * contexts. Set the bit 5 to 1 to allow them.
       */
      brw_load_register_imm32(brw, GFX11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set
       * in L3CNTLREG register. The default setting of the bit is not the
       * desirable behavior.
       */
      brw_load_register_imm32(brw, GFX8_L3CNTLREG,
                              GFX8_L3CNTLREG_EDBC_NO_HANG);
   }

   /* hardware specification recommends disabling repacking for
    * the compatibility with decompression mechanism in display controller.
    */
   if (devinfo->disable_ccs_repack) {
      brw_load_register_imm32(brw, GFX7_CACHE_MODE_0,
                              GFX11_DISABLE_REPACKING_FOR_COMPRESSION |
                              REG_MASK(GFX11_DISABLE_REPACKING_FOR_COMPRESSION));
   }

   if (devinfo->ver == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GFX7_CACHE_MODE_1,
                              REG_MASK(GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT) |
                              REG_MASK(GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT |
                              GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC);
   }

   if (devinfo->ver >= 8) {
      gfx8_emit_3dstate_sample_pattern(brw);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->ver >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->ver == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->ver >= 10)
      brw_enable_obj_preemption(brw, true);

   if (devinfo->ver == 11)
      brw_upload_gfx11_slice_hashing_state(brw);
}

static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We want
    * it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (devinfo->ver >= 11)
      gfx11_init_atoms(brw);
   else if (devinfo->ver >= 10)
      unreachable("Gfx10 support dropped.");
   else if (devinfo->ver >= 9)
      gfx9_init_atoms(brw);
   else if (devinfo->ver >= 8)
      gfx8_init_atoms(brw);
   else if (devinfo->verx10 >= 75)
      gfx75_init_atoms(brw);
   else if (devinfo->ver >= 7)
      gfx7_init_atoms(brw);
   else if (devinfo->ver >= 6)
      gfx6_init_atoms(brw);
   else if (devinfo->ver >= 5)
      gfx5_init_atoms(brw);
   else if (devinfo->verx10 >= 45)
      gfx45_init_atoms(brw);
   else
      gfx4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
    * dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewTessState = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static void
accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b)
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


static void
xor_states(struct brw_state_flags *result,
           const struct brw_state_flags *a,
           const struct brw_state_flags *b)
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}
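
/* Editor's sketch, not part of the original file: a worked example of the
 * dirty-flag algebra used by the DEBUG_ANY ordering check further down.
 * The flag values here are made up for illustration.
 */
static inline void
example_atom_ordering_check(void)
{
   struct brw_state_flags prev     = { .mesa = 0, .brw = 0x3 }; /* A|B   */
   struct brw_state_flags state    = { .mesa = 0, .brw = 0x7 }; /* A|B|C */
   struct brw_state_flags examined = { .mesa = 0, .brw = 0x4 }; /* C     */
   struct brw_state_flags generated;

   /* XOR isolates exactly what the last atom flagged: C. */
   xor_states(&generated, &prev, &state);
   /* check_state() returns true exactly when an already-examined flag was
    * re-generated, i.e. when the atom list is mis-ordered.
    */
   assert(check_state(&examined, &generated));
}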

struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GFX4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}

static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->programs[MESA_SHADER_GEOMETRY]) {
         brw_upload_gs_prog(brw);
      } else {
         brw->gs.base.prog_data = NULL;
         if (devinfo->ver < 7)
            brw_upload_ff_gs_prog(brw);
      }

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->programs[MESA_SHADER_GEOMETRY])
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->programs[MESA_SHADER_TESS_EVAL])
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (devinfo->ver < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }

      brw_disk_cache_write_render_programs(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
      brw_disk_cache_write_compute_program(brw);
   }
}

static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
      brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);

   if (INTEL_DEBUG(DEBUG_REEMIT)) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->ver == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw->vtbl.emit_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (INTEL_DEBUG(DEBUG_ANY)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *    fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (INTEL_DEBUG(DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware. This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the state
 * it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}
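
/* Editor's sketch, not from the original file: the intended pairing of the
 * upload/finished entry points in the drawing code. The helper names in
 * this comment are hypothetical.
 *
 *    brw_upload_render_state(brw);
 *    ... emit the draw ...
 *    if (batch_ran_out_of_aperture(brw))   // hypothetical check
 *       rewind_batch_and_retry(brw);       // dirty bits stay set for re-emit
 *    else
 *       brw_render_state_finished(brw);    // commit: clear the dirty bits
 */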

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}

@@ -1,68 +0,0 @@

/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#ifndef BRW_STRUCTS_H
#define BRW_STRUCTS_H

struct brw_urb_fence
{
   struct
   {
      unsigned length:8;
      unsigned vs_realloc:1;
      unsigned gs_realloc:1;
      unsigned clp_realloc:1;
      unsigned sf_realloc:1;
      unsigned vfe_realloc:1;
      unsigned cs_realloc:1;
      unsigned pad:2;
      unsigned opcode:16;
   } header;

   struct
   {
      unsigned vs_fence:10;
      unsigned gs_fence:10;
      unsigned clp_fence:10;
      unsigned pad:2;
   } bits0;

   struct
   {
      unsigned sf_fence:10;
      unsigned vf_fence:10;
      unsigned cs_fence:11;
      unsigned pad:1;
   } bits1;
};

#endif

@@ -1,558 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2011 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
#include "main/mtypes.h"
|
||||
|
||||
#include "isl/isl.h"
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_state.h"
|
||||
#include "brw_defines.h"
|
||||
|
||||
enum isl_format
|
||||
brw_isl_format_for_mesa_format(mesa_format mesa_format)
|
||||
{
|
||||
/* This table is ordered according to the enum ordering in formats.h. We do
|
||||
* expect that enum to be extended without our explicit initialization
|
||||
* staying in sync, so we initialize to 0 even though
|
||||
* ISL_FORMAT_R32G32B32A32_FLOAT happens to also be 0.
|
||||
*/
|
||||
static const enum isl_format table[MESA_FORMAT_COUNT] = {
|
||||
[0 ... MESA_FORMAT_COUNT-1] = ISL_FORMAT_UNSUPPORTED,
|
||||
|
||||
[MESA_FORMAT_R8G8B8A8_UNORM] = ISL_FORMAT_R8G8B8A8_UNORM,
|
||||
[MESA_FORMAT_B8G8R8A8_UNORM] = ISL_FORMAT_B8G8R8A8_UNORM,
|
||||
[MESA_FORMAT_R8G8B8X8_UNORM] = ISL_FORMAT_R8G8B8X8_UNORM,
|
||||
[MESA_FORMAT_B8G8R8X8_UNORM] = ISL_FORMAT_B8G8R8X8_UNORM,
|
||||
[MESA_FORMAT_RGB_UNORM8] = ISL_FORMAT_R8G8B8_UNORM,
|
||||
[MESA_FORMAT_B5G6R5_UNORM] = ISL_FORMAT_B5G6R5_UNORM,
|
||||
[MESA_FORMAT_B4G4R4A4_UNORM] = ISL_FORMAT_B4G4R4A4_UNORM,
|
||||
[MESA_FORMAT_B5G5R5A1_UNORM] = ISL_FORMAT_B5G5R5A1_UNORM,
|
||||
[MESA_FORMAT_LA_UNORM8] = ISL_FORMAT_L8A8_UNORM,
|
||||
[MESA_FORMAT_LA_UNORM16] = ISL_FORMAT_L16A16_UNORM,
|
||||
[MESA_FORMAT_A_UNORM8] = ISL_FORMAT_A8_UNORM,
|
||||
[MESA_FORMAT_A_UNORM16] = ISL_FORMAT_A16_UNORM,
|
||||
[MESA_FORMAT_L_UNORM8] = ISL_FORMAT_L8_UNORM,
|
||||
[MESA_FORMAT_L_UNORM16] = ISL_FORMAT_L16_UNORM,
|
||||
[MESA_FORMAT_I_UNORM8] = ISL_FORMAT_I8_UNORM,
|
||||
[MESA_FORMAT_I_UNORM16] = ISL_FORMAT_I16_UNORM,
|
||||
[MESA_FORMAT_YCBCR_REV] = ISL_FORMAT_YCRCB_NORMAL,
|
||||
[MESA_FORMAT_YCBCR] = ISL_FORMAT_YCRCB_SWAPUVY,
|
||||
[MESA_FORMAT_R_UNORM8] = ISL_FORMAT_R8_UNORM,
|
||||
[MESA_FORMAT_RG_UNORM8] = ISL_FORMAT_R8G8_UNORM,
|
||||
[MESA_FORMAT_R_UNORM16] = ISL_FORMAT_R16_UNORM,
|
||||
[MESA_FORMAT_RG_UNORM16] = ISL_FORMAT_R16G16_UNORM,
|
||||
[MESA_FORMAT_B10G10R10A2_UNORM] = ISL_FORMAT_B10G10R10A2_UNORM,
|
||||
[MESA_FORMAT_S_UINT8] = ISL_FORMAT_R8_UINT,
|
||||
|
||||
[MESA_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB,
|
||||
[MESA_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB,
|
||||
[MESA_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB,
|
||||
[MESA_FORMAT_R_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
|
||||
[MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
|
||||
[MESA_FORMAT_LA_SRGB8] = ISL_FORMAT_L8A8_UNORM_SRGB,
|
||||
[MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
|
||||
[MESA_FORMAT_SRGBA_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
|
||||
[MESA_FORMAT_SRGBA_DXT3] = ISL_FORMAT_BC2_UNORM_SRGB,
|
||||
[MESA_FORMAT_SRGBA_DXT5] = ISL_FORMAT_BC3_UNORM_SRGB,
|
||||
|
||||
[MESA_FORMAT_RGB_FXT1] = ISL_FORMAT_FXT1,
|
||||
[MESA_FORMAT_RGBA_FXT1] = ISL_FORMAT_FXT1,
|
||||
[MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_BC1_UNORM,
|
||||
[MESA_FORMAT_RGBA_DXT1] = ISL_FORMAT_BC1_UNORM,
|
||||
[MESA_FORMAT_RGBA_DXT3] = ISL_FORMAT_BC2_UNORM,
|
||||
[MESA_FORMAT_RGBA_DXT5] = ISL_FORMAT_BC3_UNORM,
|
||||
|
||||
[MESA_FORMAT_RGBA_FLOAT32] = ISL_FORMAT_R32G32B32A32_FLOAT,
|
||||
[MESA_FORMAT_RGBA_FLOAT16] = ISL_FORMAT_R16G16B16A16_FLOAT,
|
||||
[MESA_FORMAT_RGB_FLOAT32] = ISL_FORMAT_R32G32B32_FLOAT,
|
||||
[MESA_FORMAT_A_FLOAT32] = ISL_FORMAT_A32_FLOAT,
|
||||
[MESA_FORMAT_A_FLOAT16] = ISL_FORMAT_A16_FLOAT,
|
||||
[MESA_FORMAT_L_FLOAT32] = ISL_FORMAT_L32_FLOAT,
|
||||
[MESA_FORMAT_L_FLOAT16] = ISL_FORMAT_L16_FLOAT,
|
||||
[MESA_FORMAT_LA_FLOAT32] = ISL_FORMAT_L32A32_FLOAT,
|
||||
[MESA_FORMAT_LA_FLOAT16] = ISL_FORMAT_L16A16_FLOAT,
|
||||
[MESA_FORMAT_I_FLOAT32] = ISL_FORMAT_I32_FLOAT,
|
||||
[MESA_FORMAT_I_FLOAT16] = ISL_FORMAT_I16_FLOAT,
|
||||
[MESA_FORMAT_R_FLOAT32] = ISL_FORMAT_R32_FLOAT,
|
||||
[MESA_FORMAT_R_FLOAT16] = ISL_FORMAT_R16_FLOAT,
|
||||
[MESA_FORMAT_RG_FLOAT32] = ISL_FORMAT_R32G32_FLOAT,
|
||||
[MESA_FORMAT_RG_FLOAT16] = ISL_FORMAT_R16G16_FLOAT,
|
||||
|
||||
[MESA_FORMAT_R_SINT8] = ISL_FORMAT_R8_SINT,
|
||||
[MESA_FORMAT_RG_SINT8] = ISL_FORMAT_R8G8_SINT,
|
||||
[MESA_FORMAT_RGB_SINT8] = ISL_FORMAT_R8G8B8_SINT,
|
||||
[MESA_FORMAT_RGBA_SINT8] = ISL_FORMAT_R8G8B8A8_SINT,
|
||||
[MESA_FORMAT_R_SINT16] = ISL_FORMAT_R16_SINT,
|
||||
[MESA_FORMAT_RG_SINT16] = ISL_FORMAT_R16G16_SINT,
|
||||
[MESA_FORMAT_RGB_SINT16] = ISL_FORMAT_R16G16B16_SINT,
|
||||
[MESA_FORMAT_RGBA_SINT16] = ISL_FORMAT_R16G16B16A16_SINT,
|
||||
[MESA_FORMAT_R_SINT32] = ISL_FORMAT_R32_SINT,
|
||||
[MESA_FORMAT_RG_SINT32] = ISL_FORMAT_R32G32_SINT,
|
||||
[MESA_FORMAT_RGB_SINT32] = ISL_FORMAT_R32G32B32_SINT,
|
||||
[MESA_FORMAT_RGBA_SINT32] = ISL_FORMAT_R32G32B32A32_SINT,
|
||||
|
||||
[MESA_FORMAT_R_UINT8] = ISL_FORMAT_R8_UINT,
|
||||
[MESA_FORMAT_RG_UINT8] = ISL_FORMAT_R8G8_UINT,
|
||||
[MESA_FORMAT_RGB_UINT8] = ISL_FORMAT_R8G8B8_UINT,
|
||||
[MESA_FORMAT_RGBA_UINT8] = ISL_FORMAT_R8G8B8A8_UINT,
|
||||
[MESA_FORMAT_R_UINT16] = ISL_FORMAT_R16_UINT,
|
||||
[MESA_FORMAT_RG_UINT16] = ISL_FORMAT_R16G16_UINT,
|
||||
[MESA_FORMAT_RGB_UINT16] = ISL_FORMAT_R16G16B16_UINT,
|
||||
[MESA_FORMAT_RGBA_UINT16] = ISL_FORMAT_R16G16B16A16_UINT,
|
||||
[MESA_FORMAT_R_UINT32] = ISL_FORMAT_R32_UINT,
|
||||
[MESA_FORMAT_RG_UINT32] = ISL_FORMAT_R32G32_UINT,
|
||||
[MESA_FORMAT_RGB_UINT32] = ISL_FORMAT_R32G32B32_UINT,
|
||||
[MESA_FORMAT_RGBA_UINT32] = ISL_FORMAT_R32G32B32A32_UINT,
|
||||
|
||||
[MESA_FORMAT_R_SNORM8] = ISL_FORMAT_R8_SNORM,
|
||||
[MESA_FORMAT_RG_SNORM8] = ISL_FORMAT_R8G8_SNORM,
|
||||
[MESA_FORMAT_R8G8B8A8_SNORM] = ISL_FORMAT_R8G8B8A8_SNORM,
|
||||
[MESA_FORMAT_R_SNORM16] = ISL_FORMAT_R16_SNORM,
|
||||
[MESA_FORMAT_RG_SNORM16] = ISL_FORMAT_R16G16_SNORM,
|
||||
[MESA_FORMAT_RGB_SNORM16] = ISL_FORMAT_R16G16B16_SNORM,
|
||||
[MESA_FORMAT_RGBA_SNORM16] = ISL_FORMAT_R16G16B16A16_SNORM,
|
||||
[MESA_FORMAT_RGBA_UNORM16] = ISL_FORMAT_R16G16B16A16_UNORM,
|
||||
|
||||
[MESA_FORMAT_R_RGTC1_UNORM] = ISL_FORMAT_BC4_UNORM,
|
||||
[MESA_FORMAT_R_RGTC1_SNORM] = ISL_FORMAT_BC4_SNORM,
|
||||
[MESA_FORMAT_RG_RGTC2_UNORM] = ISL_FORMAT_BC5_UNORM,
|
||||
[MESA_FORMAT_RG_RGTC2_SNORM] = ISL_FORMAT_BC5_SNORM,
|
||||
|
||||
      [MESA_FORMAT_ETC1_RGB8] = ISL_FORMAT_ETC1_RGB8,
      [MESA_FORMAT_ETC2_RGB8] = ISL_FORMAT_ETC2_RGB8,
      [MESA_FORMAT_ETC2_SRGB8] = ISL_FORMAT_ETC2_SRGB8,
      [MESA_FORMAT_ETC2_RGBA8_EAC] = ISL_FORMAT_ETC2_EAC_RGBA8,
      [MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = ISL_FORMAT_ETC2_EAC_SRGB8_A8,
      [MESA_FORMAT_ETC2_R11_EAC] = ISL_FORMAT_EAC_R11,
      [MESA_FORMAT_ETC2_RG11_EAC] = ISL_FORMAT_EAC_RG11,
      [MESA_FORMAT_ETC2_SIGNED_R11_EAC] = ISL_FORMAT_EAC_SIGNED_R11,
      [MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = ISL_FORMAT_EAC_SIGNED_RG11,
      [MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_RGB8_PTA,
      [MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_SRGB8_PTA,

      [MESA_FORMAT_BPTC_RGBA_UNORM] = ISL_FORMAT_BC7_UNORM,
      [MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM] = ISL_FORMAT_BC7_UNORM_SRGB,
      [MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT] = ISL_FORMAT_BC6H_SF16,
      [MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT] = ISL_FORMAT_BC6H_UF16,

      [MESA_FORMAT_RGBA_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16,
      [MESA_FORMAT_RGBA_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_FLT16,
      [MESA_FORMAT_RGBA_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16,
      [MESA_FORMAT_RGBA_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_FLT16,
      [MESA_FORMAT_RGBA_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_FLT16,
      [MESA_FORMAT_RGBA_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_FLT16,
      [MESA_FORMAT_RGBA_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_FLT16,
      [MESA_FORMAT_RGBA_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_FLT16,
      [MESA_FORMAT_RGBA_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_FLT16,
      [MESA_FORMAT_RGBA_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_FLT16,
      [MESA_FORMAT_RGBA_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_FLT16,
      [MESA_FORMAT_RGBA_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16,
      [MESA_FORMAT_RGBA_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16,
      [MESA_FORMAT_RGBA_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_U8SRGB,
      [MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_U8SRGB,

      [MESA_FORMAT_R9G9B9E5_FLOAT] = ISL_FORMAT_R9G9B9E5_SHAREDEXP,
      [MESA_FORMAT_R11G11B10_FLOAT] = ISL_FORMAT_R11G11B10_FLOAT,

      [MESA_FORMAT_R10G10B10A2_UNORM] = ISL_FORMAT_R10G10B10A2_UNORM,
      [MESA_FORMAT_B10G10R10A2_UINT] = ISL_FORMAT_B10G10R10A2_UINT,
      [MESA_FORMAT_R10G10B10A2_UINT] = ISL_FORMAT_R10G10B10A2_UINT,

      [MESA_FORMAT_B5G5R5X1_UNORM] = ISL_FORMAT_B5G5R5X1_UNORM,
      [MESA_FORMAT_R8G8B8X8_SRGB] = ISL_FORMAT_R8G8B8X8_UNORM_SRGB,
      [MESA_FORMAT_B10G10R10X2_UNORM] = ISL_FORMAT_B10G10R10X2_UNORM,
      [MESA_FORMAT_RGBX_UNORM16] = ISL_FORMAT_R16G16B16X16_UNORM,
      [MESA_FORMAT_RGBX_FLOAT16] = ISL_FORMAT_R16G16B16X16_FLOAT,
      [MESA_FORMAT_RGBX_FLOAT32] = ISL_FORMAT_R32G32B32X32_FLOAT,
   };

   assert(mesa_format < MESA_FORMAT_COUNT);
   return table[mesa_format];
}

void
brw_screen_init_surface_formats(struct brw_screen *screen)
{
   const struct intel_device_info *devinfo = &screen->devinfo;
   mesa_format format;

   memset(&screen->mesa_format_supports_texture, 0,
          sizeof(screen->mesa_format_supports_texture));

   for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) {
      if (!_mesa_get_format_name(format))
         continue;
      enum isl_format texture, render;
      bool is_integer = _mesa_is_format_integer_color(format);

      render = texture = brw_isl_format_for_mesa_format(format);

      /* Only exposed with EXT_memory_object_* support, which is not
       * available on older gens.
       */
      if (devinfo->ver < 7 && format == MESA_FORMAT_Z_UNORM16)
         continue;

      if (texture == ISL_FORMAT_UNSUPPORTED)
         continue;

      /* Don't advertise 8 and 16-bit RGB formats to core mesa. This ensures
       * that they are renderable from an API perspective since core mesa will
       * fall back to RGBA or RGBX (we can't render to non-power-of-two
       * formats). For 8-bit formats, this also keeps us from hitting some
       * nasty corners in brw_miptree_map_blit if you ever try to map one.
       */
      int format_size = _mesa_get_format_bytes(format);
      if (format_size == 3 || format_size == 6)
         continue;

      if (isl_format_supports_sampling(devinfo, texture) &&
          (isl_format_supports_filtering(devinfo, texture) || is_integer))
         screen->mesa_format_supports_texture[format] = true;

      /* Re-map some render target formats to make them supported when they
       * wouldn't be using their format for texturing.
       */
      switch (render) {
      /* For these formats, we just need to read/write the first
       * channel into R, which is to say that we just treat them as
       * GL_RED.
       */
      case ISL_FORMAT_I32_FLOAT:
      case ISL_FORMAT_L32_FLOAT:
         render = ISL_FORMAT_R32_FLOAT;
         break;
      case ISL_FORMAT_I16_FLOAT:
      case ISL_FORMAT_L16_FLOAT:
         render = ISL_FORMAT_R16_FLOAT;
         break;
      case ISL_FORMAT_I8_UNORM:
      case ISL_FORMAT_L8_UNORM:
         render = ISL_FORMAT_R8_UNORM;
         break;
      case ISL_FORMAT_I16_UNORM:
      case ISL_FORMAT_L16_UNORM:
         render = ISL_FORMAT_R16_UNORM;
         break;
      case ISL_FORMAT_R16G16B16X16_UNORM:
         render = ISL_FORMAT_R16G16B16A16_UNORM;
         break;
      case ISL_FORMAT_R16G16B16X16_FLOAT:
         render = ISL_FORMAT_R16G16B16A16_FLOAT;
         break;
      case ISL_FORMAT_B8G8R8X8_UNORM:
         /* XRGB is handled as ARGB because the chips in this family
          * cannot render to XRGB targets. This means that we have to
          * mask writes to alpha (ala glColorMask) and reconfigure the
          * alpha blending hardware to use GL_ONE (or GL_ZERO) for
          * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
          * used. On Gfx8+ BGRX is actually allowed (but not RGBX).
          */
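         /* Concretely, the substitution amounts to rewriting the blend
          * factors along these lines (a sketch of the idea only, not the
          * exact i965 state-upload code):
          *
          *    if (blend_factor == GL_DST_ALPHA)
          *       blend_factor = GL_ONE;
          *    else if (blend_factor == GL_ONE_MINUS_DST_ALPHA)
          *       blend_factor = GL_ZERO;
          *
          * together with forcing the alpha bit of the color-write mask
          * off so the undefined X channel of the ARGB surface is never
          * written.
          */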
         if (!isl_format_supports_rendering(devinfo, texture))
            render = ISL_FORMAT_B8G8R8A8_UNORM;
         break;
      case ISL_FORMAT_B8G8R8X8_UNORM_SRGB:
         if (!isl_format_supports_rendering(devinfo, texture))
            render = ISL_FORMAT_B8G8R8A8_UNORM_SRGB;
         break;
      case ISL_FORMAT_R8G8B8X8_UNORM:
         render = ISL_FORMAT_R8G8B8A8_UNORM;
         break;
      case ISL_FORMAT_R8G8B8X8_UNORM_SRGB:
         render = ISL_FORMAT_R8G8B8A8_UNORM_SRGB;
         break;
      default:
         break;
      }

      /* Note that GL_EXT_texture_integer says that blending doesn't occur for
       * integer, so we don't need hardware support for blending on it. Other
       * than that, GL in general requires alpha blending for render targets,
       * even though we don't support it for some formats.
       */
      if (isl_format_supports_rendering(devinfo, render) &&
          (isl_format_supports_alpha_blending(devinfo, render) || is_integer)) {
         screen->mesa_to_isl_render_format[format] = render;
         screen->mesa_format_supports_render[format] = true;
      }
   }

   /* We will check this table for FBO completeness, but the surface format
    * table above only covered color rendering.
    */
   screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_S8_UINT] = true;
   screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_X8_UINT] = true;
   screen->mesa_format_supports_render[MESA_FORMAT_S_UINT8] = true;
   screen->mesa_format_supports_render[MESA_FORMAT_Z_FLOAT32] = true;
   screen->mesa_format_supports_render[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true;
   if (devinfo->ver >= 8)
      screen->mesa_format_supports_render[MESA_FORMAT_Z_UNORM16] = true;

   /* We remap depth formats to a supported texturing format in
    * translate_tex_format().
    */
   screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_S8_UINT] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_X8_UINT] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_Z_FLOAT32] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_S_UINT8] = true;

   /* Benchmarking shows that Z16 is slower than Z24, so there's no reason to
    * use it unless you're under memory (not memory bandwidth) pressure.
    *
    * Apparently, the GPU's depth scoreboarding works on a 32-bit granularity,
    * which corresponds to one pixel in the depth buffer for Z24 or Z32 formats.
    * However, it corresponds to two pixels with Z16, which means both need to
    * hit the early depth case in order for it to happen.
    *
    * Other speculation is that we may be hitting increased fragment shader
    * execution from GL_LEQUAL/GL_EQUAL depth tests at reduced precision.
    *
    * With the PMA stall workaround in place, Z16 is faster than Z24, as it
    * should be.
    */
   if (devinfo->ver >= 8)
      screen->mesa_format_supports_texture[MESA_FORMAT_Z_UNORM16] = true;

   /* The RGBX formats are not renderable. Normally these get mapped
    * internally to RGBA formats when rendering. However on Gfx9+ when this
    * internal override is used fast clears don't work so they are disabled in
    * brw_meta_fast_clear. To avoid this problem we can just pretend not to
    * support RGBX formats at all. This will cause the upper layers of Mesa to
    * pick the RGBA formats instead. This works fine because when it is used
    * as a texture source the swizzle state is programmed to force the alpha
    * channel to 1.0 anyway. We could also do this for all gens except that
    * it's a bit more difficult when the hardware doesn't support texture
    * swizzling. Gens using the blorp have further problems because that
    * doesn't implement this swizzle override. We don't need to do this for
    * BGRX because that actually is supported natively on Gfx8+.
    */
   if (devinfo->ver >= 9) {
      static const mesa_format rgbx_formats[] = {
         MESA_FORMAT_R8G8B8X8_UNORM,
         MESA_FORMAT_R8G8B8X8_SRGB,
         MESA_FORMAT_RGBX_UNORM16,
         MESA_FORMAT_RGBX_FLOAT16,
         MESA_FORMAT_RGBX_FLOAT32
      };

      for (int i = 0; i < ARRAY_SIZE(rgbx_formats); i++) {
         screen->mesa_format_supports_texture[rgbx_formats[i]] = false;
         screen->mesa_format_supports_render[rgbx_formats[i]] = false;
      }
   }

   /* On hardware that lacks support for ETC1, we map ETC1 to RGBX
    * during glCompressedTexImage2D(). See brw_mipmap_tree::wraps_etc1.
    */
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC1_RGB8] = true;

   /* On hardware that lacks support for ETC2, we map ETC2 to a suitable
    * MESA_FORMAT during glCompressedTexImage2D().
    * See brw_mipmap_tree::wraps_etc2.
    */
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGBA8_EAC] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_R11_EAC] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RG11_EAC] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = true;
   screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = true;
}

void
brw_init_surface_formats(struct brw_context *brw)
{
   struct brw_screen *screen = brw->screen;
   struct gl_context *ctx = &brw->ctx;

   brw->mesa_format_supports_render = screen->mesa_format_supports_render;
   brw->mesa_to_isl_render_format = screen->mesa_to_isl_render_format;

   STATIC_ASSERT(ARRAY_SIZE(ctx->TextureFormatSupported) ==
                 ARRAY_SIZE(screen->mesa_format_supports_texture));

   for (unsigned i = 0; i < ARRAY_SIZE(ctx->TextureFormatSupported); ++i) {
      ctx->TextureFormatSupported[i] = screen->mesa_format_supports_texture[i];
   }
}

bool
brw_render_target_supported(struct brw_context *brw,
                            struct gl_renderbuffer *rb)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   mesa_format format = rb->Format;

   /* Many integer formats are promoted to RGBA (like XRGB8888 is), which means
    * we would consider them renderable even though we don't have surface
    * support for their alpha behavior and don't have the blending unit
    * available to fake it like we do for XRGB8888. Force them to being
    * unsupported.
    */
   if (_mesa_is_format_integer_color(format) &&
       rb->_BaseFormat != GL_RGBA &&
       rb->_BaseFormat != GL_RG &&
       rb->_BaseFormat != GL_RED)
      return false;

   /* Under some conditions, MSAA is not supported for formats whose width is
    * more than 64 bits.
    */
   if (devinfo->ver < 8 &&
       rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
      /* Gfx6: MSAA on >64 bit formats is unsupported. */
      if (devinfo->ver <= 6)
         return false;

      /* Gfx7: 8x MSAA on >64 bit formats is unsupported. */
      if (rb->NumSamples >= 8)
         return false;
   }

   return brw->mesa_format_supports_render[format];
}

enum isl_format
translate_tex_format(struct brw_context *brw,
                     mesa_format mesa_format,
                     GLenum srgb_decode)
{
   struct gl_context *ctx = &brw->ctx;
   if (srgb_decode == GL_SKIP_DECODE_EXT)
      mesa_format = _mesa_get_srgb_format_linear(mesa_format);

   switch (mesa_format) {

   case MESA_FORMAT_Z_UNORM16:
      return ISL_FORMAT_R16_UNORM;

   case MESA_FORMAT_Z24_UNORM_S8_UINT:
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
      return ISL_FORMAT_R24_UNORM_X8_TYPELESS;

   case MESA_FORMAT_Z_FLOAT32:
      return ISL_FORMAT_R32_FLOAT;

   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS;

   case MESA_FORMAT_RGBA_FLOAT32:
      /* The value of this ISL surface format is 0, which tricks the
       * assertion below.
       */
      return ISL_FORMAT_R32G32B32A32_FLOAT;

   case MESA_FORMAT_RGBA_ASTC_4x4:
   case MESA_FORMAT_RGBA_ASTC_5x4:
   case MESA_FORMAT_RGBA_ASTC_5x5:
   case MESA_FORMAT_RGBA_ASTC_6x5:
   case MESA_FORMAT_RGBA_ASTC_6x6:
   case MESA_FORMAT_RGBA_ASTC_8x5:
   case MESA_FORMAT_RGBA_ASTC_8x6:
   case MESA_FORMAT_RGBA_ASTC_8x8:
   case MESA_FORMAT_RGBA_ASTC_10x5:
   case MESA_FORMAT_RGBA_ASTC_10x6:
   case MESA_FORMAT_RGBA_ASTC_10x8:
   case MESA_FORMAT_RGBA_ASTC_10x10:
   case MESA_FORMAT_RGBA_ASTC_12x10:
   case MESA_FORMAT_RGBA_ASTC_12x12: {
      enum isl_format isl_fmt =
         brw_isl_format_for_mesa_format(mesa_format);

      /**
       * It is possible to process these formats using the LDR Profile
       * or the Full Profile mode of the hardware. Because it isn't
       * possible to determine if an HDR or LDR texture is being rendered, we
       * can't determine which mode to enable in the hardware. Therefore, to
       * handle all cases, always default to Full profile unless we are
       * processing sRGBs, which are incompatible with this mode.
       */
      if (ctx->Extensions.KHR_texture_compression_astc_hdr)
         isl_fmt |= GFX9_SURFACE_ASTC_HDR_FORMAT_BIT;

      return isl_fmt;
   }

   default:
      return brw_isl_format_for_mesa_format(mesa_format);
   }
}

/**
 * Convert a MESA_FORMAT to the corresponding BRW_DEPTHFORMAT enum.
 */
uint32_t
brw_depth_format(struct brw_context *brw, mesa_format format)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   switch (format) {
   case MESA_FORMAT_Z_UNORM16:
      return BRW_DEPTHFORMAT_D16_UNORM;
   case MESA_FORMAT_Z_FLOAT32:
      return BRW_DEPTHFORMAT_D32_FLOAT;
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
      if (devinfo->ver >= 6) {
         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
      } else {
         /* Use D24_UNORM_S8, not D24_UNORM_X8.
          *
          * D24_UNORM_X8 was not introduced until Gfx5. (See the Ironlake PRM,
          * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
          * 3DSTATE_DEPTH_BUFFER.Surface_Format).
          *
          * However, on Gfx5, D24_UNORM_X8 may be used only if separate
          * stencil is enabled, and we never enable it. From the Ironlake PRM,
          * same section as above, 3DSTATE_DEPTH_BUFFER's
          * "Separate Stencil Buffer Enable" bit:
          *
          *    "If this field is disabled, the Surface Format of the depth
          *     buffer cannot be D24_UNORM_X8_UINT."
          */
         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
      }
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
   default:
      unreachable("Unexpected depth format.");
   }
}
@ -1,642 +0,0 @@
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

/**
 * \file
 * \brief Support for GL_ARB_sync and EGL_KHR_fence_sync.
 *
 * GL_ARB_sync is implemented by flushing the current batchbuffer and keeping a
 * reference on it. We can then check for completion or wait for completion
 * using the normal buffer object mechanisms. This does mean that if an
 * application is using many sync objects, it will emit small batchbuffers
 * which may end up being a significant overhead. In other tests of removing
 * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
 * performance bottleneck, though.
 */
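
/* For orientation, the application-side pattern this file backs is plain
 * GL_ARB_sync usage (a sketch only, not driver code):
 *
 *    GLsync s = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 *    ...
 *    GLenum r = glClientWaitSync(s, GL_SYNC_FLUSH_COMMANDS_BIT, timeout_ns);
 *    glDeleteSync(s);
 *
 * Each glFenceSync() lands in brw_gl_fence_sync() below, which flushes the
 * batch and holds a reference on its buffer object.
 */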
#include <libsync.h> /* Requires Android or libdrm-2.4.72 */

#include "util/os_file.h"
#include "util/u_memory.h"
#include <xf86drm.h>

#include "brw_context.h"
#include "brw_batch.h"
#include "mesa/main/externalobjects.h"

struct brw_fence {
   struct brw_context *brw;

   enum brw_fence_type {
      /** The fence waits for completion of brw_fence::batch_bo. */
      BRW_FENCE_TYPE_BO_WAIT,

      /** The fence waits for brw_fence::sync_fd to signal. */
      BRW_FENCE_TYPE_SYNC_FD,
   } type;

   union {
      struct brw_bo *batch_bo;

      /* This struct owns the fd. */
      int sync_fd;
   };

   mtx_t mutex;
   bool signalled;
};

struct brw_gl_sync {
   struct gl_sync_object gl;
   struct brw_fence fence;
};

struct intel_semaphore_object {
   struct gl_semaphore_object Base;
   struct drm_syncobj_handle *syncobj;
};

static inline struct intel_semaphore_object *
intel_semaphore_object(struct gl_semaphore_object *sem_obj) {
   return (struct intel_semaphore_object*) sem_obj;
}

static struct gl_semaphore_object *
intel_semaphoreobj_alloc(struct gl_context *ctx, GLuint name)
{
   struct intel_semaphore_object *is_obj = CALLOC_STRUCT(intel_semaphore_object);
   if (!is_obj)
      return NULL;

   _mesa_initialize_semaphore_object(ctx, &is_obj->Base, name);
   return &is_obj->Base;
}

static void
intel_semaphoreobj_free(struct gl_context *ctx,
                        struct gl_semaphore_object *semObj)
{
   _mesa_delete_semaphore_object(ctx, semObj);
}

static void
intel_semaphoreobj_import(struct gl_context *ctx,
                          struct gl_semaphore_object *semObj,
                          int fd)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_screen *screen = brw->screen;
   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
   iSemObj->syncobj = CALLOC_STRUCT(drm_syncobj_handle);
   iSemObj->syncobj->fd = fd;

   if (drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, iSemObj->syncobj) < 0) {
      fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
              strerror(errno));
      free(iSemObj->syncobj);
   }
}

static void
intel_semaphoreobj_signal(struct gl_context *ctx,
                          struct gl_semaphore_object *semObj,
                          GLuint numBufferBarriers,
                          struct gl_buffer_object **bufObjs,
                          GLuint numTextureBarriers,
                          struct gl_texture_object **texObjs,
                          const GLenum *dstLayouts)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
   struct drm_i915_gem_exec_fence *fence =
      util_dynarray_grow(&brw->batch.exec_fences, struct drm_i915_gem_exec_fence *, 1);
   fence->flags = I915_EXEC_FENCE_SIGNAL;
   fence->handle = iSemObj->syncobj->handle;
   brw->batch.contains_fence_signal = true;
}

static void
intel_semaphoreobj_wait(struct gl_context *ctx,
                        struct gl_semaphore_object *semObj,
                        GLuint numBufferBarriers,
                        struct gl_buffer_object **bufObjs,
                        GLuint numTextureBarriers,
                        struct gl_texture_object **texObjs,
                        const GLenum *srcLayouts)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_screen *screen = brw->screen;
   struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
   struct drm_syncobj_wait args = {
      .handles = (uintptr_t)&iSemObj->syncobj->handle,
      .count_handles = 1,
   };

   drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
}

static void
brw_fence_init(struct brw_context *brw, struct brw_fence *fence,
               enum brw_fence_type type)
{
   fence->brw = brw;
   fence->type = type;
   mtx_init(&fence->mutex, mtx_plain);

   switch (type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      fence->batch_bo = NULL;
      break;
   case BRW_FENCE_TYPE_SYNC_FD:
      fence->sync_fd = -1;
      break;
   }
}

static void
brw_fence_finish(struct brw_fence *fence)
{
   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      if (fence->batch_bo)
         brw_bo_unreference(fence->batch_bo);
      break;
   case BRW_FENCE_TYPE_SYNC_FD:
      if (fence->sync_fd != -1)
         close(fence->sync_fd);
      break;
   }

   mtx_destroy(&fence->mutex);
}

static bool MUST_CHECK
brw_fence_insert_locked(struct brw_context *brw, struct brw_fence *fence)
{
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;

   /*
    * From KHR_fence_sync:
    *
    *   When the condition of the sync object is satisfied by the fence
    *   command, the sync is signaled by the associated client API context,
    *   causing any eglClientWaitSyncKHR commands (see below) blocking on
    *   <sync> to unblock. The only condition currently supported is
    *   EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR, which is satisfied by
    *   completion of the fence command corresponding to the sync object,
    *   and all preceding commands in the associated client API context's
    *   command stream. The sync object will not be signaled until all
    *   effects from these commands on the client API's internal and
    *   framebuffer state are fully realized. No other state is affected by
    *   execution of the fence command.
    *
    * Note the emphasis there on ensuring that the framebuffer is fully
    * realised before the fence is signaled. We cannot just flush the batch,
    * but must also resolve the drawable first. The importance of this is,
    * for example, in creating a fence for a frame to be passed to a
    * remote compositor. Without us flushing the drawable explicitly, the
    * resolve will be in a following batch (when the client finally calls
    * SwapBuffers, or triggers a resolve via some other path) and so the
    * compositor may read the incomplete framebuffer instead.
    */
   if (driDrawable)
      brw_resolve_for_dri2_flush(brw, driDrawable);
   brw_emit_mi_flush(brw);

   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      assert(!fence->batch_bo);
      assert(!fence->signalled);

      fence->batch_bo = brw->batch.batch.bo;
      brw_bo_reference(fence->batch_bo);

      if (brw_batch_flush(brw) < 0) {
         brw_bo_unreference(fence->batch_bo);
         fence->batch_bo = NULL;
         return false;
      }
      break;
   case BRW_FENCE_TYPE_SYNC_FD:
      assert(!fence->signalled);

      if (fence->sync_fd == -1) {
         /* Create an out-fence that signals after all pending commands
          * complete.
          */
         if (brw_batch_flush_fence(brw, -1, &fence->sync_fd) < 0)
            return false;
         assert(fence->sync_fd != -1);
      } else {
         /* Wait on the in-fence before executing any subsequently submitted
          * commands.
          */
         if (brw_batch_flush(brw) < 0)
            return false;

         /* Emit a dummy batch just for the fence. */
         brw_emit_mi_flush(brw);
         if (brw_batch_flush_fence(brw, fence->sync_fd, NULL) < 0)
            return false;
      }
      break;
   }

   return true;
}

static bool MUST_CHECK
brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
{
   bool ret;

   mtx_lock(&fence->mutex);
   ret = brw_fence_insert_locked(brw, fence);
   mtx_unlock(&fence->mutex);

   return ret;
}

static bool
brw_fence_has_completed_locked(struct brw_fence *fence)
{
   if (fence->signalled)
      return true;

   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      if (!fence->batch_bo) {
         /* There may be no batch if brw_batch_flush() failed. */
         return false;
      }

      if (brw_bo_busy(fence->batch_bo))
         return false;

      brw_bo_unreference(fence->batch_bo);
      fence->batch_bo = NULL;
      fence->signalled = true;

      return true;

   case BRW_FENCE_TYPE_SYNC_FD:
      assert(fence->sync_fd != -1);

      if (sync_wait(fence->sync_fd, 0) == -1)
         return false;

      fence->signalled = true;

      return true;
   }

   return false;
}

static bool
brw_fence_has_completed(struct brw_fence *fence)
{
   bool ret;

   mtx_lock(&fence->mutex);
   ret = brw_fence_has_completed_locked(fence);
   mtx_unlock(&fence->mutex);

   return ret;
}

static bool
brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
                             uint64_t timeout)
{
   int32_t timeout_i32;

   if (fence->signalled)
      return true;

   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      if (!fence->batch_bo) {
         /* There may be no batch if brw_batch_flush() failed. */
         return false;
      }

      /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
       * immediately for timeouts <= 0. The best we can do is to clamp the
       * timeout to INT64_MAX. This limits the maximum timeout from 584 years to
       * 292 years - likely not a big deal.
       */
      if (timeout > INT64_MAX)
         timeout = INT64_MAX;

      if (brw_bo_wait(fence->batch_bo, timeout) != 0)
         return false;

      fence->signalled = true;
      brw_bo_unreference(fence->batch_bo);
      fence->batch_bo = NULL;

      return true;
   case BRW_FENCE_TYPE_SYNC_FD:
      if (fence->sync_fd == -1)
         return false;

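      /* libsync's sync_wait() takes a signed 32-bit timeout where a
       * negative value means "block until the fence signals", so any
       * timeout too large to represent is treated as an unbounded wait
       * rather than being truncated.
       */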
      if (timeout > INT32_MAX)
         timeout_i32 = -1;
      else
         timeout_i32 = timeout;

      if (sync_wait(fence->sync_fd, timeout_i32) == -1)
         return false;

      fence->signalled = true;
      return true;
   }

   assert(!"bad enum brw_fence_type");
   return false;
}

/**
 * Return true if the function successfully signals or has already signalled.
 * (This matches the behavior expected from __DRI2fence::client_wait_sync).
 */
static bool
brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
                      uint64_t timeout)
{
   bool ret;

   mtx_lock(&fence->mutex);
   ret = brw_fence_client_wait_locked(brw, fence, timeout);
   mtx_unlock(&fence->mutex);

   return ret;
}

static void
brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
{
   switch (fence->type) {
   case BRW_FENCE_TYPE_BO_WAIT:
      /* We have nothing to do for WaitSync. Our GL command stream is sequential,
       * so given that the sync object has already flushed the batchbuffer, any
       * batchbuffers coming after this waitsync will naturally not occur until
       * the previous one is done.
       */
      break;
   case BRW_FENCE_TYPE_SYNC_FD:
      assert(fence->sync_fd != -1);

      /* The user wants explicit synchronization, so give them what they want. */
      if (!brw_fence_insert(brw, fence)) {
         /* FIXME: There exists no way yet to report an error here. If an error
          * occurs, continue silently and hope for the best.
          */
      }
      break;
   }
}

static struct gl_sync_object *
brw_gl_new_sync(struct gl_context *ctx)
{
   struct brw_gl_sync *sync;

   sync = calloc(1, sizeof(*sync));
   if (!sync)
      return NULL;

   return &sync->gl;
}

static void
brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
{
   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;

   brw_fence_finish(&sync->fence);
   free(sync->gl.Label);
   free(sync);
}

static void
brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
                  GLenum condition, GLbitfield flags)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;

   /* brw_fence_insert_locked() assumes it must do a complete flush */
   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);

   brw_fence_init(brw, &sync->fence, BRW_FENCE_TYPE_BO_WAIT);

   if (!brw_fence_insert_locked(brw, &sync->fence)) {
      /* FIXME: There exists no way to report a GL error here. If an error
       * occurs, continue silently and hope for the best.
       */
   }
}

static void
brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
                        GLbitfield flags, GLuint64 timeout)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;

   if (brw_fence_client_wait(brw, &sync->fence, timeout))
      sync->gl.StatusFlag = 1;
}

static void
brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
                        GLbitfield flags, GLuint64 timeout)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;

   brw_fence_server_wait(brw, &sync->fence);
}

static void
brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
{
   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;

   if (brw_fence_has_completed(&sync->fence))
      sync->gl.StatusFlag = 1;
}

void
brw_init_syncobj_functions(struct dd_function_table *functions)
{
   functions->NewSyncObject = brw_gl_new_sync;
   functions->DeleteSyncObject = brw_gl_delete_sync;
   functions->FenceSync = brw_gl_fence_sync;
   functions->CheckSync = brw_gl_check_sync;
   functions->ClientWaitSync = brw_gl_client_wait_sync;
   functions->ServerWaitSync = brw_gl_server_wait_sync;
   functions->NewSemaphoreObject = intel_semaphoreobj_alloc;
   functions->DeleteSemaphoreObject = intel_semaphoreobj_free;
   functions->ImportSemaphoreFd = intel_semaphoreobj_import;
   functions->ServerSignalSemaphoreObject = intel_semaphoreobj_signal;
   functions->ServerWaitSemaphoreObject = intel_semaphoreobj_wait;
}

static void *
brw_dri_create_fence(__DRIcontext *ctx)
{
   struct brw_context *brw = ctx->driverPrivate;
   struct brw_fence *fence;

   fence = calloc(1, sizeof(*fence));
   if (!fence)
      return NULL;

   brw_fence_init(brw, fence, BRW_FENCE_TYPE_BO_WAIT);

   if (!brw_fence_insert_locked(brw, fence)) {
      brw_fence_finish(fence);
      free(fence);
      return NULL;
   }

   return fence;
}

static void
brw_dri_destroy_fence(__DRIscreen *dri_screen, void *_fence)
{
   struct brw_fence *fence = _fence;

   brw_fence_finish(fence);
   free(fence);
}

static GLboolean
brw_dri_client_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags,
                         uint64_t timeout)
{
   struct brw_fence *fence = _fence;

   return brw_fence_client_wait(fence->brw, fence, timeout);
}

static void
brw_dri_server_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags)
{
   struct brw_fence *fence = _fence;

   /* We might be called here with a NULL fence as a result of WaitSyncKHR
    * on an EGL_KHR_reusable_sync fence. Nothing to do here in such a case.
    */
   if (!fence)
      return;

   brw_fence_server_wait(fence->brw, fence);
}

static unsigned
brw_dri_get_capabilities(__DRIscreen *dri_screen)
{
   struct brw_screen *screen = dri_screen->driverPrivate;
   unsigned caps = 0;

   if (screen->has_exec_fence)
      caps |= __DRI_FENCE_CAP_NATIVE_FD;

   return caps;
}

static void *
brw_dri_create_fence_fd(__DRIcontext *dri_ctx, int fd)
{
   struct brw_context *brw = dri_ctx->driverPrivate;
   struct brw_fence *fence;

   assert(brw->screen->has_exec_fence);

   fence = calloc(1, sizeof(*fence));
   if (!fence)
      return NULL;

   brw_fence_init(brw, fence, BRW_FENCE_TYPE_SYNC_FD);

   if (fd == -1) {
      /* Create an out-fence fd */
      if (!brw_fence_insert_locked(brw, fence))
         goto fail;
   } else {
      /* Import the sync fd as an in-fence. */
      fence->sync_fd = os_dupfd_cloexec(fd);
   }

   assert(fence->sync_fd != -1);

   return fence;

fail:
   brw_fence_finish(fence);
   free(fence);
   return NULL;
}

static int
brw_dri_get_fence_fd_locked(struct brw_fence *fence)
{
   assert(fence->type == BRW_FENCE_TYPE_SYNC_FD);
   return os_dupfd_cloexec(fence->sync_fd);
}

static int
brw_dri_get_fence_fd(__DRIscreen *dri_screen, void *_fence)
{
   struct brw_fence *fence = _fence;
   int fd;

   mtx_lock(&fence->mutex);
   fd = brw_dri_get_fence_fd_locked(fence);
   mtx_unlock(&fence->mutex);

   return fd;
}

const __DRI2fenceExtension brwFenceExtension = {
   .base = { __DRI2_FENCE, 2 },

   .create_fence = brw_dri_create_fence,
   .destroy_fence = brw_dri_destroy_fence,
   .client_wait_sync = brw_dri_client_wait_sync,
   .server_wait_sync = brw_dri_server_wait_sync,
   .get_fence_from_cl_event = NULL,
   .get_capabilities = brw_dri_get_capabilities,
   .create_fence_fd = brw_dri_create_fence_fd,
   .get_fence_fd = brw_dri_get_fence_fd,
};
@ -1,295 +0,0 @@
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_tcs.c
 *
 * Tessellation control shader state upload code.
 */

#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "brw_state.h"
#include "program/prog_parameter.h"
#include "nir_builder.h"

static bool
brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
                     struct brw_program *tep, struct brw_tcs_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->screen->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;
   struct brw_stage_state *stage_state = &brw->tcs.base;
   nir_shader *nir;
   struct brw_tcs_prog_data prog_data;
   bool start_busy = false;
   double start_time = 0;

   void *mem_ctx = ralloc_context(NULL);
   if (tcp) {
      nir = nir_shader_clone(mem_ctx, tcp->program.nir);
   } else {
      const nir_shader_compiler_options *options =
         ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions;
      nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);
   }

   memset(&prog_data, 0, sizeof(prog_data));

   if (tcp) {
      brw_assign_common_binding_table_offsets(devinfo, &tcp->program,
                                              &prog_data.base.base, 0);

      brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program,
                                  &prog_data.base.base,
                                  compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
      if (brw->can_push_ubos) {
         brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
                                    prog_data.base.base.ubo_ranges);
      }
   } else {
      /* Upload the Patch URB Header as the first two uniforms.
       * Do the annoying scrambling so the shader doesn't have to.
       */
      assert(nir->num_uniforms == 32);
      prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8);
      prog_data.base.base.nr_params = 8;

      uint32_t *param = prog_data.base.base.param;
      for (int i = 0; i < 8; i++)
         param[i] = BRW_PARAM_BUILTIN_ZERO;

      if (key->tes_primitive_mode == GL_QUADS) {
         for (int i = 0; i < 4; i++)
            param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         param[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
         param[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
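
         /* i.e. for GL_QUADS the array ends up as:
          *    param[7] = OUTER_X, param[6] = OUTER_Y,
          *    param[5] = OUTER_Z, param[4] = OUTER_W,
          *    param[3] = INNER_X, param[2] = INNER_Y,
          * with param[1..0] left at zero, which is the scrambled order
          * the comment above refers to.
          */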
      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
         for (int i = 0; i < 3; i++)
            param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         param[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
      } else {
         assert(key->tes_primitive_mode == GL_ISOLINES);
         param[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
         param[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
      }
   }

   int st_index = -1;
   if (INTEL_DEBUG(DEBUG_SHADER_TIME) && tep)
      st_index = brw_get_shader_time_index(brw, &tep->program, ST_TCS, true);

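   /* Snapshot whether the GPU still had queued work before compiling, so
    * the perf_debug check after compilation can tell whether this compile
    * outlasted the queued work and stalled rendering.
    */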
   if (unlikely(brw->perf_debug)) {
      start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
      start_time = get_time();
   }

   char *error_str;
   const unsigned *program =
      brw_compile_tcs(compiler, brw, mem_ctx, key, &prog_data, nir, st_index,
                      NULL, &error_str);
   if (program == NULL) {
      if (tep) {
         tep->program.sh.data->LinkStatus = LINKING_FAILURE;
         ralloc_strcat(&tep->program.sh.data->InfoLog, error_str);
      }

      _mesa_problem(NULL, "Failed to compile tessellation control shader: "
                    "%s\n", error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      if (tcp) {
         if (tcp->compiled_once) {
            brw_debug_recompile(brw, MESA_SHADER_TESS_CTRL, tcp->program.Id,
                                &key->base);
         }
         tcp->compiled_once = true;
      }

      if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
         perf_debug("TCS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
   }

   /* Scratch space is used for register spilling */
   brw_alloc_stage_scratch(brw, stage_state,
                           prog_data.base.base.total_scratch);

   /* The param and pull_param arrays will be freed by the shader cache. */
   ralloc_steal(NULL, prog_data.base.base.param);
   ralloc_steal(NULL, prog_data.base.base.pull_param);
   brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
                    key, sizeof(*key),
                    program, prog_data.base.base.program_size,
                    &prog_data, sizeof(prog_data),
                    &stage_state->prog_offset, &brw->tcs.base.prog_data);
   ralloc_free(mem_ctx);

   return true;
}

void
brw_tcs_populate_key(struct brw_context *brw,
                     struct brw_tcs_prog_key *key)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;
   struct brw_program *tcp =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
   struct brw_program *tep =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
   struct gl_program *tes_prog = &tep->program;

   uint64_t per_vertex_slots = tes_prog->info.inputs_read;
   uint32_t per_patch_slots = tes_prog->info.patch_inputs_read;

   memset(key, 0, sizeof(*key));

   if (tcp) {
      struct gl_program *prog = &tcp->program;
      per_vertex_slots |= prog->info.outputs_written;
      per_patch_slots |= prog->info.patch_outputs_written;
   }

   if (devinfo->ver < 8 || !tcp || compiler->use_tcs_8_patch)
      key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices;
   key->outputs_written = per_vertex_slots;
   key->patch_outputs_written = per_patch_slots;

   /* We need to specialize our code generation for tessellation levels
    * based on the domain the DS is expecting to tessellate.
    */
   key->tes_primitive_mode = tep->program.info.tess.primitive_mode;
   key->quads_workaround = devinfo->ver < 9 &&
      tep->program.info.tess.primitive_mode == GL_QUADS &&
      tep->program.info.tess.spacing == TESS_SPACING_EQUAL;

   if (tcp) {
      /* _NEW_TEXTURE */
      brw_populate_base_prog_key(&brw->ctx, tcp, &key->base);
   }
}

void
brw_upload_tcs_prog(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->tcs.base;
   struct brw_tcs_prog_key key;
   /* BRW_NEW_TESS_PROGRAMS */
   struct brw_program *tcp =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
   ASSERTED struct brw_program *tep =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
   assert(tep);

   if (!brw_state_dirty(brw,
                        _NEW_TEXTURE,
                        BRW_NEW_PATCH_PRIMITIVE |
                        BRW_NEW_TESS_PROGRAMS))
      return;

   brw_tcs_populate_key(brw, &key);

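   /* Three-level lookup: the in-memory program cache first, then the
    * on-disk shader cache, and only then an actual compile.
    */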
   if (brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG, &key, sizeof(key),
                        &stage_state->prog_offset, &brw->tcs.base.prog_data,
                        true))
      return;

   if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_CTRL))
      return;

   tcp = (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
   if (tcp)
      tcp->id = key.base.program_string_id;

   ASSERTED bool success = brw_codegen_tcs_prog(brw, tcp, tep, &key);
   assert(success);
}

void
brw_tcs_populate_default_key(const struct brw_compiler *compiler,
                             struct brw_tcs_prog_key *key,
                             struct gl_shader_program *sh_prog,
                             struct gl_program *prog)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   struct brw_program *btcp = brw_program(prog);
   const struct gl_linked_shader *tes =
      sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];

   memset(key, 0, sizeof(*key));

   brw_populate_default_base_prog_key(devinfo, btcp, &key->base);

   /* Guess that the input and output patches have the same dimensionality. */
   if (devinfo->ver < 8 || compiler->use_tcs_8_patch)
      key->input_vertices = prog->info.tess.tcs_vertices_out;

   if (tes) {
      key->tes_primitive_mode = tes->Program->info.tess.primitive_mode;
      key->quads_workaround = devinfo->ver < 9 &&
         tes->Program->info.tess.primitive_mode == GL_QUADS &&
         tes->Program->info.tess.spacing == TESS_SPACING_EQUAL;
   } else {
      key->tes_primitive_mode = GL_TRIANGLES;
   }

   key->outputs_written = prog->nir->info.outputs_written;
   key->patch_outputs_written = prog->nir->info.patch_outputs_written;
}

bool
brw_tcs_precompile(struct gl_context *ctx,
                   struct gl_shader_program *shader_prog,
                   struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;
   struct brw_tcs_prog_key key;
   uint32_t old_prog_offset = brw->tcs.base.prog_offset;
   struct brw_stage_prog_data *old_prog_data = brw->tcs.base.prog_data;
   bool success;

   struct brw_program *btcp = brw_program(prog);
   const struct gl_linked_shader *tes =
      shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
   struct brw_program *btep = tes ? brw_program(tes->Program) : NULL;

   brw_tcs_populate_default_key(compiler, &key, shader_prog, prog);

   success = brw_codegen_tcs_prog(brw, btcp, btep, &key);

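   /* brw_codegen_tcs_prog() updated the stage state as a side effect of
    * brw_upload_cache(); a precompile must not leave that visible, so put
    * the previous program back.
    */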
   brw->tcs.base.prog_offset = old_prog_offset;
   brw->tcs.base.prog_data = old_prog_data;

   return success;
}

@ -1,116 +0,0 @@
/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "main/shaderapi.h"

#include "brw_context.h"
#include "brw_state.h"


/* Creates a new TCS constant buffer reflecting the current TCS program's
 * constants, if needed by the TCS program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_tcs_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->tcs.base;

   /* BRW_NEW_TESS_PROGRAMS */
   struct brw_program *tcp =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];

   if (!tcp)
      return;

   /* BRW_NEW_TCS_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_TCS_CONSTBUF, &tcp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_tcs_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS,
   },
   .emit = brw_upload_tcs_pull_constants,
};

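/* Like every brw_tracked_state atom, this pairs a dirty-bit mask with an
 * emit callback: the state-upload loop re-runs ->emit whenever one of the
 * listed mesa/brw dirty bits has been flagged since the last draw.
 */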
static void
brw_upload_tcs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];

   /* BRW_NEW_TCS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;

   brw_upload_ubo_surfaces(brw, prog, &brw->tcs.base, prog_data);
}

const struct brw_tracked_state brw_tcs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_tcs_ubo_surfaces,
};

static void
brw_upload_tcs_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_TESS_PROGRAMS */
   const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];

   if (tcp) {
      /* BRW_NEW_TCS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
      brw_upload_image_surfaces(brw, tcp, &brw->tcs.base,
                                brw->tcs.base.prog_data);
   }
}

const struct brw_tracked_state brw_tcs_image_surfaces = {
   .dirty = {
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS,
   },
   .emit = brw_upload_tcs_image_surfaces,
};

@ -1,233 +0,0 @@
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_tes.c
 *
 * Tessellation evaluation shader state upload code.
 */

#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "brw_state.h"
#include "program/prog_parameter.h"

static bool
brw_codegen_tes_prog(struct brw_context *brw,
                     struct brw_program *tep,
                     struct brw_tes_prog_key *key)
{
   const struct brw_compiler *compiler = brw->screen->compiler;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct brw_stage_state *stage_state = &brw->tes.base;
   struct brw_tes_prog_data prog_data;
   bool start_busy = false;
   double start_time = 0;

   memset(&prog_data, 0, sizeof(prog_data));

   void *mem_ctx = ralloc_context(NULL);

   nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir);

   brw_assign_common_binding_table_offsets(devinfo, &tep->program,
                                           &prog_data.base.base, 0);

   brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program,
                               &prog_data.base.base,
                               compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
   if (brw->can_push_ubos) {
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
                                 prog_data.base.base.ubo_ranges);
   }

   int st_index = -1;
   if (INTEL_DEBUG(DEBUG_SHADER_TIME))
      st_index = brw_get_shader_time_index(brw, &tep->program, ST_TES, true);

   if (unlikely(brw->perf_debug)) {
      start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
      start_time = get_time();
   }

   struct brw_vue_map input_vue_map;
   brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                            key->patch_inputs_read);

   char *error_str;
   const unsigned *program =
      brw_compile_tes(compiler, brw, mem_ctx, key, &input_vue_map, &prog_data,
                      nir, st_index, NULL, &error_str);
   if (program == NULL) {
      tep->program.sh.data->LinkStatus = LINKING_FAILURE;
      ralloc_strcat(&tep->program.sh.data->InfoLog, error_str);

      _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: "
                    "%s\n", error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      if (tep->compiled_once) {
         brw_debug_recompile(brw, MESA_SHADER_TESS_EVAL, tep->program.Id,
                             &key->base);
      }
      if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
         perf_debug("TES compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
      tep->compiled_once = true;
   }

   /* Scratch space is used for register spilling */
   brw_alloc_stage_scratch(brw, stage_state,
                           prog_data.base.base.total_scratch);

   /* The param and pull_param arrays will be freed by the shader cache. */
   ralloc_steal(NULL, prog_data.base.base.param);
   ralloc_steal(NULL, prog_data.base.base.pull_param);
   brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
                    key, sizeof(*key),
                    program, prog_data.base.base.program_size,
                    &prog_data, sizeof(prog_data),
                    &stage_state->prog_offset, &brw->tes.base.prog_data);
   ralloc_free(mem_ctx);

   return true;
}

void
brw_tes_populate_key(struct brw_context *brw,
                     struct brw_tes_prog_key *key)
{
   struct brw_program *tcp =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
   struct brw_program *tep =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
   struct gl_program *prog = &tep->program;

   uint64_t per_vertex_slots = prog->info.inputs_read;
   uint32_t per_patch_slots = prog->info.patch_inputs_read;

   memset(key, 0, sizeof(*key));

   /* _NEW_TEXTURE */
   brw_populate_base_prog_key(&brw->ctx, tep, &key->base);

   /* The TCS may have additional outputs which aren't read by the
    * TES (possibly for cross-thread communication). These need to
    * be stored in the Patch URB Entry as well.
    */
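   /* The tessellation levels themselves are masked off below: they live
    * in the patch URB header rather than in the regular per-vertex slots
    * (at least in the usual i965 URB layout), so they never count as
    * ordinary TES inputs.
    */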
if (tcp) {
|
||||
struct gl_program *tcp_prog = &tcp->program;
|
||||
per_vertex_slots |= tcp_prog->info.outputs_written &
|
||||
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
|
||||
per_patch_slots |= tcp_prog->info.patch_outputs_written;
|
||||
}
|
||||
|
||||
key->inputs_read = per_vertex_slots;
|
||||
key->patch_inputs_read = per_patch_slots;
|
||||
}
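/* Added note: gl_TessLevelOuter/Inner are masked out of the shared slots
 * above because brw_compute_tess_vue_map() stores them in the patch header
 * rather than in ordinary patch URB slots, so the key only tracks real
 * varyings.
 */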

void
brw_upload_tes_prog(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->tes.base;
   struct brw_tes_prog_key key;
   /* BRW_NEW_TESS_PROGRAMS */
   struct brw_program *tep =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];

   if (!brw_state_dirty(brw,
                        _NEW_TEXTURE,
                        BRW_NEW_TESS_PROGRAMS))
      return;

   brw_tes_populate_key(brw, &key);

   if (brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG, &key, sizeof(key),
                        &stage_state->prog_offset, &brw->tes.base.prog_data,
                        true))
      return;

   if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_EVAL))
      return;

   tep = (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
   tep->id = key.base.program_string_id;

   ASSERTED bool success = brw_codegen_tes_prog(brw, tep, &key);
   assert(success);
}
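/* Added note: lookups above go through three tiers, the in-memory program
 * cache, then the on-disk shader cache, and only on a miss in both does
 * brw_codegen_tes_prog() run a fresh compile.
 */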

void
brw_tes_populate_default_key(const struct brw_compiler *compiler,
                             struct brw_tes_prog_key *key,
                             struct gl_shader_program *sh_prog,
                             struct gl_program *prog)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   struct brw_program *btep = brw_program(prog);

   memset(key, 0, sizeof(*key));

   brw_populate_default_base_prog_key(devinfo, btep, &key->base);

   key->inputs_read = prog->nir->info.inputs_read;
   key->patch_inputs_read = prog->nir->info.patch_inputs_read;

   if (sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
      struct gl_program *tcp =
         sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
      key->inputs_read |= tcp->nir->info.outputs_written &
         ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
      key->patch_inputs_read |= tcp->nir->info.patch_outputs_written;
   }
}

bool
brw_tes_precompile(struct gl_context *ctx,
                   struct gl_shader_program *shader_prog,
                   struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;
   struct brw_tes_prog_key key;
   uint32_t old_prog_offset = brw->tes.base.prog_offset;
   struct brw_stage_prog_data *old_prog_data = brw->tes.base.prog_data;
   bool success;

   struct brw_program *btep = brw_program(prog);

   brw_tes_populate_default_key(compiler, &key, shader_prog, prog);

   success = brw_codegen_tes_prog(brw, btep, &key);

   brw->tes.base.prog_offset = old_prog_offset;
   brw->tes.base.prog_data = old_prog_data;

   return success;
}
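/* Added note: the precompile intentionally saves and restores prog_offset
 * and prog_data around brw_codegen_tes_prog(), since codegen writes its
 * result into the live brw->tes.base stage state.
 */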

@@ -1,116 +0,0 @@
/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "main/shaderapi.h"

#include "brw_context.h"
#include "brw_state.h"


/* Creates a new TES constant buffer reflecting the current TES program's
 * constants, if needed by the TES program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_tes_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->tes.base;

   /* BRW_NEW_TESS_PROGRAMS */
   struct brw_program *dp =
      (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];

   if (!dp)
      return;

   /* BRW_NEW_TES_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_TES_CONSTBUF, &dp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_tes_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS,
   },
   .emit = brw_upload_tes_pull_constants,
};

static void
brw_upload_tes_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_TES_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;

   brw_upload_ubo_surfaces(brw, prog, &brw->tes.base, prog_data);
}

const struct brw_tracked_state brw_tes_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_tes_ubo_surfaces,
};

static void
brw_upload_tes_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_TESS_PROGRAMS */
   const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];

   if (tep) {
      /* BRW_NEW_TES_PROG_DATA, BRW_NEW_IMAGE_UNITS */
      brw_upload_image_surfaces(brw, tep, &brw->tes.base,
                                brw->tes.base.prog_data);
   }
}

const struct brw_tracked_state brw_tes_image_surfaces = {
   .dirty = {
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TES_PROG_DATA,
   },
   .emit = brw_upload_tes_image_surfaces,
};

@@ -1,415 +0,0 @@
#include "swrast/swrast.h"
|
||||
#include "main/renderbuffer.h"
|
||||
#include "main/texobj.h"
|
||||
#include "main/teximage.h"
|
||||
#include "main/mipmap.h"
|
||||
#include "drivers/common/meta.h"
|
||||
#include "brw_context.h"
|
||||
#include "brw_defines.h"
|
||||
#include "brw_buffer_objects.h"
|
||||
#include "brw_mipmap_tree.h"
|
||||
#include "brw_tex.h"
|
||||
#include "brw_fbo.h"
|
||||
#include "brw_state.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
|
||||
|
||||
static struct gl_texture_image *
|
||||
brw_new_texture_image(struct gl_context *ctx)
|
||||
{
|
||||
DBG("%s\n", __func__);
|
||||
(void) ctx;
|
||||
return (struct gl_texture_image *) CALLOC_STRUCT(brw_texture_image);
|
||||
}
|
||||
|
||||
static void
|
||||
brw_delete_texture_image(struct gl_context *ctx, struct gl_texture_image *img)
|
||||
{
|
||||
/* nothing special (yet) for brw_texture_image */
|
||||
_mesa_delete_texture_image(ctx, img);
|
||||
}
|
||||
|
||||
|
||||
static struct gl_texture_object *
|
||||
brw_new_texture_object(struct gl_context *ctx, GLuint name, GLenum target)
|
||||
{
|
||||
struct brw_texture_object *obj = CALLOC_STRUCT(brw_texture_object);
|
||||
|
||||
(void) ctx;
|
||||
|
||||
DBG("%s\n", __func__);
|
||||
|
||||
if (obj == NULL)
|
||||
return NULL;
|
||||
|
||||
_mesa_initialize_texture_object(ctx, &obj->base, name, target);
|
||||
|
||||
obj->needs_validate = true;
|
||||
|
||||
return &obj->base;
|
||||
}
|
||||
|
||||
static void
|
||||
brw_delete_texture_object(struct gl_context *ctx,
|
||||
struct gl_texture_object *texObj)
|
||||
{
|
||||
struct brw_texture_object *brw_obj = brw_texture_object(texObj);
|
||||
|
||||
brw_miptree_release(&brw_obj->mt);
|
||||
_mesa_delete_texture_object(ctx, texObj);
|
||||
}
|
||||
|
||||
static GLboolean
|
||||
brw_alloc_texture_image_buffer(struct gl_context *ctx,
|
||||
struct gl_texture_image *image)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_texture_image *intel_image = brw_texture_image(image);
|
||||
struct gl_texture_object *texobj = image->TexObject;
|
||||
struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
|
||||
|
||||
assert(image->Border == 0);
|
||||
|
||||
/* Quantize sample count */
|
||||
if (image->NumSamples) {
|
||||
image->NumSamples = brw_quantize_num_samples(brw->screen, image->NumSamples);
|
||||
if (!image->NumSamples)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Because the driver uses AllocTextureImageBuffer() internally, it may end
|
||||
* up mismatched with FreeTextureImageBuffer(), but that is safe to call
|
||||
* multiple times.
|
||||
*/
|
||||
ctx->Driver.FreeTextureImageBuffer(ctx, image);
|
||||
|
||||
if (!_swrast_init_texture_image(image))
|
||||
return false;
|
||||
|
||||
if (intel_texobj->mt &&
|
||||
brw_miptree_match_image(intel_texobj->mt, image)) {
|
||||
brw_miptree_reference(&intel_image->mt, intel_texobj->mt);
|
||||
DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n",
|
||||
__func__, texobj, image->Level,
|
||||
image->Width, image->Height, image->Depth, intel_texobj->mt);
|
||||
} else {
|
||||
intel_image->mt = brw_miptree_create_for_teximage(brw, intel_texobj,
|
||||
intel_image,
|
||||
MIPTREE_CREATE_DEFAULT);
|
||||
if (!intel_image->mt)
|
||||
return false;
|
||||
|
||||
/* Even if the object currently has a mipmap tree associated
|
||||
* with it, this one is a more likely candidate to represent the
|
||||
* whole object since our level didn't fit what was there
|
||||
* before, and any lower levels would fit into our miptree.
|
||||
*/
|
||||
brw_miptree_reference(&intel_texobj->mt, intel_image->mt);
|
||||
|
||||
DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n",
|
||||
__func__, texobj, image->Level,
|
||||
image->Width, image->Height, image->Depth, intel_image->mt);
|
||||
}
|
||||
|
||||
intel_texobj->needs_validate = true;
|
||||
|
||||
return true;
|
||||
}
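/* Added note: the image either shares the texture object's existing miptree
 * when brw_miptree_match_image() accepts it, or gets a fresh tree sized from
 * this level via brw_miptree_create_for_teximage() and becomes the object's
 * new preferred miptree.
 */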

/**
 * ctx->Driver.AllocTextureStorage() handler.
 *
 * Compare this to _mesa_AllocTextureStorage_sw, which would call into
 * brw_alloc_texture_image_buffer() above.
 */
static GLboolean
brw_alloc_texture_storage(struct gl_context *ctx,
                          struct gl_texture_object *texobj,
                          GLsizei levels, GLsizei width,
                          GLsizei height, GLsizei depth)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
   struct gl_texture_image *first_image = texobj->Image[0][0];
   int num_samples = brw_quantize_num_samples(brw->screen,
                                              first_image->NumSamples);
   const int numFaces = _mesa_num_tex_faces(texobj->Target);
   int face;
   int level;

   /* If the object's current miptree doesn't match what we need, make a new
    * one.
    */
   if (!intel_texobj->mt ||
       !brw_miptree_match_image(intel_texobj->mt, first_image) ||
       intel_texobj->mt->last_level != levels - 1) {
      brw_miptree_release(&intel_texobj->mt);

      brw_get_image_dims(first_image, &width, &height, &depth);
      intel_texobj->mt = brw_miptree_create(brw, texobj->Target,
                                            first_image->TexFormat,
                                            0, levels - 1,
                                            width, height, depth,
                                            MAX2(num_samples, 1),
                                            MIPTREE_CREATE_DEFAULT);

      if (intel_texobj->mt == NULL) {
         return false;
      }
   }

   for (face = 0; face < numFaces; face++) {
      for (level = 0; level < levels; level++) {
         struct gl_texture_image *image = texobj->Image[face][level];
         struct brw_texture_image *intel_image = brw_texture_image(image);

         image->NumSamples = num_samples;

         _swrast_free_texture_image_buffer(ctx, image);
         if (!_swrast_init_texture_image(image))
            return false;

         brw_miptree_reference(&intel_image->mt, intel_texobj->mt);
      }
   }

   /* The miptree is in a validated state, so no need to check later. */
   intel_texobj->needs_validate = false;
   intel_texobj->validated_first_level = 0;
   intel_texobj->validated_last_level = levels - 1;
   intel_texobj->_Format = first_image->TexFormat;

   return true;
}


static void
brw_free_texture_image_buffer(struct gl_context * ctx,
                              struct gl_texture_image *texImage)
{
   struct brw_texture_image *brw_image = brw_texture_image(texImage);

   DBG("%s\n", __func__);

   brw_miptree_release(&brw_image->mt);

   _swrast_free_texture_image_buffer(ctx, texImage);
}

/**
 * Map texture memory/buffer into user space.
 * Note: the region of interest parameters are ignored here.
 * \param mode  bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT
 * \param mapOut  returns start of mapping of region of interest
 * \param rowStrideOut  returns row stride in bytes
 */
static void
brw_map_texture_image(struct gl_context *ctx,
                      struct gl_texture_image *tex_image,
                      GLuint slice,
                      GLuint x, GLuint y, GLuint w, GLuint h,
                      GLbitfield mode,
                      GLubyte **map,
                      GLint *out_stride)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_texture_image *intel_image = brw_texture_image(tex_image);
   struct brw_mipmap_tree *mt = intel_image->mt;
   ptrdiff_t stride;

   /* Our texture data is always stored in a miptree. */
   assert(mt);

   /* Check that our caller wasn't confused about how to map a 1D texture. */
   assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY || h == 1);

   /* brw_miptree_map operates on a unified "slice" number that references the
    * cube face, since it's all just slices to the miptree code.
    */
   if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
      slice = tex_image->Face;

   brw_miptree_map(brw, mt,
                   tex_image->Level + tex_image->TexObject->Attrib.MinLevel,
                   slice + tex_image->TexObject->Attrib.MinLayer,
                   x, y, w, h, mode,
                   (void **)map, &stride);

   *out_stride = stride;
}

static void
brw_unmap_texture_image(struct gl_context *ctx,
                        struct gl_texture_image *tex_image, GLuint slice)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_texture_image *intel_image = brw_texture_image(tex_image);
   struct brw_mipmap_tree *mt = intel_image->mt;

   if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
      slice = tex_image->Face;

   brw_miptree_unmap(brw, mt,
                     tex_image->Level + tex_image->TexObject->Attrib.MinLevel,
                     slice + tex_image->TexObject->Attrib.MinLayer);
}

static GLboolean
brw_texture_view(struct gl_context *ctx,
                 struct gl_texture_object *texObj,
                 struct gl_texture_object *origTexObj)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_texture_object *intel_tex = brw_texture_object(texObj);
   struct brw_texture_object *intel_orig_tex = brw_texture_object(origTexObj);

   assert(intel_orig_tex->mt);
   brw_miptree_reference(&intel_tex->mt, intel_orig_tex->mt);

   /* Since we can only make views of immutable-format textures,
    * we can assume that everything is in origTexObj's miptree.
    *
    * Mesa core has already made us a copy of all the teximage objects,
    * except it hasn't copied our mt pointers, etc.
    */
   const int numFaces = _mesa_num_tex_faces(texObj->Target);
   const int numLevels = texObj->Attrib.NumLevels;

   int face;
   int level;

   for (face = 0; face < numFaces; face++) {
      for (level = 0; level < numLevels; level++) {
         struct gl_texture_image *image = texObj->Image[face][level];
         struct brw_texture_image *intel_image = brw_texture_image(image);

         brw_miptree_reference(&intel_image->mt, intel_orig_tex->mt);
      }
   }

   /* The miptree is in a validated state, so no need to check later. */
   intel_tex->needs_validate = false;
   intel_tex->validated_first_level = 0;
   intel_tex->validated_last_level = numLevels - 1;

   /* Set the validated texture format, with the same adjustments that
    * would have been applied to determine the underlying texture's
    * mt->format.
    */
   intel_tex->_Format = brw_depth_format_for_depthstencil_format(
      brw_lower_compressed_format(brw, texObj->Image[0][0]->TexFormat));

   return GL_TRUE;
}

static void
brw_texture_barrier(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (devinfo->ver >= 6) {
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_CS_STALL);

      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
   } else {
      brw_emit_mi_flush(brw);
   }
}
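/* Added note: the gfx6+ path is split into two PIPE_CONTROLs because a
 * cache flush and a cache invalidation in a single packet are not ordered
 * against each other; the CS stall ensures prior rendering completes
 * before the texture cache is invalidated.
 */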

/* Return the usual surface usage flags for the given format. */
static isl_surf_usage_flags_t
isl_surf_usage(mesa_format format)
{
   switch(_mesa_get_format_base_format(format)) {
   case GL_DEPTH_COMPONENT:
      return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
   case GL_DEPTH_STENCIL:
      return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT |
             ISL_SURF_USAGE_TEXTURE_BIT;
   case GL_STENCIL_INDEX:
      return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
   default:
      return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
   }
}
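/* Added example: MESA_FORMAT_Z24_UNORM_S8_UINT has base format
 * GL_DEPTH_STENCIL and so gets DEPTH | STENCIL | TEXTURE usage, while a
 * color format such as MESA_FORMAT_R8G8B8A8_UNORM takes the default case,
 * RENDER_TARGET | TEXTURE.
 */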

static GLboolean
intel_texture_for_memory_object(struct gl_context *ctx,
                                struct gl_texture_object *tex_obj,
                                struct gl_memory_object *mem_obj,
                                GLsizei levels, GLsizei width,
                                GLsizei height, GLsizei depth,
                                GLuint64 offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_memory_object *intel_memobj = brw_memory_object(mem_obj);
   struct brw_texture_object *intel_texobj = brw_texture_object(tex_obj);
   struct gl_texture_image *image = tex_obj->Image[0][0];
   struct isl_surf surf;

   /* Only color formats are supported. */
   if (!_mesa_is_format_color_format(image->TexFormat))
      return GL_FALSE;

   isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK;
   if (tex_obj->TextureTiling == GL_LINEAR_TILING_EXT)
      tiling_flags = ISL_TILING_LINEAR_BIT;

   UNUSED const bool isl_surf_created_successfully =
      isl_surf_init(&brw->screen->isl_dev, &surf,
                    .dim = get_isl_surf_dim(tex_obj->Target),
                    .format = brw_isl_format_for_mesa_format(image->TexFormat),
                    .width = width,
                    .height = height,
                    .depth = depth,
                    .levels = levels,
                    .array_len = tex_obj->Target == GL_TEXTURE_3D ? 1 : depth,
                    .samples = MAX2(image->NumSamples, 1),
                    .usage = isl_surf_usage(image->TexFormat),
                    .tiling_flags = tiling_flags);

   assert(isl_surf_created_successfully);

   intel_texobj->mt = brw_miptree_create_for_bo(brw,
                                                intel_memobj->bo,
                                                image->TexFormat,
                                                offset,
                                                width,
                                                height,
                                                depth,
                                                surf.row_pitch_B,
                                                surf.tiling,
                                                MIPTREE_CREATE_NO_AUX);
   assert(intel_texobj->mt);
   brw_alloc_texture_image_buffer(ctx, image);

   intel_texobj->needs_validate = false;
   intel_texobj->validated_first_level = 0;
   intel_texobj->validated_last_level = levels - 1;
   intel_texobj->_Format = image->TexFormat;

   return GL_TRUE;
}

void
brw_init_texture_functions(struct dd_function_table *functions)
{
   functions->NewTextureObject = brw_new_texture_object;
   functions->NewTextureImage = brw_new_texture_image;
   functions->DeleteTextureImage = brw_delete_texture_image;
   functions->DeleteTexture = brw_delete_texture_object;
   functions->AllocTextureImageBuffer = brw_alloc_texture_image_buffer;
   functions->FreeTextureImageBuffer = brw_free_texture_image_buffer;
   functions->AllocTextureStorage = brw_alloc_texture_storage;
   functions->MapTextureImage = brw_map_texture_image;
   functions->UnmapTextureImage = brw_unmap_texture_image;
   functions->TextureView = brw_texture_view;
   functions->TextureBarrier = brw_texture_barrier;
   functions->SetTextureStorageForMemoryObject = intel_texture_for_memory_object;
}

@@ -1,58 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef INTELTEX_INC
#define INTELTEX_INC

#include "main/mtypes.h"
#include "main/formats.h"
#include "brw_context.h"
#include "brw_mipmap_tree.h"

void brw_init_texture_functions(struct dd_function_table *functions);

void brw_init_texture_image_functions(struct dd_function_table *functions);

void brw_init_texture_copy_image_functions(struct dd_function_table *functs);

void brw_init_copy_image_functions(struct dd_function_table *functions);

void brw_set_texbuffer(__DRIcontext *pDRICtx,
                       GLint target, __DRIdrawable *pDraw);
void brw_set_texbuffer2(__DRIcontext *pDRICtx,
                        GLint target, GLint format, __DRIdrawable *pDraw);
void brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target,
                           __DRIdrawable *dPriv);

struct brw_mipmap_tree *
brw_miptree_create_for_teximage(struct brw_context *brw,
                                struct brw_texture_object *brw_obj,
                                struct brw_texture_image *brw_image,
                                enum brw_miptree_create_flags flags);

void brw_finalize_mipmap_tree(struct brw_context *brw,
                              struct gl_texture_object *tex_obj);

#endif

@@ -1,72 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "main/mtypes.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/texstate.h"
#include "main/fbobject.h"

#include "drivers/common/meta.h"

#include "brw_screen.h"
#include "brw_mipmap_tree.h"
#include "brw_fbo.h"
#include "brw_tex.h"
#include "brw_context.h"

#define FILE_DEBUG_FLAG DEBUG_TEXTURE


static void
brw_copytexsubimage(struct gl_context *ctx, GLuint dims,
                    struct gl_texture_image *texImage,
                    GLint xoffset, GLint yoffset, GLint slice,
                    struct gl_renderbuffer *rb,
                    GLint x, GLint y,
                    GLsizei width, GLsizei height)
{
   struct brw_context *brw = brw_context(ctx);

   /* Try BLORP first.  It can handle almost everything. */
   if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
                                 xoffset, yoffset, width, height))
      return;

   /* Finally, fall back to meta.  This will likely be slow. */
   perf_debug("%s - fallback to swrast\n", __func__);
   _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
                              xoffset, yoffset, slice,
                              rb, x, y, width, height);
}


void
brw_init_texture_copy_image_functions(struct dd_function_table *functions)
{
   functions->CopyTexSubImage = brw_copytexsubimage;
}
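/* Added note: despite the "swrast" wording in the perf_debug message above,
 * the fallback actually taken is _mesa_meta_CopyTexSubImage().
 */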

@@ -1,992 +0,0 @@

#include "main/macros.h"
|
||||
#include "main/mtypes.h"
|
||||
#include "main/enums.h"
|
||||
#include "main/bufferobj.h"
|
||||
#include "main/context.h"
|
||||
#include "main/formats.h"
|
||||
#include "main/glformats.h"
|
||||
#include "main/image.h"
|
||||
#include "main/pbo.h"
|
||||
#include "main/renderbuffer.h"
|
||||
#include "main/texcompress.h"
|
||||
#include "main/texgetimage.h"
|
||||
#include "main/texobj.h"
|
||||
#include "main/teximage.h"
|
||||
#include "main/texstore.h"
|
||||
#include "main/glthread.h"
|
||||
|
||||
#include "drivers/common/meta.h"
|
||||
|
||||
#include "brw_mipmap_tree.h"
|
||||
#include "brw_buffer_objects.h"
|
||||
#include "brw_batch.h"
|
||||
#include "brw_tex.h"
|
||||
#include "brw_fbo.h"
|
||||
#include "brw_image.h"
|
||||
#include "brw_context.h"
|
||||
#include "brw_blorp.h"
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
|
||||
|
||||
/* Make sure one doesn't end up shrinking base level zero unnecessarily.
|
||||
* Determining the base level dimension by shifting higher level dimension
|
||||
* ends up in off-by-one value in case base level has NPOT size (for example,
|
||||
* 293 != 146 << 1).
|
||||
* Choose the original base level dimension when shifted dimensions agree.
|
||||
* Otherwise assume real resize is intended and use the new shifted value.
|
||||
*/
|
||||
static unsigned
|
||||
get_base_dim(unsigned old_base_dim, unsigned new_level_dim, unsigned level)
|
||||
{
|
||||
const unsigned old_level_dim = old_base_dim >> level;
|
||||
const unsigned new_base_dim = new_level_dim << level;
|
||||
|
||||
return old_level_dim == new_level_dim ? old_base_dim : new_base_dim;
|
||||
}
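/* Added worked example: with old_base_dim == 293 and level == 1,
 * old_level_dim is 146.  A 146-wide level 1 upload therefore keeps the
 * 293-wide base (146 << 1 == 292 would shrink level 0 by one pixel),
 * while a 100-wide upload yields a new 200-wide base, treated as a
 * deliberate resize.
 */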

/* Work back from the specified level of the image to the baselevel and create a
 * miptree of that size.
 */
struct brw_mipmap_tree *
brw_miptree_create_for_teximage(struct brw_context *brw,
                                struct brw_texture_object *brw_obj,
                                struct brw_texture_image *brw_image,
                                enum brw_miptree_create_flags flags)
{
   GLuint lastLevel;
   int width, height, depth;
   unsigned old_width = 0, old_height = 0, old_depth = 0;
   const struct brw_mipmap_tree *old_mt = brw_obj->mt;
   const unsigned level = brw_image->base.Base.Level;

   brw_get_image_dims(&brw_image->base.Base, &width, &height, &depth);

   if (old_mt) {
      old_width = old_mt->surf.logical_level0_px.width;
      old_height = old_mt->surf.logical_level0_px.height;
      old_depth = old_mt->surf.dim == ISL_SURF_DIM_3D ?
                  old_mt->surf.logical_level0_px.depth :
                  old_mt->surf.logical_level0_px.array_len;
   }

   DBG("%s\n", __func__);

   /* Figure out image dimensions at start level. */
   switch(brw_obj->base.Target) {
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_EXTERNAL_OES:
      assert(level == 0);
      break;
   case GL_TEXTURE_3D:
      depth = old_mt ? get_base_dim(old_depth, depth, level) :
                       depth << level;
      FALLTHROUGH;
   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY:
   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      height = old_mt ? get_base_dim(old_height, height, level) :
                        height << level;
      FALLTHROUGH;
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY:
      width = old_mt ? get_base_dim(old_width, width, level) :
                       width << level;
      break;
   default:
      unreachable("Unexpected target");
   }

   /* Guess a reasonable value for lastLevel.  This is probably going
    * to be wrong fairly often and might mean that we have to look at
    * resizable buffers, or require that buffers implement lazy
    * pagetable arrangements.
    */
   if ((brw_obj->base.Sampler.Attrib.MinFilter == GL_NEAREST ||
        brw_obj->base.Sampler.Attrib.MinFilter == GL_LINEAR) &&
       brw_image->base.Base.Level == 0 &&
       !brw_obj->base.Attrib.GenerateMipmap) {
      lastLevel = 0;
   } else {
      lastLevel = _mesa_get_tex_max_num_levels(brw_obj->base.Target,
                                               width, height, depth) - 1;
   }

   return brw_miptree_create(brw,
                             brw_obj->base.Target,
                             brw_image->base.Base.TexFormat,
                             0,
                             lastLevel,
                             width,
                             height,
                             depth,
                             MAX2(brw_image->base.Base.NumSamples, 1),
                             flags);
}
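/* Added example: a 64x64 glTexImage2D upload at level 2 with no existing
 * miptree allocates a tree with a 256x256 (64 << 2) base and a full mip
 * chain; only a level 0 upload with a non-mipmapping MIN_FILTER and no
 * GenerateMipmap gets the single-level tree.
 */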

static bool
brw_texsubimage_blorp(struct brw_context *brw, GLuint dims,
                      struct gl_texture_image *tex_image,
                      unsigned x, unsigned y, unsigned z,
                      unsigned width, unsigned height, unsigned depth,
                      GLenum format, GLenum type, const void *pixels,
                      const struct gl_pixelstore_attrib *packing)
{
   struct brw_texture_image *intel_image = brw_texture_image(tex_image);
   const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel;
   const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z;

   /* The blorp path can't understand crazy format hackery */
   if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) !=
       _mesa_get_format_base_format(tex_image->TexFormat))
      return false;

   return brw_blorp_upload_miptree(brw, intel_image->mt, tex_image->TexFormat,
                                   mt_level, x, y, mt_z, width, height, depth,
                                   tex_image->TexObject->Target, format, type,
                                   pixels, packing);
}

/**
 * \brief A fast path for glTexImage and glTexSubImage.
 *
 * This fast path is taken when the texture format is BGRA, RGBA,
 * A or L and when the texture memory is X- or Y-tiled.  It uploads
 * the texture data by mapping the texture memory without a GTT fence, thus
 * acquiring a tiled view of the memory, and then copying successive
 * spans within each tile.
 *
 * This is a performance win over the conventional texture upload path because
 * it avoids the performance penalty of writing through the write-combine
 * buffer.  In the conventional texture upload path,
 * texstore.c:store_texsubimage(), the texture memory is mapped through a GTT
 * fence, thus acquiring a linear view of the memory, then each row in the
 * image is memcpy'd.  In this fast path, we replace each row's copy with
 * a sequence of copies over each linear span in tile.
 *
 * One use case is Google Chrome's paint rectangles.  Chrome (as
 * of version 21) renders each page as a tiling of 256x256 GL_BGRA textures.
 * Each page's content is initially uploaded with glTexImage2D and damaged
 * regions are updated with glTexSubImage2D.  On some workloads, the
 * performance gain of this fastpath on Sandybridge is over 5x.
 */
static bool
brw_texsubimage_tiled_memcpy(struct gl_context * ctx,
                             GLuint dims,
                             struct gl_texture_image *texImage,
                             GLint xoffset, GLint yoffset, GLint zoffset,
                             GLsizei width, GLsizei height, GLsizei depth,
                             GLenum format, GLenum type,
                             const GLvoid *pixels,
                             const struct gl_pixelstore_attrib *packing)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct brw_texture_image *image = brw_texture_image(texImage);
   int src_pitch;

   /* The miptree's buffer. */
   struct brw_bo *bo;

   uint32_t cpp;
   isl_memcpy_type copy_type;

   /* This fastpath is restricted to specific texture types:
    * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
    * more types.
    *
    * FINISHME: The restrictions below on packing alignment and packing row
    * length are likely unneeded now because we calculate the source stride
    * with _mesa_image_row_stride. However, before removing the restrictions
    * we need tests.
    */
   if (!devinfo->has_llc ||
       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
       !(texImage->TexObject->Target == GL_TEXTURE_2D ||
         texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
       pixels == NULL ||
       packing->BufferObj ||
       packing->Alignment > 4 ||
       packing->SkipPixels > 0 ||
       packing->SkipRows > 0 ||
       (packing->RowLength != 0 && packing->RowLength != width) ||
       packing->SwapBytes ||
       packing->LsbFirst ||
       packing->Invert)
      return false;

   /* Only a simple blit, no scale, bias or other mapping. */
   if (ctx->_ImageTransferState)
      return false;

   copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type,
                                           &cpp);
   if (copy_type == ISL_MEMCPY_INVALID)
      return false;

   /* If this is a nontrivial texture view, let another path handle it instead. */
   if (texImage->TexObject->Attrib.MinLayer)
      return false;

   if (!image->mt ||
       (image->mt->surf.tiling != ISL_TILING_X &&
        image->mt->surf.tiling != ISL_TILING_Y0)) {
      /* The algorithm is written only for X- or Y-tiled memory. */
      return false;
   }

   /* linear_to_tiled() assumes that if the object is swizzled, it is using
    * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.  This is only
    * true on gfx5 and above.
    *
    * The killer on top is that some gfx4 have an L-shaped swizzle mode, where
    * parts of the memory aren't swizzled at all. Userspace just can't handle
    * that.
    */
   if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
      return false;

   int level = texImage->Level + texImage->TexObject->Attrib.MinLevel;

   /* Since we are going to write raw data to the miptree, we need to resolve
    * any pending fast color clears before we start.
    */
   assert(image->mt->surf.logical_level0_px.depth == 1);
   assert(image->mt->surf.logical_level0_px.array_len == 1);

   brw_miptree_access_raw(brw, image->mt, level, 0, true);

   bo = image->mt->bo;

   if (brw_batch_references(&brw->batch, bo)) {
      perf_debug("Flushing before mapping a referenced bo.\n");
      brw_batch_flush(brw);
   }

   void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_RAW);
   if (map == NULL) {
      DBG("%s: failed to map bo\n", __func__);
      return false;
   }

   src_pitch = _mesa_image_row_stride(packing, width, format, type);

   /* We postponed printing this message until having committed to executing
    * the function.
    */
   DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
       "mesa_format=0x%x tiling=%d "
       "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d) ",
       __func__, texImage->Level, xoffset, yoffset, width, height,
       format, type, texImage->TexFormat, image->mt->surf.tiling,
       packing->Alignment, packing->RowLength, packing->SkipPixels,
       packing->SkipRows);

   /* Adjust x and y offset based on miplevel */
   unsigned level_x, level_y;
   brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y);
   xoffset += level_x;
   yoffset += level_y;

   isl_memcpy_linear_to_tiled(
      xoffset * cpp, (xoffset + width) * cpp,
      yoffset, yoffset + height,
      map,
      pixels,
      image->mt->surf.row_pitch_B, src_pitch,
      devinfo->has_bit6_swizzle,
      image->mt->surf.tiling,
      copy_type
   );

   brw_bo_unmap(bo);
   return true;
}
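/* Added note: MAP_RAW hands back the tiled buffer contents directly, and
 * isl_memcpy_linear_to_tiled() performs the tiling address swizzle on the
 * CPU, which is why the bit-6 swizzling quirks are checked up front.
 */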


static void
brw_upload_tex(struct gl_context * ctx,
               GLuint dims,
               struct gl_texture_image *texImage,
               GLint xoffset, GLint yoffset, GLint zoffset,
               GLsizei width, GLsizei height, GLsizei depth,
               GLenum format, GLenum type,
               const GLvoid * pixels,
               const struct gl_pixelstore_attrib *packing)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_mipmap_tree *mt = brw_texture_image(texImage)->mt;
   bool ok;

   /* Check that there is actually data to store. */
   if (pixels == NULL && !packing->BufferObj)
      return;

   bool tex_busy = mt &&
      (brw_batch_references(&brw->batch, mt->bo) || brw_bo_busy(mt->bo));

   if (packing->BufferObj || tex_busy ||
       mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
      ok = brw_texsubimage_blorp(brw, dims, texImage,
                                 xoffset, yoffset, zoffset,
                                 width, height, depth, format, type,
                                 pixels, packing);
      if (ok)
         return;
   }

   ok = brw_texsubimage_tiled_memcpy(ctx, dims, texImage,
                                     xoffset, yoffset, zoffset,
                                     width, height, depth,
                                     format, type, pixels, packing);
   if (ok)
      return;

   _mesa_store_texsubimage(ctx, dims, texImage,
                           xoffset, yoffset, zoffset,
                           width, height, depth,
                           format, type, pixels, packing);
}


static void
brw_teximage(struct gl_context * ctx,
             GLuint dims,
             struct gl_texture_image *texImage,
             GLenum format, GLenum type, const void *pixels,
             const struct gl_pixelstore_attrib *unpack)
{
   DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
       __func__, _mesa_get_format_name(texImage->TexFormat),
       _mesa_enum_to_string(texImage->TexObject->Target),
       _mesa_enum_to_string(format), _mesa_enum_to_string(type),
       texImage->Level, texImage->Width, texImage->Height, texImage->Depth);

   /* Allocate storage for texture data. */
   if (!ctx->Driver.AllocTextureImageBuffer(ctx, texImage)) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage%uD", dims);
      return;
   }

   assert(brw_texture_image(texImage)->mt);

   brw_upload_tex(ctx, dims, texImage, 0, 0, 0,
                  texImage->Width, texImage->Height, texImage->Depth,
                  format, type, pixels, unpack);
}


static void
brw_texsubimage(struct gl_context * ctx,
                GLuint dims,
                struct gl_texture_image *texImage,
                GLint xoffset, GLint yoffset, GLint zoffset,
                GLsizei width, GLsizei height, GLsizei depth,
                GLenum format, GLenum type,
                const GLvoid * pixels,
                const struct gl_pixelstore_attrib *packing)
{
   DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
       __func__, _mesa_get_format_name(texImage->TexFormat),
       _mesa_enum_to_string(texImage->TexObject->Target),
       _mesa_enum_to_string(format), _mesa_enum_to_string(type),
       texImage->Level, texImage->Width, texImage->Height, texImage->Depth);

   brw_upload_tex(ctx, dims, texImage, xoffset, yoffset, zoffset,
                  width, height, depth, format, type, pixels, packing);
}


static void
brw_set_texture_image_mt(struct brw_context *brw,
                         struct gl_texture_image *image,
                         GLenum internal_format,
                         mesa_format format,
                         struct brw_mipmap_tree *mt)

{
   struct gl_texture_object *texobj = image->TexObject;
   struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
   struct brw_texture_image *intel_image = brw_texture_image(image);

   _mesa_init_teximage_fields(&brw->ctx, image,
                              mt->surf.logical_level0_px.width,
                              mt->surf.logical_level0_px.height, 1,
                              0, internal_format, format);

   brw->ctx.Driver.FreeTextureImageBuffer(&brw->ctx, image);

   intel_texobj->needs_validate = true;
   intel_image->base.RowStride = mt->surf.row_pitch_B / mt->cpp;
   assert(mt->surf.row_pitch_B % mt->cpp == 0);

   brw_miptree_reference(&intel_image->mt, mt);

   /* Immediately validate the image to the object. */
   brw_miptree_reference(&intel_texobj->mt, mt);
}


void
brw_set_texbuffer2(__DRIcontext *pDRICtx, GLint target,
                   GLint texture_format,
                   __DRIdrawable *dPriv)
{
   struct gl_framebuffer *fb = dPriv->driverPrivate;
   struct brw_context *brw = pDRICtx->driverPrivate;
   struct gl_context *ctx = &brw->ctx;
   struct brw_renderbuffer *rb;
   struct gl_texture_object *texObj;
   struct gl_texture_image *texImage;
   mesa_format texFormat = MESA_FORMAT_NONE;
   GLenum internal_format = 0;

   _mesa_glthread_finish(ctx);

   texObj = _mesa_get_current_tex_object(ctx, target);

   if (!texObj)
      return;

   if (dPriv->lastStamp != dPriv->dri2.stamp ||
       !pDRICtx->driScreenPriv->dri2.useInvalidate)
      brw_update_renderbuffers(pDRICtx, dPriv);

   rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   /* If the miptree isn't set, then intel_update_renderbuffers was unable
    * to get the BO for the drawable from the window system.
    */
   if (!rb || !rb->mt)
      return;

   /* Neither the EGL nor the GLX texture_from_pixmap specs say anything about
    * sRGB.  They are both from a time where sRGB was considered an extra
    * encoding step you did as part of rendering/blending and not a format.
    * Even though we have concept of sRGB visuals, X has classically assumed
    * that your data is just bits and sRGB rendering is entirely a client-side
    * rendering construct.  The assumption is that the result of BindTexImage
    * is a texture with a linear format even if it was rendered with sRGB
    * encoding enabled.
    */
   texFormat = _mesa_get_srgb_format_linear(brw_rb_format(rb));

   if (rb->mt->cpp == 4) {
      /* The extra texture_format parameter indicates whether the alpha
       * channel should be respected or ignored.  If we set internal_format to
       * GL_RGB, the texture handling code is smart enough to swap the format
       * or apply a swizzle if the underlying format is RGBA so we don't need
       * to stomp it to RGBX or anything like that.
       */
      if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
         internal_format = GL_RGB;
      else
         internal_format = GL_RGBA;
   } else if (rb->mt->cpp == 2) {
      internal_format = GL_RGB;
   }

   brw_miptree_finish_external(brw, rb->mt);

   _mesa_lock_texture(&brw->ctx, texObj);
   texImage = _mesa_get_tex_image(ctx, texObj, target, 0);
   brw_set_texture_image_mt(brw, texImage, internal_format,
                            texFormat, rb->mt);
   _mesa_unlock_texture(&brw->ctx, texObj);
}

void
brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target,
                      __DRIdrawable *dPriv)
{
   struct brw_context *brw = pDRICtx->driverPrivate;
   struct gl_context *ctx = &brw->ctx;
   struct gl_texture_object *tex_obj;
   struct brw_texture_object *intel_tex;

   tex_obj = _mesa_get_current_tex_object(ctx, target);
   if (!tex_obj)
      return;

   _mesa_lock_texture(&brw->ctx, tex_obj);

   intel_tex = brw_texture_object(tex_obj);
   if (!intel_tex->mt) {
      _mesa_unlock_texture(&brw->ctx, tex_obj);
      return;
   }

   /* The brw_miptree_prepare_external below as well as the finish_external
    * above in brw_set_texbuffer2 *should* do nothing.  The BindTexImage call
    * from both GLX and EGL has TexImage2D and not TexSubImage2D semantics so
    * the texture is not immutable.  This means that the user cannot create a
    * texture view of the image with a different format.  Since the only three
    * formats available when using BindTexImage are all UNORM, we can never
    * end up with an sRGB format being used for texturing and so we shouldn't
    * get any format-related resolves when texturing from it.
    *
    * While very unlikely, it is possible that the client could use the bound
    * texture with GL_ARB_image_load_store.  In that case, we'll do a resolve
    * but that's not actually a problem as it just means that we lose
    * compression on this texture until the next time it's used as a render
    * target.
    *
    * The only other way we could end up with an unexpected aux usage would be
    * if we rendered to the image from the same context as we have it bound as
    * a texture between BindTexImage and ReleaseTexImage.  However, the spec
    * clearly calls this case out and says you shouldn't do that.  It doesn't
    * explicitly prevent binding the texture to a framebuffer but it says the
    * results of trying to render to it while bound are undefined.
    *
    * Just to keep everything safe and sane, we do a prepare_external but it
    * should be a no-op in almost all cases.  On the off chance that someone
    * ever triggers this, we should at least warn them.
    */
   if (intel_tex->mt->aux_buf &&
       brw_miptree_get_aux_state(intel_tex->mt, 0, 0) !=
       isl_drm_modifier_get_default_aux_state(intel_tex->mt->drm_modifier)) {
      _mesa_warning(ctx, "Aux state changed between BindTexImage and "
                    "ReleaseTexImage.  Most likely someone tried to draw "
                    "to the pixmap bound in BindTexImage or used it with "
                    "image_load_store.");
   }

   brw_miptree_prepare_external(brw, intel_tex->mt);

   _mesa_unlock_texture(&brw->ctx, tex_obj);
}

static GLboolean
brw_bind_renderbuffer_tex_image(struct gl_context *ctx,
                                struct gl_renderbuffer *rb,
                                struct gl_texture_image *image)
{
   struct brw_renderbuffer *irb = brw_renderbuffer(rb);
   struct brw_texture_image *intel_image = brw_texture_image(image);
   struct gl_texture_object *texobj = image->TexObject;
   struct brw_texture_object *intel_texobj = brw_texture_object(texobj);

   /* We can only handle RB allocated with AllocRenderbufferStorage, or
    * window-system renderbuffers.
    */
   assert(!rb->TexImage);

   if (!irb->mt)
      return false;

   _mesa_lock_texture(ctx, texobj);
   _mesa_init_teximage_fields(ctx, image, rb->Width, rb->Height, 1, 0,
                              rb->InternalFormat, rb->Format);
   image->NumSamples = rb->NumSamples;

   brw_miptree_reference(&intel_image->mt, irb->mt);

   /* Immediately validate the image to the object. */
   brw_miptree_reference(&intel_texobj->mt, intel_image->mt);

   intel_texobj->needs_validate = true;
   _mesa_unlock_texture(ctx, texobj);

   return true;
}

void
brw_set_texbuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
{
   /* The old interface didn't have the format argument, so copy our
    * implementation's behavior at the time.
    */
   brw_set_texbuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
}

static void
brw_image_target_texture(struct gl_context *ctx, GLenum target,
                         struct gl_texture_object *texObj,
                         struct gl_texture_image *texImage,
                         GLeglImageOES image_handle,
                         bool storage)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_mipmap_tree *mt;
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
   __DRIimage *image;

   image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle,
                                                  dri_screen->loaderPrivate);
   if (image == NULL)
      return;

   /* Disallow depth/stencil textures: we don't have a way to pass the
    * separate stencil miptree of a GL_DEPTH_STENCIL texture through.
    */
   if (image->has_depthstencil) {
      _mesa_error(ctx, GL_INVALID_OPERATION, __func__);
      return;
   }

   mt = brw_miptree_create_for_dri_image(brw, image, target, image->format,
                                         false);
   if (mt == NULL)
      return;

   struct brw_texture_object *intel_texobj = brw_texture_object(texObj);
   intel_texobj->planar_format = image->planar_format;
   intel_texobj->yuv_color_space = image->yuv_color_space;

   GLenum internal_format =
      image->internal_format != 0 ?
      image->internal_format : _mesa_get_format_base_format(mt->format);

   /* Fix the internal format when _mesa_get_format_base_format(mt->format)
    * isn't a valid one for that particular format.
    */
   if (brw->mesa_format_supports_render[image->format]) {
      if (image->format == MESA_FORMAT_R10G10B10A2_UNORM ||
          image->format == MESA_FORMAT_R10G10B10X2_UNORM ||
          image->format == MESA_FORMAT_B10G10R10A2_UNORM ||
          image->format == MESA_FORMAT_B10G10R10X2_UNORM)
         internal_format = GL_RGB10_A2;
   }

   /* Guess sized internal format for dma-bufs, as specified by
    * EXT_EGL_image_storage.
    */
   if (storage && target == GL_TEXTURE_2D && image->imported_dmabuf) {
      internal_format = driGLFormatToSizedInternalGLFormat(image->format);
      if (internal_format == GL_NONE) {
         _mesa_error(ctx, GL_INVALID_OPERATION, __func__);
         return;
      }
   }

   brw_set_texture_image_mt(brw, texImage, internal_format, mt->format, mt);
   brw_miptree_release(&mt);
}

static void
brw_image_target_texture_2d(struct gl_context *ctx, GLenum target,
                            struct gl_texture_object *texObj,
                            struct gl_texture_image *texImage,
                            GLeglImageOES image_handle)
{
   brw_image_target_texture(ctx, target, texObj, texImage, image_handle,
                            false);
}

static void
brw_image_target_tex_storage(struct gl_context *ctx, GLenum target,
                             struct gl_texture_object *texObj,
                             struct gl_texture_image *texImage,
                             GLeglImageOES image_handle)
{
   struct brw_texture_object *intel_texobj = brw_texture_object(texObj);
   brw_image_target_texture(ctx, target, texObj, texImage, image_handle,
                            true);

   /* The miptree is in a validated state, so no need to check later. */
   intel_texobj->needs_validate = false;
   intel_texobj->validated_first_level = 0;
   intel_texobj->validated_last_level = 0;
   intel_texobj->_Format = texImage->TexFormat;
}
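/* Added note: validated_last_level is 0 here because an EGLImage always
 * provides a single miplevel, unlike the storage path earlier in this file
 * which validates levels - 1.
 */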

static bool
brw_gettexsubimage_blorp(struct brw_context *brw,
                         struct gl_texture_image *tex_image,
                         unsigned x, unsigned y, unsigned z,
                         unsigned width, unsigned height, unsigned depth,
                         GLenum format, GLenum type, const void *pixels,
                         const struct gl_pixelstore_attrib *packing)
{
   struct brw_texture_image *intel_image = brw_texture_image(tex_image);
   const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel;
   const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z;

   /* The blorp path can't understand crazy format hackery */
   if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) !=
       _mesa_get_format_base_format(tex_image->TexFormat))
      return false;

   return brw_blorp_download_miptree(brw, intel_image->mt,
                                     tex_image->TexFormat, SWIZZLE_XYZW,
                                     mt_level, x, y, mt_z,
                                     width, height, depth,
                                     tex_image->TexObject->Target,
                                     format, type, false, pixels, packing);
}

/**
 * \brief A fast path for glGetTexImage.
 *
 * \see brw_readpixels_tiled_memcpy()
 */
static bool
brw_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
                                struct gl_texture_image *texImage,
                                GLint xoffset, GLint yoffset,
                                GLsizei width, GLsizei height,
                                GLenum format, GLenum type,
                                GLvoid *pixels,
                                const struct gl_pixelstore_attrib *packing)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct brw_texture_image *image = brw_texture_image(texImage);
   int dst_pitch;

   /* The miptree's buffer. */
   struct brw_bo *bo;

   uint32_t cpp;
   isl_memcpy_type copy_type;

   /* This fastpath is restricted to specific texture types:
    * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
    * more types.
    *
    * FINISHME: The restrictions below on packing alignment and packing row
    * length are likely unneeded now because we calculate the destination stride
    * with _mesa_image_row_stride. However, before removing the restrictions
    * we need tests.
    */
   if (!devinfo->has_llc ||
       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
       !(texImage->TexObject->Target == GL_TEXTURE_2D ||
         texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
       pixels == NULL ||
       packing->BufferObj ||
       packing->Alignment > 4 ||
       packing->SkipPixels > 0 ||
       packing->SkipRows > 0 ||
       (packing->RowLength != 0 && packing->RowLength != width) ||
       packing->SwapBytes ||
       packing->LsbFirst ||
       packing->Invert)
      return false;

   /* We can't handle copying from RGBX or BGRX because the tiled_memcpy
    * function doesn't set the last channel to 1. Note this checks BaseFormat
    * rather than TexFormat in case the RGBX format is being simulated with an
    * RGBA format.
    */
   if (texImage->_BaseFormat == GL_RGB)
      return false;

   copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type,
                                           &cpp);
   if (copy_type == ISL_MEMCPY_INVALID)
      return false;

   /* If this is a nontrivial texture view, let another path handle it instead. */
   if (texImage->TexObject->Attrib.MinLayer)
      return false;

   if (!image->mt ||
       (image->mt->surf.tiling != ISL_TILING_X &&
        image->mt->surf.tiling != ISL_TILING_Y0)) {
      /* The algorithm is written only for X- or Y-tiled memory. */
      return false;
   }

   /* tiled_to_linear() assumes that if the object is swizzled, it is using
    * I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.  This is only
    * true on gfx5 and above.
    *
    * The killer on top is that some gfx4 have an L-shaped swizzle mode, where
    * parts of the memory aren't swizzled at all. Userspace just can't handle
    * that.
    */
   if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
      return false;

   int level = texImage->Level + texImage->TexObject->Attrib.MinLevel;

   /* Since we are going to read raw data from the miptree, we need to resolve
    * any pending fast color clears before we start.
    */
   assert(image->mt->surf.logical_level0_px.depth == 1);
   assert(image->mt->surf.logical_level0_px.array_len == 1);

   brw_miptree_access_raw(brw, image->mt, level, 0, true);

   bo = image->mt->bo;

   if (brw_batch_references(&brw->batch, bo)) {
      perf_debug("Flushing before mapping a referenced bo.\n");
      brw_batch_flush(brw);
   }

   void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW);
   if (map == NULL) {
      DBG("%s: failed to map bo\n", __func__);
      return false;
   }

   dst_pitch = _mesa_image_row_stride(packing, width, format, type);

   DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
       "mesa_format=0x%x tiling=%d "
       "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
       __func__, texImage->Level, xoffset, yoffset, width, height,
       format, type, texImage->TexFormat, image->mt->surf.tiling,
       packing->Alignment, packing->RowLength, packing->SkipPixels,
       packing->SkipRows);

   /* Adjust x and y offset based on miplevel */
   unsigned level_x, level_y;
   brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y);
   xoffset += level_x;
   yoffset += level_y;

   isl_memcpy_tiled_to_linear(
      xoffset * cpp, (xoffset + width) * cpp,
      yoffset, yoffset + height,
      pixels,
      map,
      dst_pitch, image->mt->surf.row_pitch_B,
      devinfo->has_bit6_swizzle,
      image->mt->surf.tiling,
      copy_type
   );

   brw_bo_unmap(bo);
   return true;
}

static void
brw_get_tex_sub_image(struct gl_context *ctx,
                      GLint xoffset, GLint yoffset, GLint zoffset,
                      GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
|
||||
struct gl_texture_image *texImage)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
bool ok;
|
||||
|
||||
DBG("%s\n", __func__);
|
||||
|
||||
if (ctx->Pack.BufferObj) {
|
||||
if (brw_gettexsubimage_blorp(brw, texImage,
|
||||
xoffset, yoffset, zoffset,
|
||||
width, height, depth, format, type,
|
||||
pixels, &ctx->Pack))
|
||||
return;
|
||||
|
||||
perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
|
||||
}
|
||||
|
||||
ok = brw_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
|
||||
width, height,
|
||||
format, type, pixels, &ctx->Pack);
|
||||
|
||||
if(ok)
|
||||
return;
|
||||
|
||||
_mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset,
|
||||
width, height, depth,
|
||||
format, type, pixels, texImage);
|
||||
|
||||
DBG("%s - DONE\n", __func__);
|
||||
}
|
||||
|
||||
static void
|
||||
flush_astc_denorms(struct gl_context *ctx, GLuint dims,
|
||||
struct gl_texture_image *texImage,
|
||||
GLint xoffset, GLint yoffset, GLint zoffset,
|
||||
GLsizei width, GLsizei height, GLsizei depth)
|
||||
{
|
||||
struct compressed_pixelstore store;
|
||||
_mesa_compute_compressed_pixelstore(dims, texImage->TexFormat,
|
||||
width, height, depth,
|
||||
&ctx->Unpack, &store);
|
||||
|
||||
for (int slice = 0; slice < store.CopySlices; slice++) {
|
||||
|
||||
/* Map dest texture buffer */
|
||||
GLubyte *dstMap;
|
||||
GLint dstRowStride;
|
||||
ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset,
|
||||
xoffset, yoffset, width, height,
|
||||
GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
|
||||
&dstMap, &dstRowStride);
|
||||
if (!dstMap)
|
||||
continue;
|
||||
|
||||
for (int i = 0; i < store.CopyRowsPerSlice; i++) {
|
||||
|
||||
/* An ASTC block is stored in little endian mode. The byte that
|
||||
* contains bits 0..7 is stored at the lower address in memory.
|
||||
*/
|
||||
struct astc_void_extent {
|
||||
uint16_t header : 12;
|
||||
uint16_t dontcare[3];
|
||||
uint16_t R;
|
||||
uint16_t G;
|
||||
uint16_t B;
|
||||
uint16_t A;
|
||||
} *blocks = (struct astc_void_extent*) dstMap;
|
||||
|
||||
/* Iterate over every copied block in the row */
|
||||
for (int j = 0; j < store.CopyBytesPerRow / 16; j++) {
|
||||
|
||||
/* Check if the header matches that of an LDR void-extent block */
|
||||
if (blocks[j].header == 0xDFC) {
|
||||
|
||||
/* Flush UNORM16 values that would be denormalized */
|
||||
if (blocks[j].A < 4) blocks[j].A = 0;
|
||||
if (blocks[j].B < 4) blocks[j].B = 0;
|
||||
if (blocks[j].G < 4) blocks[j].G = 0;
|
||||
if (blocks[j].R < 4) blocks[j].R = 0;
|
||||
}
|
||||
}
|
||||
|
||||
dstMap += dstRowStride;
|
||||
}
|
||||
|
||||
ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset);
|
||||
}
|
||||
}
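
/* Editorial sketch (not part of the original file): the void-extent fixup
 * above, applied to a single hand-built block.  The struct is re-declared
 * locally so the example stays self-contained.
 */
static void
example_flush_one_void_extent_block(void)
{
   struct astc_void_extent {
      uint16_t header : 12;
      uint16_t dontcare[3];
      uint16_t R, G, B, A;
   } blk = {
      .header = 0xDFC,                       /* LDR void-extent signature */
      .R = 0xFFFF, .G = 0xFFFF, .B = 0xFFFF,
      .A = 3,                                /* < 4: would read back as a denorm */
   };

   /* Same rule flush_astc_denorms() applies to each 16-byte block. */
   if (blk.header == 0xDFC && blk.A < 4)
      blk.A = 0;
}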

static void
brw_compressedtexsubimage(struct gl_context *ctx, GLuint dims,
                          struct gl_texture_image *texImage,
                          GLint xoffset, GLint yoffset, GLint zoffset,
                          GLsizei width, GLsizei height, GLsizei depth,
                          GLenum format,
                          GLsizei imageSize, const GLvoid *data)
{
   /* Upload the compressed data blocks */
   _mesa_store_compressed_texsubimage(ctx, dims, texImage,
                                      xoffset, yoffset, zoffset,
                                      width, height, depth,
                                      format, imageSize, data);

   /* Fix up copied ASTC blocks if necessary */
   GLenum gl_format = _mesa_compressed_format_to_glenum(ctx,
                                                        texImage->TexFormat);
   bool is_linear_astc = _mesa_is_astc_format(gl_format) &&
                         !_mesa_is_srgb_format(gl_format);
   struct brw_context *brw = (struct brw_context*) ctx;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   if (devinfo->ver == 9 &&
       !intel_device_info_is_9lp(devinfo) &&
       is_linear_astc)
      flush_astc_denorms(ctx, dims, texImage,
                         xoffset, yoffset, zoffset,
                         width, height, depth);
}

void
brw_init_texture_image_functions(struct dd_function_table *functions)
{
   functions->TexImage = brw_teximage;
   functions->TexSubImage = brw_texsubimage;
   functions->CompressedTexSubImage = brw_compressedtexsubimage;
   functions->EGLImageTargetTexture2D = brw_image_target_texture_2d;
   functions->EGLImageTargetTexStorage = brw_image_target_tex_storage;
   functions->BindRenderbufferTexImage = brw_bind_renderbuffer_tex_image;
   functions->GetTexSubImage = brw_get_tex_sub_image;
}

@ -1,101 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef _BRW_TEX_OBJ_H
#define _BRW_TEX_OBJ_H

#include "swrast/s_context.h"

#ifdef __cplusplus
extern "C" {
#endif

struct brw_texture_object
{
   struct gl_texture_object base;

   /* This is a mirror of base._MaxLevel, updated at validate time,
    * except that we don't bother with the non-base levels for
    * non-mipmapped textures.
    */
   unsigned int _MaxLevel;

   unsigned int validated_first_level;
   unsigned int validated_last_level;

   /* The miptree of pixel data for the texture (if !needs_validate). After
    * validation, the images will also have references to the same mt.
    */
   struct brw_mipmap_tree *mt;

   /**
    * Set when mipmap trees in the texture images of this texture object
    * might not all be the mipmap tree above.
    */
   bool needs_validate;

   /* Mesa format for the validated texture object. For non-views this
    * will always be the same as texObj->Image[0][0].TexFormat. For views, it
    * may differ since the mt is shared across views with differing formats.
    */
   mesa_format _Format;

   const struct brw_image_format *planar_format;
   unsigned int yuv_color_space;
};


/**
 * brw_texture_image is a subclass of swrast_texture_image because we
 * sometimes fall back to using the swrast module for software rendering.
 */
struct brw_texture_image
{
   struct swrast_texture_image base;

   /* If brw_image->mt != NULL, image data is stored here.
    * Else if brw_image->base.Buffer != NULL, image is stored there.
    * Else there is no image data.
    */
   struct brw_mipmap_tree *mt;
};

static inline struct brw_texture_object *
brw_texture_object(struct gl_texture_object *obj)
{
   return (struct brw_texture_object *) obj;
}

static inline struct brw_texture_image *
brw_texture_image(struct gl_texture_image *img)
{
   return (struct brw_texture_image *) img;
}

#ifdef __cplusplus
}
#endif

#endif /* _BRW_TEX_OBJ_H */

@ -1,223 +0,0 @@
/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/mtypes.h"
#include "main/macros.h"
#include "main/samplerobj.h"
#include "main/teximage.h"
#include "main/texobj.h"

#include "brw_context.h"
#include "brw_mipmap_tree.h"
#include "brw_tex.h"

#define FILE_DEBUG_FLAG DEBUG_TEXTURE

/**
 * Sets our driver-specific variant of tObj->_MaxLevel for later surface state
 * upload.
 *
 * If we're only ensuring that there is storage for the first miplevel of a
 * texture, then in texture setup we're going to have to make sure we don't
 * allow sampling beyond level 0.
 */
static void
brw_update_max_level(struct gl_texture_object *tObj,
                     struct gl_sampler_object *sampler)
{
   struct brw_texture_object *brw_obj = brw_texture_object(tObj);

   if (!tObj->_MipmapComplete ||
       (tObj->_RenderToTexture &&
        (sampler->Attrib.MinFilter == GL_NEAREST ||
         sampler->Attrib.MinFilter == GL_LINEAR))) {
      brw_obj->_MaxLevel = tObj->Attrib.BaseLevel;
   } else {
      brw_obj->_MaxLevel = tObj->_MaxLevel;
   }
}

/**
 * At rendering-from-a-texture time, make sure that the texture object has a
 * miptree that can hold the entire texture based on
 * BaseLevel/MaxLevel/filtering, and copy in any texture images that are
 * stored in other miptrees.
 */
void
brw_finalize_mipmap_tree(struct brw_context *brw,
                         struct gl_texture_object *tObj)
{
   struct brw_texture_object *brw_obj = brw_texture_object(tObj);
   GLuint face, i;
   GLuint nr_faces = 0;
   struct brw_texture_image *firstImage;
   int width, height, depth;

   /* TBOs require no validation -- they always just point to their BO. */
   if (tObj->Target == GL_TEXTURE_BUFFER)
      return;

   /* What levels does this validated texture image require? */
   int validate_first_level = tObj->Attrib.BaseLevel;
   int validate_last_level = brw_obj->_MaxLevel;

   /* Skip the loop over images in the common case of no images having
    * changed. But if the GL_BASE_LEVEL or GL_MAX_LEVEL change to something we
    * haven't looked at, then we do need to look at those new images.
    */
   if (!brw_obj->needs_validate &&
       validate_first_level >= brw_obj->validated_first_level &&
       validate_last_level <= brw_obj->validated_last_level) {
      return;
   }

   /* On recent generations, immutable textures should not get this far
    * -- they should have been created in a validated state, and nothing
    * can invalidate them.
    *
    * Unfortunately, this is not true on pre-Sandybridge hardware -- when
    * rendering into an immutable-format depth texture we may have to rebase
    * the rendered levels to meet alignment requirements.
    *
    * FINISHME: Avoid doing this.
    */
   assert(!tObj->Immutable || brw->screen->devinfo.ver < 6);

   firstImage = brw_texture_image(tObj->Image[0][tObj->Attrib.BaseLevel]);
   if (!firstImage)
      return;

   /* Check tree can hold all active levels. Check tree matches
    * target, imageFormat, etc.
    */
   if (brw_obj->mt &&
       (!brw_miptree_match_image(brw_obj->mt, &firstImage->base.Base) ||
        validate_first_level < brw_obj->mt->first_level ||
        validate_last_level > brw_obj->mt->last_level)) {
      brw_miptree_release(&brw_obj->mt);
   }


   /* May need to create a new tree:
    */
   if (!brw_obj->mt) {
      const unsigned level = firstImage->base.Base.Level;
      brw_get_image_dims(&firstImage->base.Base, &width, &height, &depth);
      /* Figure out image dimensions at start level. */
      switch (brw_obj->base.Target) {
      case GL_TEXTURE_2D_MULTISAMPLE:
      case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      case GL_TEXTURE_RECTANGLE:
      case GL_TEXTURE_EXTERNAL_OES:
         assert(level == 0);
         break;
      case GL_TEXTURE_3D:
         depth = depth << level;
         FALLTHROUGH;
      case GL_TEXTURE_2D:
      case GL_TEXTURE_2D_ARRAY:
      case GL_TEXTURE_CUBE_MAP:
      case GL_TEXTURE_CUBE_MAP_ARRAY:
         height = height << level;
         FALLTHROUGH;
      case GL_TEXTURE_1D:
      case GL_TEXTURE_1D_ARRAY:
         width = width << level;
         break;
      default:
         unreachable("Unexpected target");
      }
      perf_debug("Creating new %s %dx%dx%d %d-level miptree to handle "
                 "finalized texture miptree.\n",
                 _mesa_get_format_name(firstImage->base.Base.TexFormat),
                 width, height, depth, validate_last_level + 1);

      brw_obj->mt = brw_miptree_create(brw,
                                       brw_obj->base.Target,
                                       firstImage->base.Base.TexFormat,
                                       0, /* first_level */
                                       validate_last_level,
                                       width,
                                       height,
                                       depth,
                                       1 /* num_samples */,
                                       MIPTREE_CREATE_BUSY);
      if (!brw_obj->mt)
         return;
   }

   /* Pull in any images not in the object's tree:
    */
   nr_faces = _mesa_num_tex_faces(brw_obj->base.Target);
   for (face = 0; face < nr_faces; face++) {
      for (i = validate_first_level; i <= validate_last_level; i++) {
         struct brw_texture_image *brw_image =
            brw_texture_image(brw_obj->base.Image[face][i]);
         /* skip too small size mipmap */
         if (brw_image == NULL)
            break;

         if (brw_obj->mt != brw_image->mt)
            brw_miptree_copy_teximage(brw, brw_image, brw_obj->mt);

         /* After we're done, we'd better agree that our layout is
          * appropriate, or we'll end up hitting this function again on the
          * next draw
          */
         assert(brw_miptree_match_image(brw_obj->mt, &brw_image->base.Base));
      }
   }

   brw_obj->validated_first_level = validate_first_level;
   brw_obj->validated_last_level = validate_last_level;
   brw_obj->_Format = firstImage->base.Base.TexFormat;
   brw_obj->needs_validate = false;
}

/**
 * Finalizes all textures, completing any rendering that needs to be done
 * to prepare them.
 */
void
brw_validate_textures(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;

   for (int unit = 0; unit <= max_enabled_unit; unit++) {
      struct gl_texture_object *tex_obj = ctx->Texture.Unit[unit]._Current;

      if (!tex_obj)
         continue;

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);

      /* We know that this is true by now, and if it wasn't, we might have
       * mismatched level sizes and the copies would fail.
       */
      assert(tex_obj->_BaseComplete);

      brw_update_max_level(tex_obj, sampler);
      brw_finalize_mipmap_tree(brw, tex_obj);
   }
}

@ -1,134 +0,0 @@
/*
 * Copyright 2003 VMware, Inc.
 * Copyright © 2007 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file intel_upload.c
 *
 * Batched upload via BOs.
 */

#include "main/macros.h"
#include "brw_bufmgr.h"
#include "brw_context.h"
#include "brw_buffer_objects.h"

void
brw_upload_finish(struct brw_uploader *upload)
{
   assert((upload->bo == NULL) == (upload->map == NULL));
   if (!upload->bo)
      return;

   brw_bo_unmap(upload->bo);
   brw_bo_unreference(upload->bo);
   upload->bo = NULL;
   upload->map = NULL;
   upload->next_offset = 0;
}

/**
 * Interface for getting memory for uploading streamed data to the GPU
 *
 * In most cases, streamed data (for GPU state structures, for example) is
 * uploaded through brw_state_batch(), since that interface allows relocations
 * from the streamed space returned to other BOs. However, that interface has
 * the restriction that the amount of space allocated has to be "small".
 *
 * This interface, on the other hand, is able to handle arbitrary sized
 * allocation requests, though it will batch small allocations into the same
 * BO for efficiency and reduced memory footprint.
 *
 * \note The returned pointer is valid only until brw_upload_finish().
 *
 * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
 * entry, and will have a reference to the new BO containing the state on
 * return.
 *
 * \param out_offset Offset within the buffer object that the data will land.
 */
void *
brw_upload_space(struct brw_uploader *upload,
                 uint32_t size,
                 uint32_t alignment,
                 struct brw_bo **out_bo,
                 uint32_t *out_offset)
{
   uint32_t offset;

   offset = ALIGN_NPOT(upload->next_offset, alignment);
   if (upload->bo && offset + size > upload->bo->size) {
      brw_upload_finish(upload);
      offset = 0;
   }

   assert((upload->bo == NULL) == (upload->map == NULL));
   if (!upload->bo) {
      upload->bo = brw_bo_alloc(upload->bufmgr, "streamed data",
                                MAX2(upload->default_size, size),
                                BRW_MEMZONE_OTHER);
      upload->map = brw_bo_map(NULL, upload->bo,
                               MAP_READ | MAP_WRITE |
                               MAP_PERSISTENT | MAP_ASYNC);
   }

   upload->next_offset = offset + size;

   *out_offset = offset;
   if (*out_bo != upload->bo) {
      brw_bo_unreference(*out_bo);
      *out_bo = upload->bo;
      brw_bo_reference(upload->bo);
   }

   return upload->map + offset;
}

/**
 * Handy interface to upload some data to temporary GPU memory quickly.
 *
 * References to this memory should not be retained across batch flushes.
 */
void
brw_upload_data(struct brw_uploader *upload,
                const void *data,
                uint32_t size,
                uint32_t alignment,
                struct brw_bo **out_bo,
                uint32_t *out_offset)
{
   void *dst = brw_upload_space(upload, size, alignment, out_bo, out_offset);
   memcpy(dst, data, size);
}
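
/* Editorial usage sketch (not part of the original file): how a caller
 * typically streams a small block of data through this interface.  The
 * uploader instance, payload size, and 32-byte alignment are assumptions
 * chosen for illustration.
 */
static void
example_stream_constants(struct brw_uploader *upload, const float *consts)
{
   struct brw_bo *bo = NULL;   /* must be NULL or a valid BO on entry */
   uint32_t offset;

   brw_upload_data(upload, consts, 16 * sizeof(float), 32, &bo, &offset);

   /* ... emit state that points at (bo, offset) ... */

   brw_bo_unreference(bo);     /* drop the reference taken for us */
}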

void
brw_upload_init(struct brw_uploader *upload,
                struct brw_bufmgr *bufmgr,
                unsigned default_size)
{
   upload->bufmgr = bufmgr;
   upload->bo = NULL;
   upload->map = NULL;
   upload->next_offset = 0;
   upload->default_size = default_size;
}

@ -1,268 +0,0 @@
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "brw_batch.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"

#define VS 0
#define GS 1
#define CLP 2
#define SF 3
#define CS 4

/** @file brw_urb.c
 *
 * Manages the division of the URB space between the various fixed-function
 * units.
 *
 * See the Thread Initiation Management section of the GFX4 B-Spec, and
 * the individual *_STATE structures for restrictions on numbers of
 * entries and threads.
 */

/*
 * Generally, a unit requires a min_nr_entries based on how many entries
 * it produces before the downstream unit gets unblocked and can use and
 * dereference some of its handles.
 *
 * The SF unit preallocates a PUE at the start of thread dispatch, and only
 * uses that one. So it requires one entry per thread.
 *
 * For CLIP, the SF unit will hold the previous primitive while the
 * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
 * (vertices) to ensure continued processing, trifans require 4, and tristrips
 * require 5. There can be 1 or 2 threads, and each has the same requirement.
 *
 * GS has the same requirement as CLIP, but it never handles tristrips,
 * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
 * We only run it single-threaded.
 *
 * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
 * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
 * get streamed down as soon as threads processing earlier vertices get
 * theirs accepted.
 *
 * Each unit will take the number of URB entries we give it (based on the
 * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
 * and brw_curbe.c for the CURBEs) and decide the maximum number of
 * threads it can support based on that, in brw_*_state.c.
 *
 * XXX: Are the min_entry_size numbers useful?
 * XXX: Verify min_nr_entries, esp for VS.
 * XXX: Verify SF min_entry_size.
 */
static const struct {
   GLuint min_nr_entries;
   GLuint preferred_nr_entries;
   GLuint min_entry_size;
   GLuint max_entry_size;
} limits[CS+1] = {
   { 16, 32, 1, 5 },   /* vs */
   { 4, 8, 1, 5 },     /* gs */
   { 5, 10, 1, 5 },    /* clp */
   { 1, 8, 1, 12 },    /* sf */
   { 1, 4, 1, 32 }     /* cs */
};


static bool check_urb_layout(struct brw_context *brw)
{
   brw->urb.vs_start = 0;
   brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
   brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
   brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
   brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;

   return brw->urb.cs_start + brw->urb.nr_cs_entries *
          brw->urb.csize <= brw->urb.size;
}
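
/* Editorial worked example (not in the original file): with the preferred
 * entry counts above (32 VS, 8 GS, 10 CLIP, 8 SF, 4 CS) and entry sizes
 * vsize = 5, sfsize = 12, csize = 32, check_urb_layout() places the fences
 * at
 *
 *    vs_start   = 0
 *    gs_start   = 32 * 5        = 160
 *    clip_start = 160 + 8 * 5   = 200
 *    sf_start   = 200 + 10 * 5  = 250
 *    cs_start   = 250 + 8 * 12  = 346
 *    end        = 346 + 4 * 32  = 474
 *
 * so this layout only fits if brw->urb.size is at least 474 rows.
 */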

/* Most minimal update, forces re-emit of URB fence packet after GS
 * unit turned on/off.
 */
void
brw_calculate_urb_fence(struct brw_context *brw, unsigned csize,
                        unsigned vsize, unsigned sfsize)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   if (csize < limits[CS].min_entry_size)
      csize = limits[CS].min_entry_size;

   if (vsize < limits[VS].min_entry_size)
      vsize = limits[VS].min_entry_size;

   if (sfsize < limits[SF].min_entry_size)
      sfsize = limits[SF].min_entry_size;

   if (brw->urb.vsize < vsize ||
       brw->urb.sfsize < sfsize ||
       brw->urb.csize < csize ||
       (brw->urb.constrained && (brw->urb.vsize > vsize ||
                                 brw->urb.sfsize > sfsize ||
                                 brw->urb.csize > csize))) {


      brw->urb.csize = csize;
      brw->urb.sfsize = sfsize;
      brw->urb.vsize = vsize;

      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;

      brw->urb.constrained = 0;

      if (devinfo->ver == 5) {
         brw->urb.nr_vs_entries = 128;
         brw->urb.nr_sf_entries = 48;
         if (check_urb_layout(brw)) {
            goto done;
         } else {
            brw->urb.constrained = 1;
            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
            brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
         }
      } else if (devinfo->verx10 == 45) {
         brw->urb.nr_vs_entries = 64;
         if (check_urb_layout(brw)) {
            goto done;
         } else {
            brw->urb.constrained = 1;
            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
         }
      }

      if (!check_urb_layout(brw)) {
         brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
         brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
         brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
         brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
         brw->urb.nr_cs_entries = limits[CS].min_nr_entries;

         /* Mark us as operating with constrained nr_entries, so that next
          * time we recalculate we'll resize the fences in the hope of
          * escaping constrained mode and getting back to normal performance.
          */
         brw->urb.constrained = 1;

         if (!check_urb_layout(brw)) {
            /* This is impossible, given the maximal sizes of urb
             * entries and the values for minimum nr of entries
             * provided above.
             */
            fprintf(stderr, "couldn't calculate URB layout!\n");
            exit(1);
         }

         if (INTEL_DEBUG(DEBUG_URB|DEBUG_PERF))
            fprintf(stderr, "URB CONSTRAINED\n");
      }

done:
      if (INTEL_DEBUG(DEBUG_URB))
         fprintf(stderr,
                 "URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
                 brw->urb.vs_start,
                 brw->urb.gs_start,
                 brw->urb.clip_start,
                 brw->urb.sf_start,
                 brw->urb.cs_start,
                 brw->urb.size);

      brw->ctx.NewDriverState |= BRW_NEW_URB_FENCE;
   }
}

static void recalculate_urb_fence( struct brw_context *brw )
{
   brw_calculate_urb_fence(brw, brw->curbe.total_size,
                           brw_vue_prog_data(brw->vs.base.prog_data)->urb_entry_size,
                           brw->sf.prog_data->urb_entry_size);
}


const struct brw_tracked_state brw_recalculate_urb_fence = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BLORP |
             BRW_NEW_PUSH_CONSTANT_ALLOCATION |
             BRW_NEW_SF_PROG_DATA |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = recalculate_urb_fence
};


void brw_upload_urb_fence(struct brw_context *brw)
{
   struct brw_urb_fence uf;
   memset(&uf, 0, sizeof(uf));

   uf.header.opcode = CMD_URB_FENCE;
   uf.header.length = sizeof(uf)/4-2;
   uf.header.vs_realloc = 1;
   uf.header.gs_realloc = 1;
   uf.header.clp_realloc = 1;
   uf.header.sf_realloc = 1;
   uf.header.vfe_realloc = 1;
   uf.header.cs_realloc = 1;

   /* The ordering below is correct, not the layout in the
    * instruction.
    *
    * There are 256/384 urb reg pairs in total.
    */
   uf.bits0.vs_fence = brw->urb.gs_start;
   uf.bits0.gs_fence = brw->urb.clip_start;
   uf.bits0.clp_fence = brw->urb.sf_start;
   uf.bits1.sf_fence = brw->urb.cs_start;
   uf.bits1.cs_fence = brw->urb.size;

   /* erratum: URB_FENCE must not cross a 64byte cacheline */
   if ((USED_BATCH(brw->batch) & 15) > 12) {
      int pad = 16 - (USED_BATCH(brw->batch) & 15);
      do
         *brw->batch.map_next++ = MI_NOOP;
      while (--pad);
   }
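
   /* Editorial note (not part of the original file): USED_BATCH() counts
    * dwords, so the low four bits locate us within a 16-dword (64-byte)
    * cacheline.  The URB_FENCE packet is 3 dwords, so it can only straddle
    * a cacheline when we sit at dword 13, 14 or 15.  For example, at
    * USED_BATCH & 15 == 14 the loop emits two MI_NOOPs so the packet starts
    * on the next 64-byte boundary.
    */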

   brw_batch_data(brw, &uf, sizeof(uf));
}

@ -1,125 +0,0 @@
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "brw_util.h"
#include "brw_defines.h"
#include "compiler/brw_eu_defines.h"

GLuint brw_translate_blend_equation( GLenum mode )
{
   switch (mode) {
   case GL_FUNC_ADD:
      return BRW_BLENDFUNCTION_ADD;
   case GL_MIN:
      return BRW_BLENDFUNCTION_MIN;
   case GL_MAX:
      return BRW_BLENDFUNCTION_MAX;
   case GL_FUNC_SUBTRACT:
      return BRW_BLENDFUNCTION_SUBTRACT;
   case GL_FUNC_REVERSE_SUBTRACT:
      return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
   default:
      unreachable("not reached");
   }
}

GLuint brw_translate_blend_factor( GLenum factor )
{
   switch (factor) {
   case GL_ZERO:
      return BRW_BLENDFACTOR_ZERO;
   case GL_SRC_ALPHA:
      return BRW_BLENDFACTOR_SRC_ALPHA;
   case GL_ONE:
      return BRW_BLENDFACTOR_ONE;
   case GL_SRC_COLOR:
      return BRW_BLENDFACTOR_SRC_COLOR;
   case GL_ONE_MINUS_SRC_COLOR:
      return BRW_BLENDFACTOR_INV_SRC_COLOR;
   case GL_DST_COLOR:
      return BRW_BLENDFACTOR_DST_COLOR;
   case GL_ONE_MINUS_DST_COLOR:
      return BRW_BLENDFACTOR_INV_DST_COLOR;
   case GL_ONE_MINUS_SRC_ALPHA:
      return BRW_BLENDFACTOR_INV_SRC_ALPHA;
   case GL_DST_ALPHA:
      return BRW_BLENDFACTOR_DST_ALPHA;
   case GL_ONE_MINUS_DST_ALPHA:
      return BRW_BLENDFACTOR_INV_DST_ALPHA;
   case GL_SRC_ALPHA_SATURATE:
      return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
   case GL_CONSTANT_COLOR:
      return BRW_BLENDFACTOR_CONST_COLOR;
   case GL_ONE_MINUS_CONSTANT_COLOR:
      return BRW_BLENDFACTOR_INV_CONST_COLOR;
   case GL_CONSTANT_ALPHA:
      return BRW_BLENDFACTOR_CONST_ALPHA;
   case GL_ONE_MINUS_CONSTANT_ALPHA:
      return BRW_BLENDFACTOR_INV_CONST_ALPHA;

   case GL_SRC1_COLOR:
      return BRW_BLENDFACTOR_SRC1_COLOR;
   case GL_SRC1_ALPHA:
      return BRW_BLENDFACTOR_SRC1_ALPHA;
   case GL_ONE_MINUS_SRC1_COLOR:
      return BRW_BLENDFACTOR_INV_SRC1_COLOR;
   case GL_ONE_MINUS_SRC1_ALPHA:
      return BRW_BLENDFACTOR_INV_SRC1_ALPHA;

   default:
      unreachable("not reached");
   }
}

static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
   [GL_POINTS] = _3DPRIM_POINTLIST,
   [GL_LINES] = _3DPRIM_LINELIST,
   [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
   [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
   [GL_TRIANGLES] = _3DPRIM_TRILIST,
   [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
   [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
   [GL_QUADS] = _3DPRIM_QUADLIST,
   [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
   [GL_POLYGON] = _3DPRIM_POLYGON,
   [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
   [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
   [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
   [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

uint32_t
get_hw_prim_for_gl_prim(int mode)
{
   assert(mode < ARRAY_SIZE(prim_to_hw_prim));
   return prim_to_hw_prim[mode];
}
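
/* Editorial usage example (not in the original file):
 *   get_hw_prim_for_gl_prim(GL_TRIANGLES)  == _3DPRIM_TRILIST
 *   get_hw_prim_for_gl_prim(GL_QUAD_STRIP) == _3DPRIM_QUADSTRIP
 * The GL primitive enums are dense over 0..GL_TRIANGLE_STRIP_ADJACENCY, so
 * the assert above is the only bounds check needed.
 */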

@ -1,74 +0,0 @@
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#ifndef BRW_UTIL_H
#define BRW_UTIL_H

#include "brw_context.h"
#include "main/framebuffer.h"

extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );

static inline float
brw_get_line_width(struct brw_context *brw)
{
   /* From the OpenGL 4.4 spec:
    *
    * "The actual width of non-antialiased lines is determined by rounding
    * the supplied width to the nearest integer, then clamping it to the
    * implementation-dependent maximum non-antialiased line width."
    */
   float line_width =
      CLAMP(!_mesa_is_multisample_enabled(&brw->ctx) && !brw->ctx.Line.SmoothFlag
            ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
            0.125f, brw->ctx.Const.MaxLineWidth);

   if (!_mesa_is_multisample_enabled(&brw->ctx) && brw->ctx.Line.SmoothFlag && line_width < 1.5f) {
      /* For 1 pixel line thickness or less, the general
       * anti-aliasing algorithm gives up, and a garbage line is
       * generated. Setting a Line Width of 0.0 specifies the
       * rasterization of the "thinnest" (one-pixel-wide),
       * non-antialiased lines.
       *
       * Lines rendered with zero Line Width are rasterized using
       * Grid Intersection Quantization rules as specified by
       * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
       * Rasterization.
       */
      line_width = 0.0f;
   }

   return line_width;
}
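
/* Editorial worked examples (not in the original file), assuming
 * MaxLineWidth >= 8 and multisampling disabled:
 *   Width 2.4, SmoothFlag off -> roundf(2.4) = 2.0
 *   Width 2.4, SmoothFlag on  -> 2.4 (antialiased lines are not rounded)
 *   Width 0.3, SmoothFlag off -> roundf(0.3) = 0.0, clamped up to 0.125
 *   Width 1.2, SmoothFlag on  -> below 1.5, forced to 0.0 (cosmetic GIQ)
 */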

#endif

@ -1,369 +0,0 @@
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "util/compiler.h"
#include "main/context.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
#include "brw_state.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"

#include "util/ralloc.h"

/**
 * Decide which set of clip planes should be used when clipping via
 * gl_Position or gl_ClipVertex.
 */
gl_clip_plane *
brw_select_clip_planes(struct gl_context *ctx)
{
   if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
      /* There is currently a GLSL vertex shader, so clip according to GLSL
       * rules, which means compare gl_ClipVertex (or gl_Position, if
       * gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
       * that were stored in EyeUserPlane at the time the clip planes were
       * specified.
       */
      return ctx->Transform.EyeUserPlane;
   } else {
      /* Either we are using fixed function or an ARB vertex program. In
       * either case the clip planes are going to be compared against
       * gl_Position (which is in clip coordinates) so we have to clip using
       * _ClipUserPlane, which was transformed into clip coordinates by Mesa
       * core.
       */
      return ctx->Transform._ClipUserPlane;
   }
}

static GLbitfield64
brw_vs_outputs_written(struct brw_context *brw, struct brw_vs_prog_key *key,
                       GLbitfield64 user_varyings)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   GLbitfield64 outputs_written = user_varyings;

   if (devinfo->ver < 6) {
      /* Put dummy slots into the VUE for the SF to put the replaced
       * point sprite coords in. We shouldn't need these dummy slots,
       * which take up precious URB space, but it would mean that the SF
       * doesn't get nice aligned pairs of input coords into output
       * coords, which would be a pain to handle.
       */
      for (unsigned i = 0; i < 8; i++) {
         if (key->point_coord_replace & (1 << i))
            outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
      }

      /* if back colors are written, allocate slots for front colors too */
      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
      if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
         outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
   }

   /* In order for legacy clipping to work, we need to populate the clip
    * distance varying slots whenever clipping is enabled, even if the vertex
    * shader doesn't write to gl_ClipDistance.
    */
   if (key->nr_userclip_plane_consts > 0) {
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
      outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
   }

   return outputs_written;
}

static bool
brw_codegen_vs_prog(struct brw_context *brw,
                    struct brw_program *vp,
                    struct brw_vs_prog_key *key)
{
   const struct brw_compiler *compiler = brw->screen->compiler;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   const GLuint *program;
   struct brw_vs_prog_data prog_data;
   struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
   void *mem_ctx;
   bool start_busy = false;
   double start_time = 0;

   memset(&prog_data, 0, sizeof(prog_data));

   /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
   if (vp->program.info.is_arb_asm)
      stage_prog_data->use_alt_mode = true;

   mem_ctx = ralloc_context(NULL);

   nir_shader *nir = nir_shader_clone(mem_ctx, vp->program.nir);

   brw_assign_common_binding_table_offsets(devinfo, &vp->program,
                                           &prog_data.base.base, 0);

   if (!vp->program.info.is_arb_asm) {
      brw_nir_setup_glsl_uniforms(mem_ctx, nir, &vp->program,
                                  &prog_data.base.base,
                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
      if (brw->can_push_ubos) {
         brw_nir_analyze_ubo_ranges(compiler, nir, key,
                                    prog_data.base.base.ubo_ranges);
      }
   } else {
      brw_nir_setup_arb_uniforms(mem_ctx, nir, &vp->program,
                                 &prog_data.base.base);
   }

   if (key->nr_userclip_plane_consts > 0) {
      brw_nir_lower_legacy_clipping(nir, key->nr_userclip_plane_consts,
                                    &prog_data.base.base);
   }

   if (key->copy_edgeflag)
      nir_lower_passthrough_edgeflags(nir);

   uint64_t outputs_written =
      brw_vs_outputs_written(brw, key, nir->info.outputs_written);

   brw_compute_vue_map(devinfo,
                       &prog_data.base.vue_map, outputs_written,
                       nir->info.separate_shader, 1);

   if (0) {
      _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
   }

   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    brw_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   if (INTEL_DEBUG(DEBUG_VS)) {
      if (vp->program.info.is_arb_asm)
         brw_dump_arb_asm("vertex", &vp->program);
   }


   /* Emit GFX4 code.
    */
   struct brw_compile_vs_params params = {
      .nir = nir,
      .key = key,
      .prog_data = &prog_data,
      .log_data = brw,
   };

   if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
      params.shader_time = true;
      params.shader_time_index =
         brw_get_shader_time_index(brw, &vp->program, ST_VS,
                                   !vp->program.info.is_arb_asm);
   }

   program = brw_compile_vs(compiler, mem_ctx, &params);
   if (program == NULL) {
      if (!vp->program.info.is_arb_asm) {
         vp->program.sh.data->LinkStatus = LINKING_FAILURE;
         ralloc_strcat(&vp->program.sh.data->InfoLog, params.error_str);
      }

      _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", params.error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      if (vp->compiled_once) {
         brw_debug_recompile(brw, MESA_SHADER_VERTEX, vp->program.Id,
                             &key->base);
      }
      if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
      vp->compiled_once = true;
   }

   /* Scratch space is used for register spilling */
   brw_alloc_stage_scratch(brw, &brw->vs.base,
                           prog_data.base.base.total_scratch);

   /* The param and pull_param arrays will be freed by the shader cache. */
   ralloc_steal(NULL, prog_data.base.base.param);
   ralloc_steal(NULL, prog_data.base.base.pull_param);
   brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
                    key, sizeof(struct brw_vs_prog_key),
                    program, prog_data.base.base.program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->vs.base.prog_offset, &brw->vs.base.prog_data);
   ralloc_free(mem_ctx);

   return true;
}

static bool
brw_vs_state_dirty(const struct brw_context *brw)
{
   return brw_state_dirty(brw,
                          _NEW_BUFFERS |
                          _NEW_LIGHT |
                          _NEW_POINT |
                          _NEW_POLYGON |
                          _NEW_TEXTURE |
                          _NEW_TRANSFORM,
                          BRW_NEW_VERTEX_PROGRAM |
                          BRW_NEW_VS_ATTRIB_WORKAROUNDS);
}

void
brw_vs_populate_key(struct brw_context *brw,
                    struct brw_vs_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *prog = brw->programs[MESA_SHADER_VERTEX];
   struct brw_program *vp = (struct brw_program *) prog;
   const struct intel_device_info *devinfo = &brw->screen->devinfo;

   memset(key, 0, sizeof(*key));

   /* Just upload the program verbatim for now. Always send it all
    * the inputs it asks for, whether they are varying or not.
    */

   /* _NEW_TEXTURE */
   brw_populate_base_prog_key(ctx, vp, &key->base);

   if (ctx->Transform.ClipPlanesEnabled != 0 &&
       (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) &&
       vp->program.info.clip_distance_array_size == 0) {
      key->nr_userclip_plane_consts =
         util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
   }
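
   /* Editorial worked example (not in the original file):
    * ClipPlanesEnabled == 0b101 (planes 0 and 2 enabled) gives
    * util_logbase2(5) + 1 == 3 constants, i.e. enough to cover every
    * plane up to the highest enabled bit, including disabled ones below it.
    */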
|
||||
|
||||
if (devinfo->ver < 6) {
|
||||
/* _NEW_POLYGON */
|
||||
key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
|
||||
ctx->Polygon.BackMode != GL_FILL);
|
||||
|
||||
/* _NEW_POINT */
|
||||
if (ctx->Point.PointSprite) {
|
||||
key->point_coord_replace = ctx->Point.CoordReplace & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
if (prog->info.outputs_written &
|
||||
(VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
|
||||
VARYING_BIT_BFC1)) {
|
||||
/* _NEW_LIGHT | _NEW_BUFFERS */
|
||||
key->clamp_vertex_color = ctx->Light._ClampVertexColor;
|
||||
}
|
||||
|
||||
/* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
|
||||
if (devinfo->verx10 <= 70) {
|
||||
memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
|
||||
sizeof(brw->vb.attrib_wa_flags));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
brw_upload_vs_prog(struct brw_context *brw)
|
||||
{
|
||||
struct brw_vs_prog_key key;
|
||||
/* BRW_NEW_VERTEX_PROGRAM */
|
||||
struct brw_program *vp =
|
||||
(struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
|
||||
|
||||
if (!brw_vs_state_dirty(brw))
|
||||
return;
|
||||
|
||||
brw_vs_populate_key(brw, &key);
|
||||
|
||||
if (brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, &key, sizeof(key),
|
||||
&brw->vs.base.prog_offset, &brw->vs.base.prog_data,
|
||||
true))
|
||||
return;
|
||||
|
||||
if (brw_disk_cache_upload_program(brw, MESA_SHADER_VERTEX))
|
||||
return;
|
||||
|
||||
vp = (struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
|
||||
vp->id = key.base.program_string_id;
|
||||
|
||||
ASSERTED bool success = brw_codegen_vs_prog(brw, vp, &key);
|
||||
assert(success);
|
||||
}
|
||||
|
||||
void
|
||||
brw_vs_populate_default_key(const struct brw_compiler *compiler,
|
||||
struct brw_vs_prog_key *key,
|
||||
struct gl_program *prog)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
struct brw_program *bvp = brw_program(prog);
|
||||
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
brw_populate_default_base_prog_key(devinfo, bvp, &key->base);
|
||||
|
||||
key->clamp_vertex_color =
|
||||
(prog->info.outputs_written &
|
||||
(VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
|
||||
VARYING_BIT_BFC1));
|
||||
}
|
||||
|
||||
bool
|
||||
brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
struct brw_vs_prog_key key;
|
||||
uint32_t old_prog_offset = brw->vs.base.prog_offset;
|
||||
struct brw_stage_prog_data *old_prog_data = brw->vs.base.prog_data;
|
||||
bool success;
|
||||
|
||||
struct brw_program *bvp = brw_program(prog);
|
||||
|
||||
brw_vs_populate_default_key(brw->screen->compiler, &key, prog);
|
||||
|
||||
success = brw_codegen_vs_prog(brw, bvp, &key);
|
||||
|
||||
brw->vs.base.prog_offset = old_prog_offset;
|
||||
brw->vs.base.prog_data = old_prog_data;
|
||||
|
||||
return success;
|
||||
}
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */


#ifndef BRW_VS_H
#define BRW_VS_H

#include "brw_context.h"

#ifdef __cplusplus
extern "C" {
#endif

void
brw_upload_vs_prog(struct brw_context *brw);

void
brw_vs_populate_key(struct brw_context *brw,
                    struct brw_vs_prog_key *key);
void
brw_vs_populate_default_key(const struct brw_compiler *compiler,
                            struct brw_vs_prog_key *key,
                            struct gl_program *prog);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif
@@ -1,119 +0,0 @@
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */

#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "main/shaderapi.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_buffer_objects.h"


/* Creates a new VS constant buffer reflecting the current VS program's
 * constants, if needed by the VS program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_vs_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->vs.base;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct brw_program *vp =
      (struct brw_program *) brw->programs[MESA_SHADER_VERTEX];

   /* BRW_NEW_VS_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_vs_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_upload_vs_pull_constants,
};

static void
brw_upload_vs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];

   /* BRW_NEW_VS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->vs.base, brw->vs.base.prog_data);
}

const struct brw_tracked_state brw_vs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_UNIFORM_BUFFER |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_upload_vs_ubo_surfaces,
};

static void
brw_upload_vs_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];

   if (vp) {
      /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, vp, &brw->vs.base,
                                brw->vs.base.prog_data);
   }
}

const struct brw_tracked_state brw_vs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_upload_vs_image_surfaces,
};
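The brw_tracked_state atoms above are how the driver ties dirty bits to emit callbacks: an atom's .emit runs whenever the accumulated dirty flags intersect its .dirty mask. A reduced sketch of that dispatch, with made-up bit values and atom list:

/* Reduced sketch of tracked-state dispatch: each atom declares the dirty
 * bits it cares about plus an emit callback, and the loop fires every atom
 * whose mask intersects the currently dirty bits.  Bit values are made up. */
#include <stdint.h>
#include <stdio.h>

#define NEW_PROGRAM (1u << 0)   /* hypothetical dirty bits */
#define NEW_TEXTURE (1u << 1)
#define NEW_BATCH   (1u << 2)

struct tracked_state {
   uint32_t dirty;
   void (*emit)(void);
};

static void upload_ubo_surfaces(void)   { puts("upload UBO surfaces"); }
static void upload_image_surfaces(void) { puts("upload image surfaces"); }

static const struct tracked_state atoms[] = {
   { NEW_PROGRAM | NEW_BATCH, upload_ubo_surfaces },
   { NEW_TEXTURE | NEW_BATCH, upload_image_surfaces },
};

int main(void)
{
   uint32_t state_dirty = NEW_TEXTURE;   /* e.g. a texture was rebound */
   for (unsigned i = 0; i < 2; i++) {
      if (atoms[i].dirty & state_dirty)
         atoms[i].emit();                /* only the image atom fires */
   }
   return 0;
}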
@@ -1,639 +0,0 @@
/*
 * Copyright (C) Intel Corp. 2006. All Rights Reserved.
 * Intel funded Tungsten Graphics to
 * develop this 3D driver.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_state.h"
#include "main/enums.h"
#include "main/formats.h"
#include "main/fbobject.h"
#include "main/samplerobj.h"
#include "main/framebuffer.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "brw_mipmap_tree.h"
#include "brw_image.h"
#include "brw_fbo.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"

#include "util/ralloc.h"
#include "util/u_math.h"

static void
assign_fs_binding_table_offsets(const struct intel_device_info *devinfo,
                                const struct gl_program *prog,
                                const struct brw_wm_prog_key *key,
                                struct brw_wm_prog_data *prog_data)
{
   /* Render targets implicitly start at surface index 0.  Even if there are
    * no color regions, we still perform an FB write to a null render target,
    * which will be surface 0.
    */
   uint32_t next_binding_table_offset = MAX2(key->nr_color_regions, 1);

   next_binding_table_offset =
      brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
                                              next_binding_table_offset);

   if (prog->nir->info.outputs_read && !key->coherent_fb_fetch) {
      prog_data->binding_table.render_target_read_start =
         next_binding_table_offset;
      next_binding_table_offset += key->nr_color_regions;
   }

   /* Update the binding table size */
   prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4;
}
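The offset arithmetic deserves a worked example: render targets occupy surface indices [0, nr_color_regions), index 0 stays reserved for the null render target even with zero color regions, and later surface classes pack in after. A standalone sketch, with a made-up count standing in for the common surfaces:

/* Standalone sketch of the FS binding-table layout computed above.
 * num_common_surfaces stands in for whatever
 * brw_assign_common_binding_table_offsets() would append. */
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   unsigned nr_color_regions = 0;     /* no color buffers bound */
   unsigned num_common_surfaces = 3;  /* hypothetical textures/UBOs */

   /* Surface 0 is always reserved: even with no color regions the
    * FB write targets a null render target at index 0. */
   unsigned next = MAX2(nr_color_regions, 1);

   unsigned common_start = next;
   next += num_common_surfaces;

   printf("RT surfaces: [0, %u), common: [%u, %u), size_bytes = %u\n",
          MAX2(nr_color_regions, 1), common_start, next, next * 4);
   return 0;
}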
static bool
brw_codegen_wm_prog(struct brw_context *brw,
                    struct brw_program *fp,
                    struct brw_wm_prog_key *key,
                    struct brw_vue_map *vue_map)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data prog_data;
   const GLuint *program;
   bool start_busy = false;
   double start_time = 0;

   nir_shader *nir = nir_shader_clone(mem_ctx, fp->program.nir);

   memset(&prog_data, 0, sizeof(prog_data));

   /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
   if (fp->program.info.is_arb_asm)
      prog_data.base.use_alt_mode = true;

   assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data);

   if (!fp->program.info.is_arb_asm) {
      brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program,
                                  &prog_data.base, true);
      if (brw->can_push_ubos) {
         brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir,
                                    NULL, prog_data.base.ubo_ranges);
      }
   } else {
      brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base);

      if (INTEL_DEBUG(DEBUG_WM))
         brw_dump_arb_asm("fragment", &fp->program);
   }

   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    brw_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

   struct brw_compile_fs_params params = {
      .nir = nir,
      .key = key,
      .prog_data = &prog_data,

      .allow_spilling = true,
      .vue_map = vue_map,

      .log_data = brw,
   };

   if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
      params.shader_time = true;
      params.shader_time_index8 =
         brw_get_shader_time_index(brw, &fp->program, ST_FS8,
                                   !fp->program.info.is_arb_asm);
      params.shader_time_index16 =
         brw_get_shader_time_index(brw, &fp->program, ST_FS16,
                                   !fp->program.info.is_arb_asm);
      params.shader_time_index32 =
         brw_get_shader_time_index(brw, &fp->program, ST_FS32,
                                   !fp->program.info.is_arb_asm);
   }

   program = brw_compile_fs(brw->screen->compiler, mem_ctx, &params);

   if (program == NULL) {
      if (!fp->program.info.is_arb_asm) {
         fp->program.sh.data->LinkStatus = LINKING_FAILURE;
         ralloc_strcat(&fp->program.sh.data->InfoLog, params.error_str);
      }

      _mesa_problem(NULL, "Failed to compile fragment shader: %s\n",
                    params.error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      if (fp->compiled_once) {
         brw_debug_recompile(brw, MESA_SHADER_FRAGMENT, fp->program.Id,
                             &key->base);
      }
      fp->compiled_once = true;

      if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
   }

   brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);

   if (INTEL_DEBUG(DEBUG_WM) && fp->program.info.is_arb_asm)
      fprintf(stderr, "\n");

   /* The param and pull_param arrays will be freed by the shader cache. */
   ralloc_steal(NULL, prog_data.base.param);
   ralloc_steal(NULL, prog_data.base.pull_param);
   brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
                    key, sizeof(struct brw_wm_prog_key),
                    program, prog_data.base.program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->wm.base.prog_offset, &brw->wm.base.prog_data);

   ralloc_free(mem_ctx);

   return true;
}
static uint8_t
gfx6_gather_workaround(GLenum internalformat)
{
   switch (internalformat) {
   case GL_R8I: return WA_SIGN | WA_8BIT;
   case GL_R8UI: return WA_8BIT;
   case GL_R16I: return WA_SIGN | WA_16BIT;
   case GL_R16UI: return WA_16BIT;
   default:
      /* Note that even though GL_R32I and GL_R32UI have format overrides in
       * the surface state, there is no shader w/a required.
       */
      return 0;
   }
}
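The return value is a small bitmask the FS backend consumes: WA_SIGN requests sign extension and WA_8BIT/WA_16BIT give the source width. A tiny decoding sketch, with the WA_* values assumed here so it stands alone:

/* Sketch of decoding a gather workaround mask like the one returned by
 * gfx6_gather_workaround().  The WA_* values are assumed so the snippet
 * compiles on its own; the real definitions live in the compiler headers. */
#include <stdint.h>
#include <stdio.h>

#define WA_SIGN  1   /* sign-extend the fetched channel */
#define WA_8BIT  2   /* source is 8 bits wide */
#define WA_16BIT 4   /* source is 16 bits wide */

int main(void)
{
   uint8_t wa = WA_SIGN | WA_8BIT;          /* what GL_R8I maps to */
   int width = (wa & WA_8BIT) ? 8 : (wa & WA_16BIT) ? 16 : 32;
   printf("width=%d sign_extend=%d\n", width, !!(wa & WA_SIGN));
   return 0;
}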
static void
brw_populate_sampler_prog_key_data(struct gl_context *ctx,
                                   const struct gl_program *prog,
                                   struct brw_sampler_prog_key_data *key)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   GLbitfield mask = prog->SamplersUsed;

   while (mask) {
      const int s = u_bit_scan(&mask);

      key->swizzles[s] = SWIZZLE_NOOP;
      key->scale_factors[s] = 0.0f;

      int unit_id = prog->SamplerUnits[s];
      const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id];

      if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) {
         const struct gl_texture_object *t = unit->_Current;
         const struct gl_texture_image *img = t->Image[0][t->Attrib.BaseLevel];
         struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id);

         const bool alpha_depth = t->Attrib.DepthMode == GL_ALPHA &&
            (img->_BaseFormat == GL_DEPTH_COMPONENT ||
             img->_BaseFormat == GL_DEPTH_STENCIL);

         /* Haswell handles texture swizzling as surface format overrides
          * (except for GL_ALPHA); all other platforms need MOVs in the shader.
          */
         if (alpha_depth || (devinfo->verx10 <= 70))
            key->swizzles[s] = brw_get_texture_swizzle(ctx, t);

         if (devinfo->ver < 8 &&
             sampler->Attrib.MinFilter != GL_NEAREST &&
             sampler->Attrib.MagFilter != GL_NEAREST) {
            if (sampler->Attrib.WrapS == GL_CLAMP)
               key->gl_clamp_mask[0] |= 1 << s;
            if (sampler->Attrib.WrapT == GL_CLAMP)
               key->gl_clamp_mask[1] |= 1 << s;
            if (sampler->Attrib.WrapR == GL_CLAMP)
               key->gl_clamp_mask[2] |= 1 << s;
         }

         /* gather4 for RG32* is broken in multiple ways on Gfx7. */
         if (devinfo->ver == 7 && prog->info.uses_texture_gather) {
            switch (img->InternalFormat) {
            case GL_RG32I:
            case GL_RG32UI: {
               /* We have to override the format to R32G32_FLOAT_LD.
                * This means that SCS_ALPHA and SCS_ONE will return 0x3f8
                * (1.0) rather than integer 1.  This needs shader hacks.
                *
                * On Ivybridge, we whack W (alpha) to ONE in our key's
                * swizzle.  On Haswell, we look at the original texture
                * swizzle, and use XYZW with channels overridden to ONE,
                * leaving normal texture swizzling to SCS.
                */
               unsigned src_swizzle =
                  devinfo->platform == INTEL_PLATFORM_HSW ?
                  t->Attrib._Swizzle : key->swizzles[s];
               for (int i = 0; i < 4; i++) {
                  unsigned src_comp = GET_SWZ(src_swizzle, i);
                  if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) {
                     key->swizzles[i] &= ~(0x7 << (3 * i));
                     key->swizzles[i] |= SWIZZLE_ONE << (3 * i);
                  }
               }
            }
            FALLTHROUGH;
            case GL_RG32F:
               /* The channel select for green doesn't work - we have to
                * request blue.  Haswell can use SCS for this, but Ivybridge
                * needs a shader workaround.
                */
               if (devinfo->platform != INTEL_PLATFORM_HSW)
                  key->gather_channel_quirk_mask |= 1 << s;
               break;
            }
         }

         /* Gfx6's gather4 is broken for UINT/SINT; we treat them as
          * UNORM/FLOAT instead and fix it in the shader.
          */
         if (devinfo->ver == 6 && prog->info.uses_texture_gather) {
            key->gfx6_gather_wa[s] = gfx6_gather_workaround(img->InternalFormat);
         }

         /* If this is a multisample sampler, and uses the CMS MSAA layout,
          * then we need to emit slightly different code to first sample the
          * MCS surface.
          */
         struct brw_texture_object *intel_tex =
            brw_texture_object((struct gl_texture_object *)t);

         /* From gfx9 onwards some single sampled buffers can also be
          * compressed.  These don't need ld2dms sampling along with mcs fetch.
          */
         if (intel_tex->mt->aux_usage == ISL_AUX_USAGE_MCS) {
            assert(devinfo->ver >= 7);
            assert(intel_tex->mt->surf.samples > 1);
            assert(intel_tex->mt->aux_buf);
            assert(intel_tex->mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
            key->compressed_multisample_layout_mask |= 1 << s;

            if (intel_tex->mt->surf.samples >= 16) {
               assert(devinfo->ver >= 9);
               key->msaa_16 |= 1 << s;
            }
         }

         if (t->Target == GL_TEXTURE_EXTERNAL_OES && intel_tex->planar_format) {

            /* Setup possible scaling factor. */
            key->scale_factors[s] = intel_tex->planar_format->scaling_factor;

            switch (intel_tex->planar_format->components) {
            case __DRI_IMAGE_COMPONENTS_Y_UV:
               key->y_uv_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_Y_U_V:
               key->y_u_v_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_Y_XUXV:
               key->yx_xuxv_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_Y_UXVX:
               key->xy_uxvx_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_AYUV:
               key->ayuv_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_XYUV:
               key->xyuv_image_mask |= 1 << s;
               break;
            default:
               break;
            }

            switch (intel_tex->yuv_color_space) {
            case __DRI_YUV_COLOR_SPACE_ITU_REC709:
               key->bt709_mask |= 1 << s;
               break;
            case __DRI_YUV_COLOR_SPACE_ITU_REC2020:
               key->bt2020_mask |= 1 << s;
               break;
            default:
               break;
            }
         }
      }
   }
}
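The while (mask) / u_bit_scan() loop above is the usual Mesa idiom for visiting each set bit of a bitfield exactly once: the helper returns the index of the lowest set bit and clears it. A freestanding equivalent, reimplementing the helper so the snippet compiles on its own:

/* Freestanding version of the u_bit_scan() sampler-walk idiom used in
 * brw_populate_sampler_prog_key_data(); bit_scan() mirrors Mesa's helper. */
#include <stdio.h>

static int bit_scan(unsigned *mask)
{
   int i = __builtin_ffs(*mask) - 1;  /* index of lowest set bit */
   *mask &= *mask - 1;                /* clear it */
   return i;
}

int main(void)
{
   unsigned samplers_used = (1u << 0) | (1u << 3) | (1u << 7);
   while (samplers_used) {
      int s = bit_scan(&samplers_used);
      printf("sampler %d\n", s);      /* prints 0, 3, 7 */
   }
   return 0;
}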
void
brw_populate_base_prog_key(struct gl_context *ctx,
                           const struct brw_program *prog,
                           struct brw_base_prog_key *key)
{
   key->program_string_id = prog->id;
   key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
   brw_populate_sampler_prog_key_data(ctx, &prog->program, &key->tex);
}

void
brw_populate_default_base_prog_key(const struct intel_device_info *devinfo,
                                   const struct brw_program *prog,
                                   struct brw_base_prog_key *key)
{
   key->program_string_id = prog->id;
   key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
   brw_setup_tex_for_precompile(devinfo, &key->tex, &prog->program);
}

static bool
brw_wm_state_dirty(const struct brw_context *brw)
{
   return brw_state_dirty(brw,
                          _NEW_BUFFERS |
                          _NEW_COLOR |
                          _NEW_DEPTH |
                          _NEW_FRAG_CLAMP |
                          _NEW_HINT |
                          _NEW_LIGHT |
                          _NEW_LINE |
                          _NEW_MULTISAMPLE |
                          _NEW_POLYGON |
                          _NEW_STENCIL |
                          _NEW_TEXTURE,
                          BRW_NEW_FRAGMENT_PROGRAM |
                          BRW_NEW_REDUCED_PRIMITIVE |
                          BRW_NEW_STATS_WM |
                          BRW_NEW_VUE_MAP_GEOM_OUT);
}

void
brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
{
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *prog = brw->programs[MESA_SHADER_FRAGMENT];
   const struct brw_program *fp = brw_program_const(prog);
   GLuint lookup = 0;
   GLuint line_aa;

   memset(key, 0, sizeof(*key));

   /* Build the index for table lookup */
   if (devinfo->ver < 6) {
      struct brw_renderbuffer *depth_irb =
         brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);

      /* _NEW_COLOR */
      if (prog->info.fs.uses_discard || ctx->Color.AlphaEnabled) {
         lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;
      }

      if (prog->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
         lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;
      }

      /* _NEW_DEPTH */
      if (depth_irb && ctx->Depth.Test) {
         lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;

         if (brw_depth_writes_enabled(brw))
            lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;
      }

      /* _NEW_STENCIL | _NEW_BUFFERS */
      if (brw->stencil_enabled) {
         lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT;

         if (ctx->Stencil.WriteMask[0] ||
             ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
            lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT;
      }
      key->iz_lookup = lookup;
   }

   line_aa = BRW_WM_AA_NEVER;

   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
   if (ctx->Line.SmoothFlag) {
      if (brw->reduced_primitive == GL_LINES) {
         line_aa = BRW_WM_AA_ALWAYS;
      }
      else if (brw->reduced_primitive == GL_TRIANGLES) {
         if (ctx->Polygon.FrontMode == GL_LINE) {
            line_aa = BRW_WM_AA_SOMETIMES;

            if (ctx->Polygon.BackMode == GL_LINE ||
                (ctx->Polygon.CullFlag &&
                 ctx->Polygon.CullFaceMode == GL_BACK))
               line_aa = BRW_WM_AA_ALWAYS;
         }
         else if (ctx->Polygon.BackMode == GL_LINE) {
            line_aa = BRW_WM_AA_SOMETIMES;

            if ((ctx->Polygon.CullFlag &&
                 ctx->Polygon.CullFaceMode == GL_FRONT))
               line_aa = BRW_WM_AA_ALWAYS;
         }
      }
   }

   key->line_aa = line_aa;

   /* _NEW_HINT */
   key->high_quality_derivatives =
      prog->info.uses_fddx_fddy &&
      ctx->Hint.FragmentShaderDerivative == GL_NICEST;

   if (devinfo->ver < 6)
      key->stats_wm = brw->stats_wm;

   /* _NEW_LIGHT */
   key->flat_shade =
      (prog->info.inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1)) &&
      (ctx->Light.ShadeModel == GL_FLAT);

   /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
   key->clamp_fragment_color = ctx->Color._ClampFragmentColor;

   /* _NEW_TEXTURE */
   brw_populate_base_prog_key(ctx, fp, &key->base);

   /* _NEW_BUFFERS */
   key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;

   /* _NEW_COLOR */
   key->force_dual_color_blend = brw->dual_color_blend_by_location &&
      (ctx->Color.BlendEnabled & 1) && ctx->Color._BlendUsesDualSrc & 0x1;

   /* _NEW_MULTISAMPLE, _NEW_BUFFERS */
   key->alpha_to_coverage = _mesa_is_alpha_to_coverage_enabled(ctx);

   /* _NEW_COLOR, _NEW_BUFFERS */
   key->alpha_test_replicate_alpha =
      ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
      _mesa_is_alpha_test_enabled(ctx);

   /* _NEW_BUFFERS _NEW_MULTISAMPLE */
   /* Ignore sample qualifier while computing this flag. */
   if (ctx->Multisample.Enabled) {
      key->persample_interp =
         ctx->Multisample.SampleShading &&
         (ctx->Multisample.MinSampleShadingValue *
          _mesa_geometric_samples(ctx->DrawBuffer) > 1);

      key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
   }

   key->ignore_sample_mask_out = !key->multisample_fbo;

   /* BRW_NEW_VUE_MAP_GEOM_OUT */
   if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read &
                                           BRW_FS_VARYING_INPUT_MASK) > 16) {
      key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
   }

   /* _NEW_COLOR | _NEW_BUFFERS */
   /* Pre-gfx6, the hardware alpha test always used each render
    * target's alpha to do alpha test, as opposed to render target 0's alpha
    * like GL requires.  Fix that by building the alpha test into the
    * shader, and we'll skip enabling the fixed function alpha test.
    */
   if (devinfo->ver < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
       ctx->Color.AlphaEnabled) {
      key->alpha_test_func = ctx->Color.AlphaFunc;
      key->alpha_test_ref = ctx->Color.AlphaRef;
   }

   /* Whether reads from the framebuffer should behave coherently. */
   key->coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch;
}
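On pre-gfx6 hardware the depth/stencil ("IZ") configuration is picked by indexing a table with the mask built above, one bit per GL toggle. A compact illustration of composing such an index, with placeholder bit values:

/* Illustration of composing the pre-gfx6 IZ table index the way
 * brw_wm_populate_key() does; the *_BIT values are placeholders, not
 * the driver's definitions. */
#include <stdio.h>

#define IZ_PS_KILL_ALPHATEST_BIT   (1 << 0)
#define IZ_DEPTH_TEST_ENABLE_BIT   (1 << 1)
#define IZ_DEPTH_WRITE_ENABLE_BIT  (1 << 2)
#define IZ_STENCIL_TEST_ENABLE_BIT (1 << 3)

int main(void)
{
   int uses_discard = 1, depth_test = 1, depth_write = 0, stencil = 0;
   unsigned lookup = 0;

   if (uses_discard) lookup |= IZ_PS_KILL_ALPHATEST_BIT;
   if (depth_test)   lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
   if (depth_write)  lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
   if (stencil)      lookup |= IZ_STENCIL_TEST_ENABLE_BIT;

   printf("iz_lookup = 0x%x\n", lookup);   /* indexes the IZ state table */
   return 0;
}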
void
brw_upload_wm_prog(struct brw_context *brw)
{
   struct brw_wm_prog_key key;
   struct brw_program *fp =
      (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];

   if (!brw_wm_state_dirty(brw))
      return;

   brw_wm_populate_key(brw, &key);

   if (brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG, &key, sizeof(key),
                        &brw->wm.base.prog_offset, &brw->wm.base.prog_data,
                        true))
      return;

   if (brw_disk_cache_upload_program(brw, MESA_SHADER_FRAGMENT))
      return;

   fp = (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];
   fp->id = key.base.program_string_id;

   ASSERTED bool success = brw_codegen_wm_prog(brw, fp, &key,
                                               &brw->vue_map_geom_out);
   assert(success);
}

void
brw_wm_populate_default_key(const struct brw_compiler *compiler,
                            struct brw_wm_prog_key *key,
                            struct gl_program *prog)
{
   const struct intel_device_info *devinfo = compiler->devinfo;

   memset(key, 0, sizeof(*key));

   brw_populate_default_base_prog_key(devinfo, brw_program(prog),
                                      &key->base);

   uint64_t outputs_written = prog->info.outputs_written;

   if (devinfo->ver < 6) {
      if (prog->info.fs.uses_discard)
         key->iz_lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;

      if (outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
         key->iz_lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;

      /* Just assume depth testing. */
      key->iz_lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;
      key->iz_lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;
   }

   if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read &
                                           BRW_FS_VARYING_INPUT_MASK) > 16) {
      key->input_slots_valid = prog->info.inputs_read | VARYING_BIT_POS;
   }

   key->nr_color_regions = util_bitcount64(outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));

   /* Whether reads from the framebuffer should behave coherently. */
   key->coherent_fb_fetch = devinfo->ver >= 9;
}
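nr_color_regions here is just a popcount of the written outputs after masking off the non-color results (depth, stencil, sample mask). A worked example, with assumed bit positions standing in for FRAG_RESULT_*:

/* Worked example of the nr_color_regions popcount above; the bit
 * positions stand in for FRAG_RESULT_* and are assumed, not Mesa's. */
#include <stdint.h>
#include <stdio.h>

#define BIT64(b) (UINT64_C(1) << (b))
#define RESULT_DEPTH  0   /* placeholder slots */
#define RESULT_COLOR0 2
#define RESULT_COLOR1 3

int main(void)
{
   uint64_t outputs_written =
      BIT64(RESULT_DEPTH) | BIT64(RESULT_COLOR0) | BIT64(RESULT_COLOR1);
   uint64_t non_color = BIT64(RESULT_DEPTH);

   int nr_color_regions =
      __builtin_popcountll(outputs_written & ~non_color);
   printf("nr_color_regions = %d\n", nr_color_regions);   /* 2 */
   return 0;
}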
bool
brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);
   const struct intel_device_info *devinfo = &brw->screen->devinfo;
   struct brw_wm_prog_key key;

   struct brw_program *bfp = brw_program(prog);

   brw_wm_populate_default_key(brw->screen->compiler, &key, prog);

   /* check brw_wm_populate_default_key coherent_fb_fetch setting */
   assert(key.coherent_fb_fetch ==
          ctx->Extensions.EXT_shader_framebuffer_fetch);

   uint32_t old_prog_offset = brw->wm.base.prog_offset;
   struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data;

   struct brw_vue_map vue_map;
   if (devinfo->ver < 6) {
      brw_compute_vue_map(&brw->screen->devinfo, &vue_map,
                          prog->info.inputs_read | VARYING_BIT_POS,
                          false, 1);
   }

   bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map);

   brw->wm.base.prog_offset = old_prog_offset;
   brw->wm.base.prog_data = old_prog_data;

   return success;
}
Some files were not shown because too many files have changed in this diff.