classic/i965: Remove driver

Reviewed-by: Emma Anholt <emma@anholt.net>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10153>
Dylan Baker authored 2021-04-12 11:26:45 -07:00; committed by Marge Bot
parent 0cad451f00
commit cdde031ac2
124 changed files with 8 additions and 55418 deletions


@@ -825,7 +825,6 @@ fedora-release:
-Wno-error=uninitialized
CPP_ARGS: >
-Wno-error=array-bounds
DRI_DRIVERS: "i965"
DRI_LOADERS: >
-D glx=dri
-D gbm=enabled
@@ -1039,7 +1038,6 @@ debian-clang:
-Wno-error=unused-variable
DRI_LOADERS: >
-D glvnd=true
DRI_DRIVERS: "auto"
GALLIUM_DRIVERS: "iris,nouveau,kmsro,r300,r600,freedreno,swr,swrast,svga,v3d,vc4,virgl,etnaviv,panfrost,lima,zink,radeonsi,tegra,d3d12,crocus"
VULKAN_DRIVERS: intel,amd,freedreno,broadcom,virtio-experimental
CC: clang
@@ -1118,7 +1116,6 @@ debian-i386:
CROSS: i386
VULKAN_DRIVERS: intel,amd,swrast,virtio-experimental
GALLIUM_DRIVERS: "iris,nouveau,r300,r600,radeonsi,swrast,virgl,zink,crocus"
DRI_DRIVERS: "i965"
EXTRA_OPTION: >
-D vulkan-layers=device-select,overlay
@@ -1155,7 +1152,6 @@ debian-mingw32-x86_64:
-Wno-error=format
-Wno-error=format-extra-args
CPP_ARGS: $C_ARGS
DRI_DRIVERS: ""
GALLIUM_DRIVERS: "swrast"
EXTRA_OPTION: >
-Dllvm=disabled


@@ -68,7 +68,6 @@ meson _build --native-file=native.file \
-D cpp_args="$(echo -n $CPP_ARGS)" \
-D libunwind=${UNWIND} \
${DRI_LOADERS} \
-D dri-drivers=${DRI_DRIVERS:-[]} \
${GALLIUM_ST} \
-D gallium-drivers=${GALLIUM_DRIVERS:-[]} \
-D vulkan-drivers=${VULKAN_DRIVERS:-[]} \


@@ -1,260 +0,0 @@
#ifndef PREFER_CROCUS
CHIPSET(0x29A2, i965, "BW", "Intel(R) 965G")
CHIPSET(0x2992, i965, "BW", "Intel(R) 965Q")
CHIPSET(0x2982, i965, "BW", "Intel(R) 965G")
CHIPSET(0x2972, i965, "BW", "Intel(R) 946GZ")
CHIPSET(0x2A02, i965, "CL", "Intel(R) 965GM")
CHIPSET(0x2A12, i965, "CL", "Intel(R) 965GME/GLE")
CHIPSET(0x2A42, g4x, "CTG", "Mobile Intel® GM45 Express Chipset")
CHIPSET(0x2E02, g4x, "ELK", "Intel(R) Integrated Graphics Device")
CHIPSET(0x2E12, g4x, "ELK", "Intel(R) Q45/Q43")
CHIPSET(0x2E22, g4x, "ELK", "Intel(R) G45/G43")
CHIPSET(0x2E32, g4x, "ELK", "Intel(R) G41")
CHIPSET(0x2E42, g4x, "ELK", "Intel(R) B43")
CHIPSET(0x2E92, g4x, "ELK", "Intel(R) B43")
CHIPSET(0x0042, ilk, "ILK", "Intel(R) HD Graphics")
CHIPSET(0x0046, ilk, "ILK", "Intel(R) HD Graphics")
CHIPSET(0x0102, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
CHIPSET(0x0112, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
CHIPSET(0x0122, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
CHIPSET(0x0106, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
CHIPSET(0x0116, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
CHIPSET(0x0126, snb_gt2, "SNB GT2", "Intel(R) HD Graphics 3000")
CHIPSET(0x010A, snb_gt1, "SNB GT1", "Intel(R) HD Graphics 2000")
CHIPSET(0x0152, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics 2500")
CHIPSET(0x0162, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics 4000")
CHIPSET(0x0156, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics 2500")
CHIPSET(0x0166, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics 4000")
CHIPSET(0x015a, ivb_gt1, "IVB GT1", "Intel(R) HD Graphics")
CHIPSET(0x016a, ivb_gt2, "IVB GT2", "Intel(R) HD Graphics P4000")
CHIPSET(0x0402, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0412, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
CHIPSET(0x0422, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0406, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0416, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
CHIPSET(0x0426, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x040A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x041A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics P4600/P4700")
CHIPSET(0x042A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x040B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x041B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x042B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x040E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x041E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4400")
CHIPSET(0x042E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0C02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0C12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0C22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0C06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0C16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0C26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0C0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0C1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0C2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0C0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0C1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0C2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0C0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0C1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0C2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0A02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0A12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0A22, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0A06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0A16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4400")
CHIPSET(0x0A26, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics 5000")
CHIPSET(0x0A0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0A1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0A2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0A0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0A1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0A2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0A0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0A1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4200")
CHIPSET(0x0A2E, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Graphics 5100")
CHIPSET(0x0D02, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0D12, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics 4600")
CHIPSET(0x0D22, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics 5200")
CHIPSET(0x0D06, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0D16, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0D26, hsw_gt3, "HSW GT3", "Intel(R) Iris(R) Pro Graphics P5200")
CHIPSET(0x0D0A, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0D1A, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0D2A, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0D0B, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0D1B, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0D2B, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0D0E, hsw_gt1, "HSW GT1", "Intel(R) HD Graphics")
CHIPSET(0x0D1E, hsw_gt2, "HSW GT2", "Intel(R) HD Graphics")
CHIPSET(0x0D2E, hsw_gt3, "HSW GT3", "Intel(R) HD Graphics")
CHIPSET(0x0F31, byt, "BYT", "Intel(R) HD Graphics")
CHIPSET(0x0F32, byt, "BYT", "Intel(R) HD Graphics")
CHIPSET(0x0F33, byt, "BYT", "Intel(R) HD Graphics")
CHIPSET(0x0157, byt, "BYT", "Intel(R) HD Graphics")
CHIPSET(0x0155, byt, "BYT", "Intel(R) HD Graphics")
CHIPSET(0x22B0, chv, "CHV", "Intel(R) HD Graphics")
CHIPSET(0x22B1, chv, "BSW", "Intel(R) HD Graphics XXX") /* Overridden in brw_get_renderer_string */
CHIPSET(0x22B2, chv, "CHV", "Intel(R) HD Graphics")
CHIPSET(0x22B3, chv, "CHV", "Intel(R) HD Graphics")
#endif
#ifndef PREFER_IRIS
CHIPSET(0x1602, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
CHIPSET(0x1606, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
CHIPSET(0x160A, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
CHIPSET(0x160B, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
CHIPSET(0x160D, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
CHIPSET(0x160E, bdw_gt1, "BDW GT1", "Intel(R) HD Graphics")
CHIPSET(0x1612, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5600")
CHIPSET(0x1616, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5500")
CHIPSET(0x161A, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics P5700")
CHIPSET(0x161B, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics")
CHIPSET(0x161D, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics")
CHIPSET(0x161E, bdw_gt2, "BDW GT2", "Intel(R) HD Graphics 5300")
CHIPSET(0x1622, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics 6200")
CHIPSET(0x1626, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics 6000")
CHIPSET(0x162A, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Pro Graphics P6300")
CHIPSET(0x162B, bdw_gt3, "BDW GT3", "Intel(R) Iris(R) Graphics 6100")
CHIPSET(0x162D, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics")
CHIPSET(0x162E, bdw_gt3, "BDW GT3", "Intel(R) HD Graphics")
CHIPSET(0x1902, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
CHIPSET(0x1906, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
CHIPSET(0x190A, skl_gt1, "SKL GT1", "Intel(R) HD Graphics")
CHIPSET(0x190B, skl_gt1, "SKL GT1", "Intel(R) HD Graphics 510")
CHIPSET(0x190E, skl_gt1, "SKL GT1", "Intel(R) HD Graphics")
CHIPSET(0x1912, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530")
CHIPSET(0x1913, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
CHIPSET(0x1915, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
CHIPSET(0x1916, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520")
CHIPSET(0x1917, skl_gt2, "SKL GT2F", "Intel(R) HD Graphics")
CHIPSET(0x191A, skl_gt2, "SKL GT2", "Intel(R) HD Graphics")
CHIPSET(0x191B, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 530")
CHIPSET(0x191D, skl_gt2, "SKL GT2", "Intel(R) HD Graphics P530")
CHIPSET(0x191E, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 515")
CHIPSET(0x1921, skl_gt2, "SKL GT2", "Intel(R) HD Graphics 520")
CHIPSET(0x1923, skl_gt3, "SKL GT3", "Intel(R) HD Graphics 535")
CHIPSET(0x1926, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 540")
CHIPSET(0x1927, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 550")
CHIPSET(0x192A, skl_gt4, "SKL GT4", "Intel(R) HD Graphics")
CHIPSET(0x192B, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics 555")
CHIPSET(0x192D, skl_gt3, "SKL GT3", "Intel(R) Iris(R) Graphics P555")
CHIPSET(0x1932, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580")
CHIPSET(0x193A, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580")
CHIPSET(0x193B, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics 580")
CHIPSET(0x193D, skl_gt4, "SKL GT4", "Intel(R) Iris(R) Pro Graphics P580")
CHIPSET(0x0A84, bxt, "BXT 3", "Intel(R) HD Graphics")
CHIPSET(0x1A84, bxt, "BXT 3", "Intel(R) HD Graphics")
CHIPSET(0x1A85, bxt_2x6, "BXT 2", "Intel(R) HD Graphics")
CHIPSET(0x5A84, bxt, "APL 3", "Intel(R) HD Graphics 505")
CHIPSET(0x5A85, bxt_2x6, "APL 2", "Intel(R) HD Graphics 500")
CHIPSET(0x3184, glk, "GLK 3", "Intel(R) UHD Graphics 605")
CHIPSET(0x3185, glk_2x6, "GLK 2", "Intel(R) UHD Graphics 600")
CHIPSET(0x5902, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
CHIPSET(0x5906, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
CHIPSET(0x590A, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
CHIPSET(0x5908, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
CHIPSET(0x590B, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics 610")
CHIPSET(0x590E, kbl_gt1, "KBL GT1", "Intel(R) HD Graphics")
CHIPSET(0x5913, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics")
CHIPSET(0x5915, kbl_gt1_5, "KBL GT1.5", "Intel(R) HD Graphics")
CHIPSET(0x5917, kbl_gt2, "KBL GT2", "Intel(R) UHD Graphics 620")
CHIPSET(0x5912, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 630")
CHIPSET(0x5916, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 620")
CHIPSET(0x591A, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630")
CHIPSET(0x591B, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 630")
CHIPSET(0x591D, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics P630")
CHIPSET(0x591E, kbl_gt2, "KBL GT2", "Intel(R) HD Graphics 615")
CHIPSET(0x5921, kbl_gt2, "KBL GT2F", "Intel(R) HD Graphics 620")
CHIPSET(0x5923, kbl_gt3, "KBL GT3", "Intel(R) HD Graphics 635")
CHIPSET(0x5926, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 640 (Kaby Lake GT3e)")
CHIPSET(0x5927, kbl_gt3, "KBL GT3", "Intel(R) Iris(R) Plus Graphics 650 (Kaby Lake GT3e)")
CHIPSET(0x593B, kbl_gt4, "KBL GT4", "Intel(R) HD Graphics")
CHIPSET(0x591C, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 615")
CHIPSET(0x87C0, kbl_gt2, "AML-KBL", "Intel(R) UHD Graphics 617")
CHIPSET(0x87CA, cfl_gt2, "AML-CFL", "Intel(R) UHD Graphics")
CHIPSET(0x3E90, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
CHIPSET(0x3E93, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
CHIPSET(0x3E99, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
CHIPSET(0x3E9C, cfl_gt1, "CFL GT1", "Intel(R) UHD Graphics 610")
CHIPSET(0x3E91, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
CHIPSET(0x3E92, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
CHIPSET(0x3E96, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
CHIPSET(0x3E98, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
CHIPSET(0x3E9A, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
CHIPSET(0x3E9B, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 630")
CHIPSET(0x3E94, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics P630")
CHIPSET(0x3EA9, cfl_gt2, "CFL GT2", "Intel(R) UHD Graphics 620")
CHIPSET(0x3EA5, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655")
CHIPSET(0x3EA6, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 645")
CHIPSET(0x3EA7, cfl_gt3, "CFL GT3", "Intel(R) HD Graphics")
CHIPSET(0x3EA8, cfl_gt3, "CFL GT3", "Intel(R) Iris(R) Plus Graphics 655")
CHIPSET(0x3EA1, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics 610")
CHIPSET(0x3EA4, cfl_gt1, "WHL GT1", "Intel(R) UHD Graphics")
CHIPSET(0x3EA0, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics 620")
CHIPSET(0x3EA3, cfl_gt2, "WHL GT2", "Intel(R) UHD Graphics")
CHIPSET(0x3EA2, cfl_gt3, "WHL GT3", "Intel(R) UHD Graphics")
CHIPSET(0x9B21, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
CHIPSET(0x9BA0, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
CHIPSET(0x9BA2, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
CHIPSET(0x9BA4, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
CHIPSET(0x9BA5, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610")
CHIPSET(0x9BA8, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics 610")
CHIPSET(0x9BAA, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
CHIPSET(0x9BAB, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
CHIPSET(0x9BAC, cfl_gt1, "CML GT1", "Intel(R) UHD Graphics")
CHIPSET(0x9B41, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
CHIPSET(0x9BC0, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
CHIPSET(0x9BC2, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
CHIPSET(0x9BC4, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
CHIPSET(0x9BC5, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630")
CHIPSET(0x9BC6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
CHIPSET(0x9BC8, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics 630")
CHIPSET(0x9BCA, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
CHIPSET(0x9BCB, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
CHIPSET(0x9BCC, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics")
CHIPSET(0x9BE6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
CHIPSET(0x9BF6, cfl_gt2, "CML GT2", "Intel(R) UHD Graphics P630")
CHIPSET(0x8A50, icl_gt2, "ICL GT2", "Intel(R) HD Graphics")
CHIPSET(0x8A51, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
CHIPSET(0x8A52, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
CHIPSET(0x8A53, icl_gt2, "ICL GT2", "Intel(R) Iris(R) Plus Graphics")
CHIPSET(0x8A54, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
CHIPSET(0x8A56, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
CHIPSET(0x8A57, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
CHIPSET(0x8A58, icl_gt1, "ICL GT1", "Intel(R) UHD Graphics")
CHIPSET(0x8A59, icl_gt1_5, "ICL GT1.5", "Intel(R) HD Graphics")
CHIPSET(0x8A5A, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
CHIPSET(0x8A5B, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
CHIPSET(0x8A5C, icl_gt1_5, "ICL GT1.5", "Intel(R) Iris(R) Plus Graphics")
CHIPSET(0x8A5D, icl_gt1, "ICL GT1", "Intel(R) HD Graphics")
CHIPSET(0x8A71, icl_gt0_5, "ICL GT0.5", "Intel(R) HD Graphics")
CHIPSET(0x4500, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
CHIPSET(0x4541, ehl_2x4, "EHL", "Intel(R) UHD Graphics")
CHIPSET(0x4551, ehl_4x4, "EHL", "Intel(R) UHD Graphics")
CHIPSET(0x4555, ehl_2x8, "EHL", "Intel(R) UHD Graphics")
CHIPSET(0x4557, ehl_4x5, "EHL", "Intel(R) UHD Graphics")
CHIPSET(0x4571, ehl_4x8, "EHL", "Intel(R) UHD Graphics")
CHIPSET(0x4E51, ehl_4x4, "JSL", "Intel(R) UHD Graphics")
CHIPSET(0x4E55, ehl_2x8, "JSL", "Intel(R) UHD Graphics")
CHIPSET(0x4E57, ehl_4x5, "JSL", "Intel(R) UHD Graphics")
CHIPSET(0x4E61, ehl_4x6, "JSL", "Intel(R) UHD Graphics")
CHIPSET(0x4E71, ehl_4x8, "JSL", "Intel(R) UHD Graphics")
#endif


@@ -174,27 +174,9 @@ with_shared_glapi = with_shared_glapi and with_any_opengl
system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'gnu/kfreebsd', 'dragonfly', 'linux', 'sunos'].contains(host_machine.system())
dri_drivers = get_option('dri-drivers')
if dri_drivers.contains('auto')
if system_has_kms_drm
# TODO: PPC, Sparc
if ['x86', 'x86_64'].contains(host_machine.cpu_family())
dri_drivers = ['i965']
elif ['arm', 'aarch64', 'mips', 'mips64'].contains(host_machine.cpu_family())
dri_drivers = []
else
error('Unknown architecture @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
host_machine.cpu_family()))
if dri_drivers.length() != 0
error('Mesa\'s main branch no longer has any "classic" drivers, use the "amber" branch instead.')
endif
elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
# only swrast would make sense here, but gallium swrast is a much better default
dri_drivers = []
else
error('Unknown OS @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
host_machine.system()))
endif
endif
with_dri_i965 = dri_drivers.contains('i965')
with_dri = dri_drivers.length() != 0
@@ -205,7 +187,7 @@ if gallium_drivers.contains('auto')
if ['x86', 'x86_64'].contains(host_machine.cpu_family())
gallium_drivers = [
'r300', 'r600', 'radeonsi', 'nouveau', 'virgl', 'svga', 'swrast',
'iris', 'crocus'
'iris', 'crocus', 'i915'
]
elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
gallium_drivers = [
@@ -293,7 +275,7 @@ with_broadcom_vk = _vulkan_drivers.contains('broadcom')
with_any_vk = _vulkan_drivers.length() != 0
with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris or with_gallium_crocus
with_any_intel = with_intel_vk or with_gallium_iris or with_gallium_crocus
if with_swrast_vk and not with_gallium_softpipe
error('swrast vulkan requires gallium swrast')
@@ -1493,8 +1475,6 @@ if cc.has_function('dl_iterate_phdr')
pre_args += '-DHAVE_DL_ITERATE_PHDR'
elif with_intel_vk
error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
elif with_dri_i965 and with_shader_cache
error('Intel i965 GL driver requires dl_iterate_phdr when built with shader caching.')
endif
# Determine whether or not the rt library is needed for time functions


@@ -54,9 +54,7 @@ option(
option(
'dri-drivers',
type : 'array',
value : ['auto'],
choices : ['auto', 'i965'],
description : 'List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
description : 'DEPRECATED: List of dri drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
)
option(
'dri-drivers-path',
@@ -455,18 +453,6 @@ option(
value : true,
description : 'Enable direct rendering in GLX and EGL for DRI',
)
option(
'prefer-iris',
type : 'boolean',
value : true,
description : 'Prefer new Intel iris driver over older i965 driver'
)
option(
'prefer-crocus',
type : 'boolean',
value : false,
description : 'Prefer new crocus driver over older i965 driver for gen4-7'
)
option('egl-lib-suffix',
type : 'string',
value : '',


@@ -1247,7 +1247,7 @@ intel_get_device_info_from_pci_id(int pci_id,
#undef CHIPSET
#define CHIPSET(id, family, fam_str, name) \
case id: *devinfo = intel_device_info_##family; break;
#include "pci_ids/i965_pci_ids.h"
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
#undef CHIPSET
@@ -1269,7 +1269,7 @@ intel_get_device_info_from_pci_id(int pci_id,
sizeof(devinfo->name)); \
strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
break;
#include "pci_ids/i965_pci_ids.h"
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
default:
strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));


@@ -14,13 +14,8 @@ main(int argc, char *argv[])
} chipsets[] = {
#undef CHIPSET
#define CHIPSET(id, family, family_str, str_name) { .pci_id = id, .name = str_name, },
#include "pci_ids/crocus_pci_ids.h"
#include "pci_ids/i965_pci_ids.h"
#include "pci_ids/iris_pci_ids.h"
#undef CHIPSET
#define CHIPSET(id, fam_str, str_name) { .pci_id = id, .name = str_name, },
#include "pci_ids/i915_pci_ids.h"
#undef CHIPSET
#include "pci_ids/crocus_pci_ids.h"
};
for (uint32_t i = 0; i < ARRAY_SIZE(chipsets); i++) {


@@ -40,15 +40,6 @@ loader_c_args = [
'-DUSE_DRICONF',
'-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
]
if get_option('prefer-iris')
loader_c_args += ['-DPREFER_IRIS']
endif
if get_option('prefer-crocus')
loader_c_args += ['-DPREFER_CROCUS']
endif
libloader = static_library(
'loader',
['loader_dri_helper.c', 'loader.c'],


@@ -8,12 +8,6 @@
# error "Only include from loader.c"
#endif
static const int i965_chip_ids[] = {
#define CHIPSET(chip, family, family_str, name) chip,
#include "pci_ids/i965_pci_ids.h"
#undef CHIPSET
};
static const int crocus_chip_ids[] = {
#define CHIPSET(chip, family, family_str, name) chip,
#include "pci_ids/crocus_pci_ids.h"
@@ -53,7 +47,6 @@ static const struct {
int num_chips_ids;
bool (*predicate)(int fd);
} driver_map[] = {
{ 0x8086, "i965", i965_chip_ids, ARRAY_SIZE(i965_chip_ids) },
{ 0x8086, "crocus", crocus_chip_ids, ARRAY_SIZE(crocus_chip_ids) },
{ 0x8086, "iris", NULL, -1, is_kernel_i915 },
{ 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },

File diff suppressed because it is too large.


@@ -1,166 +0,0 @@
#ifndef BRW_BATCH_H
#define BRW_BATCH_H
#include "main/mtypes.h"
#include "brw_context.h"
#include "brw_bufmgr.h"
#ifdef __cplusplus
extern "C" {
#endif
/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)
/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
* Address, which means that we can't put binding tables beyond 64kB. This
* effectively limits the maximum statebuffer size to 64kB.
*/
#define MAX_STATE_SIZE (64 * 1024)
struct brw_batch;
void brw_batch_init(struct brw_context *brw);
void brw_batch_free(struct brw_batch *batch);
void brw_batch_save_state(struct brw_context *brw);
bool brw_batch_saved_state_is_empty(struct brw_context *brw);
void brw_batch_reset_to_saved(struct brw_context *brw);
void brw_batch_require_space(struct brw_context *brw, GLuint sz);
int _brw_batch_flush_fence(struct brw_context *brw,
int in_fence_fd, int *out_fence_fd,
const char *file, int line);
void brw_batch_maybe_noop(struct brw_context *brw);
#define brw_batch_flush(brw) \
_brw_batch_flush_fence((brw), -1, NULL, __FILE__, __LINE__)
#define brw_batch_flush_fence(brw, in_fence_fd, out_fence_fd) \
_brw_batch_flush_fence((brw), (in_fence_fd), (out_fence_fd), \
__FILE__, __LINE__)
/* Unlike bmBufferData, this currently requires the buffer be mapped.
* Consider it a convenience function wrapping multiple
* brw_buffer_dword() calls.
*/
void brw_batch_data(struct brw_context *brw,
const void *data, GLuint bytes);
static inline bool
brw_batch_has_aperture_space(struct brw_context *brw, uint64_t extra_space)
{
return brw->batch.aperture_space + extra_space <=
brw->screen->aperture_threshold;
}
bool brw_batch_references(struct brw_batch *batch, struct brw_bo *bo);
#define RELOC_WRITE EXEC_OBJECT_WRITE
#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
void brw_use_pinned_bo(struct brw_batch *batch, struct brw_bo *bo,
unsigned writeable_flag);
uint64_t brw_batch_reloc(struct brw_batch *batch,
uint32_t batch_offset,
struct brw_bo *target,
uint32_t target_offset,
unsigned flags);
uint64_t brw_state_reloc(struct brw_batch *batch,
uint32_t batch_offset,
struct brw_bo *target,
uint32_t target_offset,
unsigned flags);
#define USED_BATCH(_batch) \
((uintptr_t)((_batch).map_next - (_batch).batch.map))
static inline uint32_t float_as_int(float f)
{
union {
float f;
uint32_t d;
} fi;
fi.f = f;
return fi.d;
}
static inline void
brw_batch_begin(struct brw_context *brw, int n)
{
brw_batch_require_space(brw, n * 4);
#ifdef DEBUG
brw->batch.emit = USED_BATCH(brw->batch);
brw->batch.total = n;
#endif
}
static inline void
brw_batch_advance(struct brw_context *brw)
{
#ifdef DEBUG
struct brw_batch *batch = &brw->batch;
unsigned int _n = USED_BATCH(*batch) - batch->emit;
assert(batch->total != 0);
if (_n != batch->total) {
fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
_n, batch->total);
abort();
}
batch->total = 0;
#else
(void) brw;
#endif
}
static inline bool
brw_ptr_in_state_buffer(struct brw_batch *batch, void *p)
{
return (char *) p >= (char *) batch->state.map &&
(char *) p < (char *) batch->state.map + batch->state.bo->size;
}
#define BEGIN_BATCH(n) do { \
brw_batch_begin(brw, (n)); \
uint32_t *__map = brw->batch.map_next; \
brw->batch.map_next += (n)
#define BEGIN_BATCH_BLT(n) do { \
assert(brw->screen->devinfo.ver < 6); \
brw_batch_begin(brw, (n)); \
uint32_t *__map = brw->batch.map_next; \
brw->batch.map_next += (n)
#define OUT_BATCH(d) *__map++ = (d)
#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))
#define OUT_RELOC(buf, flags, delta) do { \
uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
uint32_t reloc = \
brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \
OUT_BATCH(reloc); \
} while (0)
/* Handle 48-bit address relocations for Gfx8+ */
#define OUT_RELOC64(buf, flags, delta) do { \
uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
uint64_t reloc64 = \
brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \
OUT_BATCH(reloc64); \
OUT_BATCH(reloc64 >> 32); \
} while (0)
#define ADVANCE_BATCH() \
assert(__map == brw->batch.map_next); \
brw_batch_advance(brw); \
} while (0)
#ifdef __cplusplus
}
#endif
#endif
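For context, the BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() macros defined above were used in the pattern sketched below. This is an illustrative example only, not code from this commit: emit_example_packet and EXAMPLE_PACKET are placeholder names, and the removed brw_context.h/brw_batch.h headers are assumed to be on the include path.

#include "brw_context.h"
#include "brw_batch.h"

#define EXAMPLE_PACKET 0   /* placeholder opcode, not a real 3DSTATE command */

/* Illustrative sketch only: reserve N dwords, write exactly N, then close. */
static void
emit_example_packet(struct brw_context *brw, uint32_t payload)
{
   BEGIN_BATCH(2);                             /* reserve 2 dwords in the batch */
   OUT_BATCH(EXAMPLE_PACKET << 16 | (2 - 2));  /* DWord 0: opcode | (length - 2) */
   OUT_BATCH(payload);                         /* DWord 1: packet payload */
   ADVANCE_BATCH();                            /* closes the do/while from BEGIN_BATCH */
}

In DEBUG builds, brw_batch_begin() and brw_batch_advance() additionally verify that exactly the reserved number of dwords was written and abort otherwise, which is what makes this pattern self-checking.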


@@ -1,307 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* \file brw_binding_tables.c
*
* State atoms which upload the "binding table" for each shader stage.
*
* Binding tables map a numeric "surface index" to the SURFACE_STATE structure
* for a currently bound surface. This allows SEND messages (such as sampler
* or data port messages) to refer to a particular surface by number, rather
* than by pointer.
*
* The binding table is stored as a (sparse) array of SURFACE_STATE entries;
* surface indexes are simply indexes into the array. The ordering of the
* entries is entirely left up to software; see the SURF_INDEX_* macros in
* brw_context.h to see our current layout.
*/
#include "main/mtypes.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_batch.h"
/**
* Upload a shader stage's binding table as indirect state.
*
* This copies brw_stage_state::surf_offset[] into the indirect state section
* of the batchbuffer (allocated by brw_state_batch()).
*/
void
brw_upload_binding_table(struct brw_context *brw,
uint32_t packet_name,
const struct brw_stage_prog_data *prog_data,
struct brw_stage_state *stage_state)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (prog_data->binding_table.size_bytes == 0) {
/* There are no surfaces; skip making the binding table altogether. */
if (stage_state->bind_bo_offset == 0 && devinfo->ver < 9)
return;
stage_state->bind_bo_offset = 0;
} else {
/* Upload a new binding table. */
if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
brw_emit_buffer_surface_state(
brw, &stage_state->surf_offset[
prog_data->binding_table.shader_time_start],
brw->shader_time.bo, 0, ISL_FORMAT_RAW,
brw->shader_time.bo->size, 1, RELOC_WRITE);
}
uint32_t *bind =
brw_state_batch(brw, prog_data->binding_table.size_bytes,
32, &stage_state->bind_bo_offset);
/* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
memcpy(bind, stage_state->surf_offset,
prog_data->binding_table.size_bytes);
}
brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
if (devinfo->ver >= 7) {
BEGIN_BATCH(2);
OUT_BATCH(packet_name << 16 | (2 - 2));
/* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
* when hw-generated binding table is enabled.
*/
OUT_BATCH(stage_state->bind_bo_offset);
ADVANCE_BATCH();
}
}
/**
* State atoms which upload the binding table for a particular shader stage.
* @{
*/
/** Upload the VS binding table. */
static void
brw_vs_upload_binding_table(struct brw_context *brw)
{
/* BRW_NEW_VS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
brw_upload_binding_table(brw,
_3DSTATE_BINDING_TABLE_POINTERS_VS,
prog_data,
&brw->vs.base);
}
const struct brw_tracked_state brw_vs_binding_table = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_VS_CONSTBUF |
BRW_NEW_VS_PROG_DATA |
BRW_NEW_SURFACES,
},
.emit = brw_vs_upload_binding_table,
};
/** Upload the PS binding table. */
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
/* BRW_NEW_FS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
brw_upload_binding_table(brw,
_3DSTATE_BINDING_TABLE_POINTERS_PS,
prog_data,
&brw->wm.base);
}
const struct brw_tracked_state brw_wm_binding_table = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_SURFACES,
},
.emit = brw_upload_wm_binding_table,
};
/** Upload the TCS binding table (if tessellation stages are active). */
static void
brw_tcs_upload_binding_table(struct brw_context *brw)
{
/* Skip if the tessellation stages are disabled. */
if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL)
return;
/* BRW_NEW_TCS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
brw_upload_binding_table(brw,
_3DSTATE_BINDING_TABLE_POINTERS_HS,
prog_data,
&brw->tcs.base);
}
const struct brw_tracked_state brw_tcs_binding_table = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_DEFAULT_TESS_LEVELS |
BRW_NEW_SURFACES |
BRW_NEW_TCS_CONSTBUF |
BRW_NEW_TCS_PROG_DATA,
},
.emit = brw_tcs_upload_binding_table,
};
/** Upload the TES binding table (if TES is active). */
static void
brw_tes_upload_binding_table(struct brw_context *brw)
{
/* If there's no TES, skip changing anything. */
if (brw->programs[MESA_SHADER_TESS_EVAL] == NULL)
return;
/* BRW_NEW_TES_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
brw_upload_binding_table(brw,
_3DSTATE_BINDING_TABLE_POINTERS_DS,
prog_data,
&brw->tes.base);
}
const struct brw_tracked_state brw_tes_binding_table = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_SURFACES |
BRW_NEW_TES_CONSTBUF |
BRW_NEW_TES_PROG_DATA,
},
.emit = brw_tes_upload_binding_table,
};
/** Upload the GS binding table (if GS is active). */
static void
brw_gs_upload_binding_table(struct brw_context *brw)
{
/* If there's no GS, skip changing anything. */
if (brw->programs[MESA_SHADER_GEOMETRY] == NULL)
return;
/* BRW_NEW_GS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
brw_upload_binding_table(brw,
_3DSTATE_BINDING_TABLE_POINTERS_GS,
prog_data,
&brw->gs.base);
}
const struct brw_tracked_state brw_gs_binding_table = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_GS_CONSTBUF |
BRW_NEW_GS_PROG_DATA |
BRW_NEW_SURFACES,
},
.emit = brw_gs_upload_binding_table,
};
/** @} */
/**
* State atoms which emit 3DSTATE packets to update the binding table pointers.
* @{
*/
/**
* (Gfx4-5) Upload the binding table pointers for all shader stages.
*
* The binding table pointers are relative to the surface state base address,
* which points at the batchbuffer containing the streamed batch state.
*/
static void
gfx4_upload_binding_table_pointers(struct brw_context *brw)
{
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
OUT_BATCH(brw->vs.base.bind_bo_offset);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
OUT_BATCH(brw->wm.base.bind_bo_offset);
ADVANCE_BATCH();
}
const struct brw_tracked_state brw_binding_table_pointers = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_BINDING_TABLE_POINTERS |
BRW_NEW_STATE_BASE_ADDRESS,
},
.emit = gfx4_upload_binding_table_pointers,
};
/**
* (Sandybridge Only) Upload the binding table pointers for all shader stages.
*
* The binding table pointers are relative to the surface state base address,
* which points at the batchbuffer containing the streamed batch state.
*/
static void
gfx6_upload_binding_table_pointers(struct brw_context *brw)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
GFX6_BINDING_TABLE_MODIFY_VS |
GFX6_BINDING_TABLE_MODIFY_GS |
GFX6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */
if (brw->ff_gs.prog_active)
OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */
else
OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */
OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
const struct brw_tracked_state gfx6_binding_table_pointers = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_BINDING_TABLE_POINTERS |
BRW_NEW_STATE_BASE_ADDRESS,
},
.emit = gfx6_upload_binding_table_pointers,
};
/** @} */
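For context, the brw_tracked_state atoms declared above were driven by a generic state-upload loop elsewhere in the driver. The sketch below mirrors only the fields used here (.dirty.mesa, .dirty.brw, .emit) to show the idea; it is an illustrative example with placeholder names (example_tracked_state, run_example_atoms), not code from this commit.

#include <stdint.h>

struct brw_context;   /* opaque here; the real driver passes its own context */

struct example_tracked_state {
   struct { uint64_t mesa; uint64_t brw; } dirty;
   void (*emit)(struct brw_context *brw);
};

/* Run each atom whose listed dirty bits intersect the currently dirty state. */
static void
run_example_atoms(struct brw_context *brw,
                  const struct example_tracked_state **atoms, int count,
                  uint64_t mesa_dirty, uint64_t brw_dirty)
{
   for (int i = 0; i < count; i++) {
      const struct example_tracked_state *atom = atoms[i];
      if ((atom->dirty.mesa & mesa_dirty) || (atom->dirty.brw & brw_dirty))
         atom->emit(brw);   /* e.g. brw_vs_upload_binding_table() */
   }
}

Each atom lists the flags that invalidate its output, so a binding table is re-uploaded only when a new batch, new surfaces, or new program data actually require it.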


@@ -1,790 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "main/blit.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_blit.h"
#include "brw_buffers.h"
#include "brw_fbo.h"
#include "brw_batch.h"
#include "brw_mipmap_tree.h"
#define FILE_DEBUG_FLAG DEBUG_BLIT
static void
brw_miptree_set_alpha_to_one(struct brw_context *brw,
struct brw_mipmap_tree *mt,
int x, int y, int width, int height);
static GLuint translate_raster_op(enum gl_logicop_mode logicop)
{
return logicop | (logicop << 4);
}
static uint32_t
br13_for_cpp(int cpp)
{
switch (cpp) {
case 16:
return BR13_32323232;
case 8:
return BR13_16161616;
case 4:
return BR13_8888;
case 2:
return BR13_565;
case 1:
return BR13_8;
default:
unreachable("not reached");
}
}
/**
* Emits the packet for switching the blitter from X to Y tiled or back.
*
* This has to be called in a single BEGIN_BATCH_BLT_TILED() /
* ADVANCE_BATCH_TILED(). This is because BCS_SWCTRL is saved and restored as
* part of the power context, not a render context, and if the batchbuffer was
* to get flushed between setting and blitting, or blitting and restoring, our
* tiling state would leak into other unsuspecting applications (like the X
* server).
*/
static uint32_t *
set_blitter_tiling(struct brw_context *brw,
bool dst_y_tiled, bool src_y_tiled,
uint32_t *__map)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const unsigned n_dwords = devinfo->ver >= 8 ? 5 : 4;
assert(devinfo->ver >= 6);
/* Idle the blitter before we update how tiling is interpreted. */
OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
if (n_dwords == 5)
OUT_BATCH(0);
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
OUT_BATCH(BCS_SWCTRL);
OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |
(dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |
(src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));
return __map;
}
#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map)
#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \
unsigned set_tiling_batch_size = 0; \
if (dst_y_tiled || src_y_tiled) { \
if (devinfo->ver >= 8) \
set_tiling_batch_size = 16; \
else \
set_tiling_batch_size = 14; \
} \
BEGIN_BATCH_BLT(n + set_tiling_batch_size); \
if (dst_y_tiled || src_y_tiled) \
SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled)
#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \
if (dst_y_tiled || src_y_tiled) \
SET_BLITTER_TILING(brw, false, false); \
ADVANCE_BATCH()
bool
brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst)
{
/* The BLT doesn't handle sRGB conversion */
assert(src == _mesa_get_srgb_format_linear(src));
assert(dst == _mesa_get_srgb_format_linear(dst));
/* No swizzle or format conversions possible, except... */
if (src == dst)
return true;
/* ...we can either discard the alpha channel when going from A->X,
* or we can fill the alpha channel with 0xff when going from X->A
*/
if (src == MESA_FORMAT_B8G8R8A8_UNORM || src == MESA_FORMAT_B8G8R8X8_UNORM)
return (dst == MESA_FORMAT_B8G8R8A8_UNORM ||
dst == MESA_FORMAT_B8G8R8X8_UNORM);
if (src == MESA_FORMAT_R8G8B8A8_UNORM || src == MESA_FORMAT_R8G8B8X8_UNORM)
return (dst == MESA_FORMAT_R8G8B8A8_UNORM ||
dst == MESA_FORMAT_R8G8B8X8_UNORM);
/* We can also discard alpha when going from A2->X2 for 2 bit alpha,
* however we can't fill the alpha channel with two 1 bits when going
* from X2->A2, because brw_miptree_set_alpha_to_one() is not yet
* ready for this / can only handle 8 bit alpha.
*/
if (src == MESA_FORMAT_B10G10R10A2_UNORM)
return (dst == MESA_FORMAT_B10G10R10A2_UNORM ||
dst == MESA_FORMAT_B10G10R10X2_UNORM);
if (src == MESA_FORMAT_R10G10B10A2_UNORM)
return (dst == MESA_FORMAT_R10G10B10A2_UNORM ||
dst == MESA_FORMAT_R10G10B10X2_UNORM);
return false;
}
static void
get_blit_intratile_offset_el(const struct brw_context *brw,
struct brw_mipmap_tree *mt,
uint32_t total_x_offset_el,
uint32_t total_y_offset_el,
uint64_t *tile_offset_B,
uint32_t *x_offset_el,
uint32_t *y_offset_el)
{
ASSERTED uint32_t z_offset_el, array_offset;
isl_tiling_get_intratile_offset_el(mt->surf.tiling, mt->surf.dim,
mt->surf.msaa_layout,
mt->cpp * 8, mt->surf.samples,
mt->surf.row_pitch_B,
mt->surf.array_pitch_el_rows,
total_x_offset_el, total_y_offset_el, 0, 0,
tile_offset_B,
x_offset_el, y_offset_el,
&z_offset_el, &array_offset);
assert(z_offset_el == 0);
assert(array_offset == 0);
if (mt->surf.tiling == ISL_TILING_LINEAR) {
/* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress:
*
* "Base address of the destination surface: X=0, Y=0. Lower 32bits
* of the 48bit addressing. When Src Tiling is enabled (Bit_15
* enabled), this address must be 4KB-aligned. When Tiling is not
* enabled, this address should be CL (64byte) aligned."
*
* The offsets we get from ISL in the tiled case are already aligned.
* In the linear case, we need to do some of our own aligning.
*/
uint32_t delta = *tile_offset_B & 63;
assert(delta % mt->cpp == 0);
*tile_offset_B -= delta;
*x_offset_el += delta / mt->cpp;
} else {
assert(*tile_offset_B % 4096 == 0);
}
}
static bool
alignment_valid(struct brw_context *brw, unsigned offset,
enum isl_tiling tiling)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
/* Tiled buffers must be page-aligned (4K). */
if (tiling != ISL_TILING_LINEAR)
return (offset & 4095) == 0;
/* On Gfx8+, linear buffers must be cacheline-aligned. */
if (devinfo->ver >= 8)
return (offset & 63) == 0;
return true;
}
static uint32_t
xy_blit_cmd(enum isl_tiling src_tiling, enum isl_tiling dst_tiling,
uint32_t cpp)
{
uint32_t CMD = 0;
assert(cpp <= 4);
switch (cpp) {
case 1:
case 2:
CMD = XY_SRC_COPY_BLT_CMD;
break;
case 4:
CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
break;
default:
unreachable("not reached");
}
if (dst_tiling != ISL_TILING_LINEAR)
CMD |= XY_DST_TILED;
if (src_tiling != ISL_TILING_LINEAR)
CMD |= XY_SRC_TILED;
return CMD;
}
/* Copy BitBlt
*/
static bool
emit_copy_blit(struct brw_context *brw,
GLuint cpp,
int32_t src_pitch,
struct brw_bo *src_buffer,
GLuint src_offset,
enum isl_tiling src_tiling,
int32_t dst_pitch,
struct brw_bo *dst_buffer,
GLuint dst_offset,
enum isl_tiling dst_tiling,
GLshort src_x, GLshort src_y,
GLshort dst_x, GLshort dst_y,
GLshort w, GLshort h,
enum gl_logicop_mode logic_op)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
GLuint CMD, BR13;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
bool dst_y_tiled = dst_tiling == ISL_TILING_Y0;
bool src_y_tiled = src_tiling == ISL_TILING_Y0;
uint32_t src_tile_w, src_tile_h;
uint32_t dst_tile_w, dst_tile_h;
if ((dst_y_tiled || src_y_tiled) && devinfo->ver < 6)
return false;
const unsigned bo_sizes = dst_buffer->size + src_buffer->size;
/* do space check before going any further */
if (!brw_batch_has_aperture_space(brw, bo_sizes))
brw_batch_flush(brw);
if (!brw_batch_has_aperture_space(brw, bo_sizes))
return false;
unsigned length = devinfo->ver >= 8 ? 10 : 8;
brw_batch_require_space(brw, length * 4);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__func__,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
isl_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h);
isl_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h);
/* For Tiled surfaces, the pitch has to be a multiple of the Tile width
* (X direction width of the Tile). This is ensured while allocating the
* buffer object.
*/
assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
/* For big formats (such as floating point), do the copy using 16 or
* 32bpp and multiply the coordinates.
*/
if (cpp > 4) {
if (cpp % 4 == 2) {
dst_x *= cpp / 2;
dst_x2 *= cpp / 2;
src_x *= cpp / 2;
cpp = 2;
} else {
assert(cpp % 4 == 0);
dst_x *= cpp / 4;
dst_x2 *= cpp / 4;
src_x *= cpp / 4;
cpp = 4;
}
}
if (!alignment_valid(brw, dst_offset, dst_tiling))
return false;
if (!alignment_valid(brw, src_offset, src_tiling))
return false;
/* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
* the low bits. Offsets must be naturally aligned.
*/
if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
dst_pitch % 4 != 0 || dst_offset % cpp != 0)
return false;
assert(cpp <= 4);
BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp);
/* For tiled source and destination, pitch value should be specified
* as a number of Dwords.
*/
if (dst_tiling != ISL_TILING_LINEAR)
dst_pitch /= 4;
if (src_tiling != ISL_TILING_LINEAR)
src_pitch /= 4;
if (dst_y2 <= dst_y || dst_x2 <= dst_x)
return true;
assert(dst_x < dst_x2);
assert(dst_y < dst_y2);
BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled);
OUT_BATCH(CMD | (length - 2));
OUT_BATCH(BR13 | (uint16_t)dst_pitch);
OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X));
OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X));
if (devinfo->ver >= 8) {
OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
} else {
OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
}
OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X));
OUT_BATCH((uint16_t)src_pitch);
if (devinfo->ver >= 8) {
OUT_RELOC64(src_buffer, 0, src_offset);
} else {
OUT_RELOC(src_buffer, 0, src_offset);
}
ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);
brw_emit_mi_flush(brw);
return true;
}
static bool
emit_miptree_blit(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
uint32_t src_x, uint32_t src_y,
struct brw_mipmap_tree *dst_mt,
uint32_t dst_x, uint32_t dst_y,
uint32_t width, uint32_t height,
bool reverse, enum gl_logicop_mode logicop)
{
/* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
* Data Size Limitations):
*
* The BLT engine is capable of transferring very large quantities of
* graphics data. Any graphics data read from and written to the
* destination is permitted to represent a number of pixels that
* occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
* at the destination. The maximum number of pixels that may be
* represented per scan lines worth of graphics data depends on the
* color depth.
*
* The blitter's pitch is a signed 16-bit integer, but measured in bytes
* for linear surfaces and DWords for tiled surfaces. So the maximum
* pitch is 32k linear and 128k tiled.
*/
if (brw_miptree_blt_pitch(src_mt) >= 32768 ||
brw_miptree_blt_pitch(dst_mt) >= 32768) {
perf_debug("Falling back due to >= 32k/128k pitch\n");
return false;
}
/* We need to split the blit into chunks that each fit within the blitter's
* restrictions. We can't use a chunk size of 32768 because we need to
* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
* a nice round power of two, big enough that performance won't suffer, and
* small enough to guarantee everything fits.
*/
const uint32_t max_chunk_size = 16384;
for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
uint64_t src_offset;
uint32_t src_tile_x, src_tile_y;
get_blit_intratile_offset_el(brw, src_mt,
src_x + chunk_x, src_y + chunk_y,
&src_offset, &src_tile_x, &src_tile_y);
uint64_t dst_offset;
uint32_t dst_tile_x, dst_tile_y;
get_blit_intratile_offset_el(brw, dst_mt,
dst_x + chunk_x, dst_y + chunk_y,
&dst_offset, &dst_tile_x, &dst_tile_y);
if (!emit_copy_blit(brw,
src_mt->cpp,
reverse ? -src_mt->surf.row_pitch_B :
src_mt->surf.row_pitch_B,
src_mt->bo, src_mt->offset + src_offset,
src_mt->surf.tiling,
dst_mt->surf.row_pitch_B,
dst_mt->bo, dst_mt->offset + dst_offset,
dst_mt->surf.tiling,
src_tile_x, src_tile_y,
dst_tile_x, dst_tile_y,
chunk_w, chunk_h,
logicop)) {
/* If this is ever going to fail, it will fail on the first chunk */
assert(chunk_x == 0 && chunk_y == 0);
return false;
}
}
}
return true;
}
/**
* Implements a rectangular block transfer (blit) of pixels between two
* miptrees.
*
* Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
* but limited, pitches and sizes allowed.
*
* The src/dst coordinates are relative to the given level/slice of the
* miptree.
*
* If @src_flip or @dst_flip is set, then the rectangle within that miptree
* will be inverted (including scanline order) when copying. This is common
* in GL when copying between window system and user-created
* renderbuffers/textures.
*/
bool
brw_miptree_blit(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y, bool src_flip,
struct brw_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y, bool dst_flip,
uint32_t width, uint32_t height,
enum gl_logicop_mode logicop)
{
/* The blitter doesn't understand multisampling at all. */
if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1)
return false;
/* No sRGB decode or encode is done by the hardware blitter, which is
* consistent with what we want in many callers (glCopyTexSubImage(),
* texture validation, etc.).
*/
mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);
/* The blitter doesn't support doing any format conversions. We do also
* support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
* the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
* channel to 1.0 at the end. Also trivially ARGB2101010 to XRGB2101010,
* but not XRGB2101010 to ARGB2101010 yet.
*/
if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) {
perf_debug("%s: Can't use hardware blitter from %s to %s, "
"falling back.\n", __func__,
_mesa_get_format_name(src_format),
_mesa_get_format_name(dst_format));
return false;
}
/* The blitter has no idea about HiZ or fast color clears, so we need to
* resolve the miptrees before we do anything.
*/
brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false);
brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true);
if (src_flip) {
const unsigned h0 = src_mt->surf.phys_level0_sa.height;
src_y = minify(h0, src_level - src_mt->first_level) - src_y - height;
}
if (dst_flip) {
const unsigned h0 = dst_mt->surf.phys_level0_sa.height;
dst_y = minify(h0, dst_level - dst_mt->first_level) - dst_y - height;
}
uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
brw_miptree_get_image_offset(src_mt, src_level, src_slice,
&src_image_x, &src_image_y);
brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
&dst_image_x, &dst_image_y);
src_x += src_image_x;
src_y += src_image_y;
dst_x += dst_image_x;
dst_y += dst_image_y;
if (!emit_miptree_blit(brw, src_mt, src_x, src_y,
dst_mt, dst_x, dst_y, width, height,
src_flip != dst_flip, logicop)) {
return false;
}
/* XXX This could be done in a single pass using XY_FULL_MONO_PATTERN_BLT */
if (_mesa_get_format_bits(src_format, GL_ALPHA_BITS) == 0 &&
_mesa_get_format_bits(dst_format, GL_ALPHA_BITS) > 0) {
brw_miptree_set_alpha_to_one(brw, dst_mt, dst_x, dst_y, width, height);
}
return true;
}
bool
brw_miptree_copy(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y,
struct brw_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y,
uint32_t src_width, uint32_t src_height)
{
/* The blitter doesn't understand multisampling at all. */
if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1)
return false;
if (src_mt->format == MESA_FORMAT_S_UINT8)
return false;
/* The blitter has no idea about HiZ or fast color clears, so we need to
* resolve the miptrees before we do anything.
*/
brw_miptree_access_raw(brw, src_mt, src_level, src_slice, false);
brw_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true);
uint32_t src_image_x, src_image_y;
brw_miptree_get_image_offset(src_mt, src_level, src_slice,
&src_image_x, &src_image_y);
if (_mesa_is_format_compressed(src_mt->format)) {
GLuint bw, bh;
_mesa_get_format_block_size(src_mt->format, &bw, &bh);
/* Compressed textures need not have dimensions that are a multiple of
* the block size. Rectangles in compressed textures do need to be a
* multiple of the block size. The one exception is that the right and
* bottom edges may be at the right or bottom edge of the miplevel even
* if it's not aligned.
*/
assert(src_x % bw == 0);
assert(src_y % bh == 0);
assert(src_width % bw == 0 ||
src_x + src_width ==
minify(src_mt->surf.logical_level0_px.width, src_level));
assert(src_height % bh == 0 ||
src_y + src_height ==
minify(src_mt->surf.logical_level0_px.height, src_level));
src_x /= (int)bw;
src_y /= (int)bh;
src_width = DIV_ROUND_UP(src_width, (int)bw);
src_height = DIV_ROUND_UP(src_height, (int)bh);
}
src_x += src_image_x;
src_y += src_image_y;
uint32_t dst_image_x, dst_image_y;
brw_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
&dst_image_x, &dst_image_y);
if (_mesa_is_format_compressed(dst_mt->format)) {
GLuint bw, bh;
_mesa_get_format_block_size(dst_mt->format, &bw, &bh);
assert(dst_x % bw == 0);
assert(dst_y % bh == 0);
dst_x /= (int)bw;
dst_y /= (int)bh;
}
dst_x += dst_image_x;
dst_y += dst_image_y;
return emit_miptree_blit(brw, src_mt, src_x, src_y,
dst_mt, dst_x, dst_y,
src_width, src_height, false, COLOR_LOGICOP_COPY);
}
bool
brw_emit_immediate_color_expand_blit(struct brw_context *brw,
GLuint cpp,
GLubyte *src_bits, GLuint src_size,
GLuint fg_color,
GLshort dst_pitch,
struct brw_bo *dst_buffer,
GLuint dst_offset,
enum isl_tiling dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
enum gl_logicop_mode logic_op)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
int dwords = ALIGN(src_size, 8) / 4;
uint32_t opcode, br13, blit_cmd;
if (dst_tiling != ISL_TILING_LINEAR) {
if (dst_offset & 4095)
return false;
if (dst_tiling == ISL_TILING_Y0)
return false;
}
assert((unsigned) logic_op <= 0x0f);
assert(dst_pitch > 0);
if (w < 0 || h < 0)
return true;
DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
__func__,
dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
unsigned xy_setup_blt_length = devinfo->ver >= 8 ? 10 : 8;
brw_batch_require_space(brw, (xy_setup_blt_length * 4) +
(3 * 4) + dwords * 4);
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
if (dst_tiling != ISL_TILING_LINEAR) {
opcode |= XY_DST_TILED;
dst_pitch /= 4;
}
br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
br13 |= br13_for_cpp(cpp);
blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
if (dst_tiling != ISL_TILING_LINEAR)
blit_cmd |= XY_DST_TILED;
BEGIN_BATCH_BLT(xy_setup_blt_length + 3);
OUT_BATCH(opcode | (xy_setup_blt_length - 2));
OUT_BATCH(br13);
OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
if (devinfo->ver >= 8) {
OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset);
} else {
OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset);
}
OUT_BATCH(0); /* bg */
OUT_BATCH(fg_color); /* fg */
OUT_BATCH(0); /* pattern base addr */
if (devinfo->ver >= 8)
OUT_BATCH(0);
OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X));
OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X));
ADVANCE_BATCH();
brw_batch_data(brw, src_bits, dwords * 4);
brw_emit_mi_flush(brw);
return true;
}
/**
* Used to initialize the alpha value of an ARGB8888 miptree after copying
* into it from an XRGB8888 source.
*
* This is very common with glCopyTexImage2D(). Note that the coordinates are
* relative to the start of the miptree, not relative to a slice within the
* miptree.
*/
static void
brw_miptree_set_alpha_to_one(struct brw_context *brw,
struct brw_mipmap_tree *mt,
int x, int y, int width, int height)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
uint32_t BR13, CMD;
int pitch, cpp;
pitch = mt->surf.row_pitch_B;
cpp = mt->cpp;
DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
__func__, mt->bo, pitch, x, y, width, height);
/* Note: Currently only handles an 8 bit alpha channel. Extension to a < 8 bit
* alpha channel would likely be possible via ROP code 0xfa instead of 0xf0
* and writing a suitable bit-mask instead of 0xffffffff.
*/
BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
CMD = XY_COLOR_BLT_CMD;
CMD |= XY_BLT_WRITE_ALPHA;
if (mt->surf.tiling != ISL_TILING_LINEAR) {
CMD |= XY_DST_TILED;
pitch /= 4;
}
BR13 |= pitch;
/* do space check before going any further */
if (!brw_batch_has_aperture_space(brw, mt->bo->size))
brw_batch_flush(brw);
unsigned length = devinfo->ver >= 8 ? 7 : 6;
const bool dst_y_tiled = mt->surf.tiling == ISL_TILING_Y0;
/* We need to split the blit into chunks that each fit within the blitter's
* restrictions. We can't use a chunk size of 32768 because we need to
* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
* a nice round power of two, big enough that performance won't suffer, and
* small enough to guarantee everything fits.
*/
const uint32_t max_chunk_size = 16384;
for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
uint64_t offset_B;
uint32_t tile_x, tile_y;
get_blit_intratile_offset_el(brw, mt,
x + chunk_x, y + chunk_y,
&offset_B, &tile_x, &tile_y);
BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false);
OUT_BATCH(CMD | (length - 2));
OUT_BATCH(BR13);
OUT_BATCH(SET_FIELD(y + chunk_y, BLT_Y) |
SET_FIELD(x + chunk_x, BLT_X));
OUT_BATCH(SET_FIELD(y + chunk_y + chunk_h, BLT_Y) |
SET_FIELD(x + chunk_x + chunk_w, BLT_X));
if (devinfo->ver >= 8) {
OUT_RELOC64(mt->bo, RELOC_WRITE, mt->offset + offset_B);
} else {
OUT_RELOC(mt->bo, RELOC_WRITE, mt->offset + offset_B);
}
OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
ADVANCE_BATCH_TILED(dst_y_tiled, false);
}
}
brw_emit_mi_flush(brw);
}
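The chunking above is pure arithmetic; this standalone sketch (plain C, not driver code) walks the same decomposition of a large rectangle into clamped sub-rectangles of at most 16384 texels per side:

#include <stdint.h>
#include <stdio.h>

#define MAX_CHUNK 16384u

static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

int main(void)
{
   const uint32_t width = 40000, height = 20000;   /* example surface size */

   for (uint32_t cx = 0; cx < width; cx += MAX_CHUNK) {
      for (uint32_t cy = 0; cy < height; cy += MAX_CHUNK) {
         /* Clamp the last chunk in each direction, as MIN2() does above. */
         uint32_t cw = min_u32(MAX_CHUNK, width - cx);
         uint32_t ch = min_u32(MAX_CHUNK, height - cy);
         printf("chunk at (%u,%u) size %ux%u\n", cx, cy, cw, ch);
      }
   }
   return 0;
}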

View file

@@ -1,65 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_BLIT_H
#define BRW_BLIT_H
#include "brw_context.h"
bool brw_miptree_blit_compatible_formats(mesa_format src, mesa_format dst);
bool brw_miptree_blit(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y, bool src_flip,
struct brw_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y, bool dst_flip,
uint32_t width, uint32_t height,
enum gl_logicop_mode logicop);
bool brw_miptree_copy(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y,
struct brw_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y,
uint32_t src_width, uint32_t src_height);
bool
brw_emit_immediate_color_expand_blit(struct brw_context *brw,
GLuint cpp,
GLubyte *src_bits, GLuint src_size,
GLuint fg_color,
GLshort dst_pitch,
struct brw_bo *dst_buffer,
GLuint dst_offset,
enum isl_tiling dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
enum gl_logicop_mode logic_op);
#endif

File diff suppressed because it is too large

View file

@@ -1,137 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_BLORP_H
#define BRW_BLORP_H
#include "blorp/blorp.h"
#include "brw_mipmap_tree.h"
#include "program/prog_instruction.h"
#ifdef __cplusplus
extern "C" {
#endif
void brw_blorp_init(struct brw_context *brw);
void
brw_blorp_blit_miptrees(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
unsigned src_level, unsigned src_layer,
mesa_format src_format, int src_swizzle,
struct brw_mipmap_tree *dst_mt,
unsigned dst_level, unsigned dst_layer,
mesa_format dst_format,
float src_x0, float src_y0,
float src_x1, float src_y1,
float dst_x0, float dst_y0,
float dst_x1, float dst_y1,
GLenum filter, bool mirror_x, bool mirror_y,
bool decode_srgb, bool encode_srgb);
void
brw_blorp_copy_miptrees(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
unsigned src_level, unsigned src_logical_layer,
struct brw_mipmap_tree *dst_mt,
unsigned dst_level, unsigned dst_logical_layer,
unsigned src_x, unsigned src_y,
unsigned dst_x, unsigned dst_y,
unsigned src_width, unsigned src_height);
void
brw_blorp_copy_buffers(struct brw_context *brw,
struct brw_bo *src_bo,
unsigned src_offset,
struct brw_bo *dst_bo,
unsigned dst_offset,
unsigned size);
bool
brw_blorp_upload_miptree(struct brw_context *brw,
struct brw_mipmap_tree *dst_mt,
mesa_format dst_format,
uint32_t level, uint32_t x, uint32_t y, uint32_t z,
uint32_t width, uint32_t height, uint32_t depth,
GLenum target, GLenum format, GLenum type,
const void *pixels,
const struct gl_pixelstore_attrib *packing);
bool
brw_blorp_download_miptree(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
mesa_format src_format, uint32_t src_swizzle,
uint32_t level, uint32_t x, uint32_t y, uint32_t z,
uint32_t width, uint32_t height, uint32_t depth,
GLenum target, GLenum format, GLenum type,
bool y_flip, const void *pixels,
const struct gl_pixelstore_attrib *packing);
void
brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
GLbitfield mask, bool partial_clear, bool encode_srgb);
void
brw_blorp_clear_depth_stencil(struct brw_context *brw,
struct gl_framebuffer *fb,
GLbitfield mask, bool partial_clear);
void
brw_blorp_resolve_color(struct brw_context *brw,
struct brw_mipmap_tree *mt,
unsigned level, unsigned layer,
enum isl_aux_op resolve_op);
void
brw_blorp_mcs_partial_resolve(struct brw_context *brw,
struct brw_mipmap_tree *mt,
uint32_t start_layer, uint32_t num_layers);
void
brw_hiz_exec(struct brw_context *brw, struct brw_mipmap_tree *mt,
unsigned int level, unsigned int start_layer,
unsigned int num_layers, enum isl_aux_op op);
void gfx4_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
void gfx45_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
void gfx5_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
void gfx6_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
void gfx7_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
void gfx75_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
void gfx8_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
void gfx9_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
void gfx11_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* BRW_BLORP_H */

View file

@@ -1,710 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @file brw_buffer_objects.c
*
* This provides core GL buffer object functionality.
*/
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/streaming-load-memcpy.h"
#include "main/bufferobj.h"
#include "x86/common_x86_asm.h"
#include "util/u_memory.h"
#include "brw_context.h"
#include "brw_blorp.h"
#include "brw_buffer_objects.h"
#include "brw_batch.h"
static void
mark_buffer_gpu_usage(struct brw_buffer_object *intel_obj,
uint32_t offset, uint32_t size)
{
intel_obj->gpu_active_start = MIN2(intel_obj->gpu_active_start, offset);
intel_obj->gpu_active_end = MAX2(intel_obj->gpu_active_end, offset + size);
}
static void
mark_buffer_inactive(struct brw_buffer_object *intel_obj)
{
intel_obj->gpu_active_start = ~0;
intel_obj->gpu_active_end = 0;
}
static void
mark_buffer_valid_data(struct brw_buffer_object *intel_obj,
uint32_t offset, uint32_t size)
{
intel_obj->valid_data_start = MIN2(intel_obj->valid_data_start, offset);
intel_obj->valid_data_end = MAX2(intel_obj->valid_data_end, offset + size);
}
static void
mark_buffer_invalid(struct brw_buffer_object *intel_obj)
{
intel_obj->valid_data_start = ~0;
intel_obj->valid_data_end = 0;
}
/** Allocates a new brw_bo to store the data for the buffer object. */
static void
alloc_buffer_object(struct brw_context *brw,
struct brw_buffer_object *intel_obj)
{
const struct gl_context *ctx = &brw->ctx;
uint64_t size = intel_obj->Base.Size;
if (ctx->Const.RobustAccess) {
/* Pad out buffer objects with an extra 2kB (half a page).
*
* When pushing UBOs, we need to safeguard against 3DSTATE_CONSTANT_*
* reading out of bounds memory. The application might bind a UBO that's
* smaller than what the program expects. Ideally, we'd bind an extra
* push buffer containing zeros, but we have a limited number of those,
* so it's not always viable. Our only safe option is to pad all buffer
* objects by the maximum push data length, so that it will never read
* past the end of a BO.
*
* This is unfortunate, but it should result in at most 1 extra page,
* which probably isn't too terrible.
*/
size += 64 * 32; /* max read length of 64 256-bit units */
}
intel_obj->buffer =
brw_bo_alloc(brw->bufmgr, "bufferobj", size, BRW_MEMZONE_OTHER);
/* the buffer might be bound as a uniform buffer, need to update it
*/
if (intel_obj->Base.UsageHistory & USAGE_UNIFORM_BUFFER)
brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
if (intel_obj->Base.UsageHistory & USAGE_SHADER_STORAGE_BUFFER)
brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
if (intel_obj->Base.UsageHistory & USAGE_TEXTURE_BUFFER)
brw->ctx.NewDriverState |= BRW_NEW_TEXTURE_BUFFER;
if (intel_obj->Base.UsageHistory & USAGE_ATOMIC_COUNTER_BUFFER)
brw->ctx.NewDriverState |= BRW_NEW_UNIFORM_BUFFER;
mark_buffer_inactive(intel_obj);
mark_buffer_invalid(intel_obj);
}
static void
release_buffer(struct brw_buffer_object *intel_obj)
{
brw_bo_unreference(intel_obj->buffer);
intel_obj->buffer = NULL;
}
/**
* The NewBufferObject() driver hook.
*
* Allocates a new brw_buffer_object structure and initializes it.
*
* There is some duplication between mesa's bufferobjects and our
* bufmgr buffers. Both have an integer handle and a hashtable to
* lookup an opaque structure. It would be nice if the handles and
* internal structure were somehow shared.
*/
static struct gl_buffer_object *
brw_new_buffer_object(struct gl_context * ctx, GLuint name)
{
struct brw_buffer_object *obj = CALLOC_STRUCT(brw_buffer_object);
if (!obj) {
_mesa_error_no_memory(__func__);
return NULL;
}
_mesa_initialize_buffer_object(ctx, &obj->Base, name);
obj->buffer = NULL;
return &obj->Base;
}
/**
* The DeleteBuffer() driver hook.
*
* Deletes a single OpenGL buffer object. Used by glDeleteBuffers().
*/
static void
brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj)
{
struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
assert(intel_obj);
/* Buffer objects are automatically unmapped when deleting according
* to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy
* (though it does if you call glDeleteBuffers)
*/
_mesa_buffer_unmap_all_mappings(ctx, obj);
brw_bo_unreference(intel_obj->buffer);
_mesa_delete_buffer_object(ctx, obj);
}
/**
* The BufferData() driver hook.
*
* Implements glBufferData(), which recreates a buffer object's data store
* and populates it with the given data, if present.
*
* Any data that was previously stored in the buffer object is lost.
*
* \return true for success, false if out of memory
*/
static GLboolean
brw_buffer_data(struct gl_context *ctx,
GLenum target,
GLsizeiptrARB size,
const GLvoid *data,
GLenum usage,
GLbitfield storageFlags,
struct gl_buffer_object *obj)
{
struct brw_context *brw = brw_context(ctx);
struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
/* Part of the ABI, but this function doesn't use it.
*/
(void) target;
intel_obj->Base.Size = size;
intel_obj->Base.Usage = usage;
intel_obj->Base.StorageFlags = storageFlags;
assert(!obj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */
assert(!obj->Mappings[MAP_INTERNAL].Pointer);
if (intel_obj->buffer != NULL)
release_buffer(intel_obj);
if (size != 0) {
alloc_buffer_object(brw, intel_obj);
if (!intel_obj->buffer)
return false;
if (data != NULL) {
brw_bo_subdata(intel_obj->buffer, 0, size, data);
mark_buffer_valid_data(intel_obj, 0, size);
}
}
return true;
}
static GLboolean
brw_buffer_data_mem(struct gl_context *ctx,
GLenum target,
GLsizeiptrARB size,
struct gl_memory_object *memObj,
GLuint64 offset,
GLenum usage,
struct gl_buffer_object *bufObj)
{
struct brw_buffer_object *intel_obj = brw_buffer_object(bufObj);
struct brw_memory_object *intel_memObj = brw_memory_object(memObj);
/* Part of the ABI, but this function doesn't use it.
*/
(void) target;
intel_obj->Base.Size = size;
intel_obj->Base.Usage = usage;
intel_obj->Base.StorageFlags = 0;
assert(!bufObj->Mappings[MAP_USER].Pointer); /* Mesa should have unmapped it */
assert(!bufObj->Mappings[MAP_INTERNAL].Pointer);
if (intel_obj->buffer != NULL)
release_buffer(intel_obj);
if (size != 0) {
intel_obj->buffer = intel_memObj->bo;
mark_buffer_valid_data(intel_obj, offset, size);
}
return true;
}
/**
* The BufferSubData() driver hook.
*
* Implements glBufferSubData(), which replaces a portion of the data in a
* buffer object.
*
* If the data range specified by (size + offset) extends beyond the end of
* the buffer or if data is NULL, no copy is performed.
*/
static void
brw_buffer_subdata(struct gl_context *ctx,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid *data,
struct gl_buffer_object *obj)
{
struct brw_context *brw = brw_context(ctx);
struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
bool busy;
if (size == 0)
return;
assert(intel_obj);
/* See if we can unsynchronized write the data into the user's BO. This
* avoids GPU stalls in unfortunately common user patterns (uploading
* sequentially into a BO, with draw calls in between each upload).
*
* Once we've hit this path, we mark this GL BO as preferring stalling to
* blits, so that we can hopefully hit this path again in the future
* (otherwise, an app that might occasionally stall but mostly not will end
* up with blitting all the time, at the cost of bandwidth)
*/
if (offset + size <= intel_obj->gpu_active_start ||
intel_obj->gpu_active_end <= offset ||
offset + size <= intel_obj->valid_data_start ||
intel_obj->valid_data_end <= offset) {
void *map = brw_bo_map(brw, intel_obj->buffer, MAP_WRITE | MAP_ASYNC);
memcpy(map + offset, data, size);
brw_bo_unmap(intel_obj->buffer);
if (intel_obj->gpu_active_end > intel_obj->gpu_active_start)
intel_obj->prefer_stall_to_blit = true;
mark_buffer_valid_data(intel_obj, offset, size);
return;
}
busy =
brw_bo_busy(intel_obj->buffer) ||
brw_batch_references(&brw->batch, intel_obj->buffer);
if (busy) {
if (size == intel_obj->Base.Size ||
(intel_obj->valid_data_start >= offset &&
intel_obj->valid_data_end <= offset + size)) {
/* Replace the current busy bo so the subdata doesn't stall. */
brw_bo_unreference(intel_obj->buffer);
alloc_buffer_object(brw, intel_obj);
} else if (!intel_obj->prefer_stall_to_blit) {
perf_debug("Using a blit copy to avoid stalling on "
"glBufferSubData(%ld, %ld) (%ldkb) to a busy "
"(%d-%d) / valid (%d-%d) buffer object.\n",
(long)offset, (long)offset + size, (long)(size/1024),
intel_obj->gpu_active_start,
intel_obj->gpu_active_end,
intel_obj->valid_data_start,
intel_obj->valid_data_end);
struct brw_bo *temp_bo =
brw_bo_alloc(brw->bufmgr, "subdata temp", size, BRW_MEMZONE_OTHER);
brw_bo_subdata(temp_bo, 0, size, data);
brw_blorp_copy_buffers(brw,
temp_bo, 0,
intel_obj->buffer, offset,
size);
brw_emit_mi_flush(brw);
brw_bo_unreference(temp_bo);
mark_buffer_valid_data(intel_obj, offset, size);
return;
} else {
perf_debug("Stalling on glBufferSubData(%ld, %ld) (%ldkb) to a busy "
"(%d-%d) buffer object. Use glMapBufferRange() to "
"avoid this.\n",
(long)offset, (long)offset + size, (long)(size/1024),
intel_obj->gpu_active_start,
intel_obj->gpu_active_end);
brw_batch_flush(brw);
}
}
brw_bo_subdata(intel_obj->buffer, offset, size, data);
mark_buffer_inactive(intel_obj);
mark_buffer_valid_data(intel_obj, offset, size);
}
/* Typedef for memcpy function (used in brw_get_buffer_subdata below). */
typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n);
/**
* The GetBufferSubData() driver hook.
*
* Implements glGetBufferSubData(), which copies a subrange of a buffer
* object into user memory.
*/
static void
brw_get_buffer_subdata(struct gl_context *ctx,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid *data,
struct gl_buffer_object *obj)
{
struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
struct brw_context *brw = brw_context(ctx);
assert(intel_obj);
if (brw_batch_references(&brw->batch, intel_obj->buffer)) {
brw_batch_flush(brw);
}
unsigned int map_flags = MAP_READ;
mem_copy_fn memcpy_fn = memcpy;
#ifdef USE_SSE41
if (!intel_obj->buffer->cache_coherent && cpu_has_sse4_1) {
* Rather than acquire a new WB mmapping of the buffer object and pull
* it into the CPU cache, keep using the WC mmap that we have for writes,
* and use the magic movntd instructions instead.
*/
map_flags |= MAP_COHERENT;
memcpy_fn = (mem_copy_fn) _mesa_streaming_load_memcpy;
}
#endif
void *map = brw_bo_map(brw, intel_obj->buffer, map_flags);
if (unlikely(!map)) {
_mesa_error_no_memory(__func__);
return;
}
memcpy_fn(data, map + offset, size);
brw_bo_unmap(intel_obj->buffer);
mark_buffer_inactive(intel_obj);
}
/**
* The MapBufferRange() driver hook.
*
* This implements both glMapBufferRange() and glMapBuffer().
*
* The goal of this extension is to allow apps to accumulate their rendering
* at the same time as they accumulate their buffer object. Without it,
* you'd end up blocking on execution of rendering every time you mapped
* the buffer to put new data in.
*
* We support it in 3 ways: If unsynchronized, then don't bother
* flushing the batchbuffer before mapping the buffer, which can save blocking
* in many cases. If we would still block, and they allow the whole buffer
* to be invalidated, then just allocate a new buffer to replace the old one.
* If not, and we'd block, and they allow the subrange of the buffer to be
* invalidated, then we can make a new little BO, let them write into that,
* and blit it into the real BO at unmap time.
*/
static void *
brw_map_buffer_range(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
GLbitfield access, struct gl_buffer_object *obj,
gl_map_buffer_index index)
{
struct brw_context *brw = brw_context(ctx);
struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
assert(intel_obj);
STATIC_ASSERT(GL_MAP_UNSYNCHRONIZED_BIT == MAP_ASYNC);
STATIC_ASSERT(GL_MAP_WRITE_BIT == MAP_WRITE);
STATIC_ASSERT(GL_MAP_READ_BIT == MAP_READ);
STATIC_ASSERT(GL_MAP_PERSISTENT_BIT == MAP_PERSISTENT);
STATIC_ASSERT(GL_MAP_COHERENT_BIT == MAP_COHERENT);
assert((access & MAP_INTERNAL_MASK) == 0);
/* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
* internally uses our functions directly.
*/
obj->Mappings[index].Offset = offset;
obj->Mappings[index].Length = length;
obj->Mappings[index].AccessFlags = access;
if (intel_obj->buffer == NULL) {
obj->Mappings[index].Pointer = NULL;
return NULL;
}
/* If the access is synchronized (like a normal buffer mapping), then get
* things flushed out so the later mapping syncs appropriately through GEM.
* If the user doesn't care about existing buffer contents and mapping would
* cause us to block, then throw out the old buffer.
*
* If they set INVALIDATE_BUFFER, we can pitch the current contents to
* achieve the required synchronization.
*/
if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
if (brw_batch_references(&brw->batch, intel_obj->buffer)) {
if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
brw_bo_unreference(intel_obj->buffer);
alloc_buffer_object(brw, intel_obj);
} else {
perf_debug("Stalling on the GPU for mapping a busy buffer "
"object\n");
brw_batch_flush(brw);
}
} else if (brw_bo_busy(intel_obj->buffer) &&
(access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
brw_bo_unreference(intel_obj->buffer);
alloc_buffer_object(brw, intel_obj);
}
}
if (access & MAP_WRITE)
mark_buffer_valid_data(intel_obj, offset, length);
/* If the user is mapping a range of an active buffer object but
* doesn't require the current contents of that range, make a new
* BO, and we'll copy what they put in there out at unmap or
* FlushRange time.
*
* That is, unless they're looking for a persistent mapping -- we would
* need to do blits in the MemoryBarrier call, and it's easier to just do a
* GPU stall and do a mapping.
*/
if (!(access & (GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_PERSISTENT_BIT)) &&
(access & GL_MAP_INVALIDATE_RANGE_BIT) &&
brw_bo_busy(intel_obj->buffer)) {
/* Ensure that the base alignment of the allocation meets the alignment
* guarantees the driver has advertised to the application.
*/
const unsigned alignment = ctx->Const.MinMapBufferAlignment;
intel_obj->map_extra[index] = (uintptr_t) offset % alignment;
intel_obj->range_map_bo[index] =
brw_bo_alloc(brw->bufmgr, "BO blit temp",
length + intel_obj->map_extra[index],
BRW_MEMZONE_OTHER);
void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access);
obj->Mappings[index].Pointer = map + intel_obj->map_extra[index];
return obj->Mappings[index].Pointer;
}
void *map = brw_bo_map(brw, intel_obj->buffer, access);
if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
mark_buffer_inactive(intel_obj);
}
obj->Mappings[index].Pointer = map + offset;
return obj->Mappings[index].Pointer;
}
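For context, this is roughly the application-side pattern the temporary-BO path above is aimed at; a hedged sketch assuming a GL 3.0+ context with entry points resolved through libepoxy (the buffer name and sizes are made up):

#include <epoxy/gl.h>
#include <string.h>

/* Stream new data into a sub-range of a possibly-busy VBO: INVALIDATE_RANGE
 * tells the driver it may hand back a scratch BO and blit it into place at
 * unmap time instead of stalling. */
static void upload_range(GLuint vbo, GLintptr offset,
                         const void *data, GLsizeiptr size)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);
   void *ptr = glMapBufferRange(GL_ARRAY_BUFFER, offset, size,
                                GL_MAP_WRITE_BIT |
                                GL_MAP_INVALIDATE_RANGE_BIT);
   if (ptr) {
      memcpy(ptr, data, size);
      glUnmapBuffer(GL_ARRAY_BUFFER);
   }
}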
/**
* The FlushMappedBufferRange() driver hook.
*
* Implements glFlushMappedBufferRange(), which signifies that modifications
* have been made to a range of a mapped buffer, and it should be flushed.
*
* This is only used for buffers mapped with GL_MAP_FLUSH_EXPLICIT_BIT.
*
* Ideally we'd use a BO to avoid taking up cache space for the temporary
* data, but FlushMappedBufferRange may be followed by further writes to
* the pointer, so we would have to re-map after emitting our blit, which
* would defeat the point.
*/
static void
brw_flush_mapped_buffer_range(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
struct gl_buffer_object *obj,
gl_map_buffer_index index)
{
struct brw_context *brw = brw_context(ctx);
struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
assert(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT);
/* If we gave a direct mapping of the buffer instead of using a temporary,
* then there's nothing to do.
*/
if (intel_obj->range_map_bo[index] == NULL)
return;
if (length == 0)
return;
/* Note that we're not unmapping our buffer while executing the blit. We
* need to have a mapping still at the end of this call, since the user
* gets to make further modifications and glFlushMappedBufferRange() calls.
* This is safe, because:
*
* - On LLC platforms, we're using a CPU mapping that's coherent with the
* GPU (except for the render caches), so the kernel doesn't need to do
* any flushing work for us except for what happens at batch exec time
* anyway.
*
* - On non-LLC platforms, we're using a GTT mapping that writes directly
* to system memory (except for the chipset cache that gets flushed at
* batch exec time).
*
* In both cases we don't need to stall for the previous blit to complete
* so we can re-map (and we definitely don't want to, since that would be
* slow): If the user edits a part of their buffer that's previously been
* blitted, then our lack of synchronization is fine, because either
* they'll get some too-new data in the first blit and not do another blit
* of that area (but in that case the results are undefined), or they'll do
* another blit of that area and the complete newer data will land the
* second time.
*/
brw_blorp_copy_buffers(brw,
intel_obj->range_map_bo[index],
intel_obj->map_extra[index] + offset,
intel_obj->buffer,
obj->Mappings[index].Offset + offset,
length);
mark_buffer_gpu_usage(intel_obj,
obj->Mappings[index].Offset + offset,
length);
brw_emit_mi_flush(brw);
}
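The hook above only fires for mappings made with GL_MAP_FLUSH_EXPLICIT_BIT; a minimal application-side sketch (libepoxy assumed, buffer name and sizes illustrative):

#include <epoxy/gl.h>
#include <string.h>

static void explicit_flush_example(GLuint ubo, const float *block0,
                                   const float *block1, GLsizeiptr block_size)
{
   glBindBuffer(GL_UNIFORM_BUFFER, ubo);
   char *ptr = glMapBufferRange(GL_UNIFORM_BUFFER, 0, 2 * block_size,
                                GL_MAP_WRITE_BIT |
                                GL_MAP_INVALIDATE_RANGE_BIT |
                                GL_MAP_FLUSH_EXPLICIT_BIT);
   if (!ptr)
      return;

   /* Only explicitly flushed sub-ranges are defined after unmap; each flush
    * may land in the blit path above when a temporary BO was substituted. */
   memcpy(ptr, block0, block_size);
   glFlushMappedBufferRange(GL_UNIFORM_BUFFER, 0, block_size);

   memcpy(ptr + block_size, block1, block_size);
   glFlushMappedBufferRange(GL_UNIFORM_BUFFER, block_size, block_size);

   glUnmapBuffer(GL_UNIFORM_BUFFER);
}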
/**
* The UnmapBuffer() driver hook.
*
* Implements glUnmapBuffer().
*/
static GLboolean
brw_unmap_buffer(struct gl_context *ctx,
struct gl_buffer_object *obj,
gl_map_buffer_index index)
{
struct brw_context *brw = brw_context(ctx);
struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
assert(intel_obj);
assert(obj->Mappings[index].Pointer);
if (intel_obj->range_map_bo[index] != NULL) {
brw_bo_unmap(intel_obj->range_map_bo[index]);
if (!(obj->Mappings[index].AccessFlags & GL_MAP_FLUSH_EXPLICIT_BIT)) {
brw_blorp_copy_buffers(brw,
intel_obj->range_map_bo[index],
intel_obj->map_extra[index],
intel_obj->buffer, obj->Mappings[index].Offset,
obj->Mappings[index].Length);
mark_buffer_gpu_usage(intel_obj, obj->Mappings[index].Offset,
obj->Mappings[index].Length);
brw_emit_mi_flush(brw);
}
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
brw_bo_unreference(intel_obj->range_map_bo[index]);
intel_obj->range_map_bo[index] = NULL;
} else if (intel_obj->buffer != NULL) {
brw_bo_unmap(intel_obj->buffer);
}
obj->Mappings[index].Pointer = NULL;
obj->Mappings[index].Offset = 0;
obj->Mappings[index].Length = 0;
return true;
}
/**
* Gets a pointer to the object's BO, and marks the given range as being used
* on the GPU.
*
* Anywhere that uses buffer objects in the pipeline should be using this to
* mark the range of the buffer that is being accessed by the pipeline.
*/
struct brw_bo *
brw_bufferobj_buffer(struct brw_context *brw,
struct brw_buffer_object *intel_obj,
uint32_t offset, uint32_t size, bool write)
{
/* This is needed so that things like transform feedback and texture buffer
* objects that need a BO but don't want to check that they exist for
* draw-time validation can just always get a BO from a GL buffer object.
*/
if (intel_obj->buffer == NULL)
alloc_buffer_object(brw, intel_obj);
mark_buffer_gpu_usage(intel_obj, offset, size);
/* If writing, (conservatively) mark this section as having valid data. */
if (write)
mark_buffer_valid_data(intel_obj, offset, size);
return intel_obj->buffer;
}
/**
* The CopyBufferSubData() driver hook.
*
* Implements glCopyBufferSubData(), which copies a portion of one buffer
* object's data to another. Independent source and destination offsets
* are allowed.
*/
static void
brw_copy_buffer_subdata(struct gl_context *ctx,
struct gl_buffer_object *src,
struct gl_buffer_object *dst,
GLintptr read_offset, GLintptr write_offset,
GLsizeiptr size)
{
struct brw_context *brw = brw_context(ctx);
struct brw_buffer_object *intel_src = brw_buffer_object(src);
struct brw_buffer_object *intel_dst = brw_buffer_object(dst);
struct brw_bo *src_bo, *dst_bo;
if (size == 0)
return;
dst_bo = brw_bufferobj_buffer(brw, intel_dst, write_offset, size, true);
src_bo = brw_bufferobj_buffer(brw, intel_src, read_offset, size, false);
brw_blorp_copy_buffers(brw,
src_bo, read_offset,
dst_bo, write_offset, size);
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
brw_emit_mi_flush(brw);
}
void
brw_init_buffer_object_functions(struct dd_function_table *functions)
{
functions->NewBufferObject = brw_new_buffer_object;
functions->DeleteBuffer = brw_delete_buffer;
functions->BufferData = brw_buffer_data;
functions->BufferDataMem = brw_buffer_data_mem;
functions->BufferSubData = brw_buffer_subdata;
functions->GetBufferSubData = brw_get_buffer_subdata;
functions->MapBufferRange = brw_map_buffer_range;
functions->FlushMappedBufferRange = brw_flush_mapped_buffer_range;
functions->UnmapBuffer = brw_unmap_buffer;
functions->CopyBufferSubData = brw_copy_buffer_subdata;
}

View file

@@ -1,141 +0,0 @@
/*
* Copyright 2005 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_BUFFEROBJ_H
#define BRW_BUFFEROBJ_H
#include "main/mtypes.h"
struct brw_context;
struct gl_buffer_object;
/**
* Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
*/
struct brw_buffer_object
{
struct gl_buffer_object Base;
struct brw_bo *buffer; /* the low-level buffer manager's buffer handle */
struct brw_bo *range_map_bo[MAP_COUNT];
/**
* Alignment offset from the range_map_bo temporary mapping to the returned
* obj->Pointer (caused by GL_ARB_map_buffer_alignment).
*/
unsigned map_extra[MAP_COUNT];
/** @{
* Tracking for what range of the BO may currently be in use by the GPU.
*
* Users often want to either glBufferSubData() or glMapBufferRange() a
* buffer object where some subset of it is busy on the GPU, without either
* stalling or doing an extra blit (since our blits are extra expensive,
* given that we have to reupload most of the 3D state when switching
* rings). We wish they'd just use glMapBufferRange() with the
* UNSYNC|INVALIDATE_RANGE flag or the INVALIDATE_BUFFER flag, but lots
* don't.
*
* To work around apps, we track what range of the BO we might have used on
* the GPU as vertex data, transform feedback output, buffer textures, etc.,
* and just do glBufferSubData() with an unsynchronized map when they're
* outside of that range.
*
* If gpu_active_start > gpu_active_end, then the GPU is not currently
* accessing the BO (and we can map it without synchronization).
*/
uint32_t gpu_active_start;
uint32_t gpu_active_end;
/** @{
* Tracking for what range of the BO may contain valid data.
*
* Users may create a large buffer object and only fill part of it
* with valid data. This is a conservative estimate of what part
* of the buffer contains valid data that we have to preserve.
*/
uint32_t valid_data_start;
uint32_t valid_data_end;
/** @} */
/**
* If we've avoided stalls/blits using the active tracking, flag the buffer
* for (occasional) stalling in the future to avoid getting stuck in a
* cycle of blitting on buffer wraparound.
*/
bool prefer_stall_to_blit;
/** @} */
};
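A standalone sketch (plain C, not part of the original header) of the "empty interval" convention both range pairs above rely on: start > end means no bytes are covered, growing the range is MIN/MAX arithmetic, and the overlap test below is the one brw_buffer_subdata() uses to decide whether an unsynchronized write is safe.

#include <stdbool.h>
#include <stdint.h>

struct byte_range { uint32_t start, end; };

static void range_reset(struct byte_range *r)
{
   r->start = ~0u;   /* start > end: nothing covered */
   r->end = 0;
}

static void range_add(struct byte_range *r, uint32_t offset, uint32_t size)
{
   if (offset < r->start)
      r->start = offset;
   if (offset + size > r->end)
      r->end = offset + size;
}

static bool range_overlaps(const struct byte_range *r,
                           uint32_t offset, uint32_t size)
{
   /* An empty range overlaps nothing, so the caller may skip synchronization. */
   return offset < r->end && offset + size > r->start;
}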
/* Get the bm buffer associated with a GL bufferobject:
*/
struct brw_bo *brw_bufferobj_buffer(struct brw_context *brw,
struct brw_buffer_object *obj,
uint32_t offset,
uint32_t size,
bool write);
void brw_upload_data(struct brw_uploader *upload,
const void *data,
uint32_t size,
uint32_t alignment,
struct brw_bo **out_bo,
uint32_t *out_offset);
void *brw_upload_space(struct brw_uploader *upload,
uint32_t size,
uint32_t alignment,
struct brw_bo **out_bo,
uint32_t *out_offset);
void brw_upload_finish(struct brw_uploader *upload);
void brw_upload_init(struct brw_uploader *upload,
struct brw_bufmgr *bufmgr,
unsigned default_size);
/* Hook the bufferobject implementation into mesa:
*/
void brw_init_buffer_object_functions(struct dd_function_table *functions);
static inline struct brw_buffer_object *
brw_buffer_object(struct gl_buffer_object *obj)
{
return (struct brw_buffer_object *) obj;
}
struct brw_memory_object {
struct gl_memory_object Base;
struct brw_bo *bo;
};
static inline struct brw_memory_object *
brw_memory_object(struct gl_memory_object *obj)
{
return (struct brw_memory_object *)obj;
}
#endif

View file

@@ -1,74 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "brw_context.h"
#include "brw_buffers.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
static void
brw_drawbuffer(struct gl_context *ctx)
{
if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
struct brw_context *const brw = brw_context(ctx);
/* If we might be front-buffer rendering on this buffer for the first
* time, invalidate our DRI drawable so we'll ask for new buffers
* (including the fake front) before we start rendering again.
*/
if (brw->driContext->driDrawablePriv)
dri2InvalidateDrawable(brw->driContext->driDrawablePriv);
brw_prepare_render(brw);
}
}
static void
brw_readbuffer(struct gl_context * ctx, GLenum mode)
{
if (_mesa_is_front_buffer_reading(ctx->ReadBuffer)) {
struct brw_context *const brw = brw_context(ctx);
/* If we might be front-buffer reading on this buffer for the first
* time, invalidate our DRI drawable so we'll ask for new buffers
* (including the fake front) before we start reading again.
*/
if (brw->driContext->driReadablePriv)
dri2InvalidateDrawable(brw->driContext->driReadablePriv);
brw_prepare_render(brw);
}
}
void
brw_init_buffer_functions(struct dd_function_table *functions)
{
functions->DrawBuffer = brw_drawbuffer;
functions->ReadBuffer = brw_readbuffer;
}

View file

@@ -1,35 +0,0 @@
/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_BUFFERS_H
#define BRW_BUFFERS_H
#include "dri_util.h"
#include "drm-uapi/drm.h"
#include "brw_context.h"
extern void brw_init_buffer_functions(struct dd_function_table *functions);
#endif /* BRW_BUFFERS_H */

File diff suppressed because it is too large

View file

@@ -1,404 +0,0 @@
/*
* Copyright © 2008-2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
/**
* @file brw_bufmgr.h
*
* Public definitions of Intel-specific bufmgr functions.
*/
#ifndef BRW_BUFMGR_H
#define BRW_BUFMGR_H
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include "c11/threads.h"
#include "util/u_atomic.h"
#include "util/list.h"
#if defined(__cplusplus)
extern "C" {
#endif
struct intel_device_info;
struct brw_context;
/**
* Memory zones. When allocating a buffer, you can request that it is
* placed into a specific region of the virtual address space (PPGTT).
*
* Most buffers can go anywhere (BRW_MEMZONE_OTHER). Some buffers are
* accessed via an offset from a base address. STATE_BASE_ADDRESS has
* a maximum 4GB size for each region, so we need to restrict those
* buffers to be within 4GB of the base. Each memory zone corresponds
* to a particular base address.
*
* Currently, i965 partitions the address space into two regions:
*
* - Low 4GB
* - Full 48-bit address space
*
* Eventually, we hope to carve out 4GB of VMA for each base address.
*/
enum brw_memory_zone {
BRW_MEMZONE_LOW_4G,
BRW_MEMZONE_OTHER,
/* Shaders - Instruction State Base Address */
BRW_MEMZONE_SHADER = BRW_MEMZONE_LOW_4G,
/* Scratch - General State Base Address */
BRW_MEMZONE_SCRATCH = BRW_MEMZONE_LOW_4G,
/* Surface State Base Address */
BRW_MEMZONE_SURFACE = BRW_MEMZONE_LOW_4G,
/* Dynamic State Base Address */
BRW_MEMZONE_DYNAMIC = BRW_MEMZONE_LOW_4G,
};
#define BRW_MEMZONE_COUNT (BRW_MEMZONE_OTHER + 1)
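A hedged sketch of how allocation code picks a zone (BO names and sizes here are illustrative; the functions are declared further down in this header): shader assembly must stay within 4GB of Instruction State Base Address, so it uses the low-4G alias, while ordinary data can go anywhere.

#include "brw_bufmgr.h"

static void memzone_example(struct brw_bufmgr *bufmgr)
{
   /* Instruction state: pinned to the low 4GB so 32-bit offsets from the
    * base address always reach it. */
   struct brw_bo *shader_bo =
      brw_bo_alloc(bufmgr, "shader assembly", 64 * 1024, BRW_MEMZONE_SHADER);

   /* Bulk data: anywhere in the 48-bit address space. */
   struct brw_bo *data_bo =
      brw_bo_alloc(bufmgr, "vertex data", 1024 * 1024, BRW_MEMZONE_OTHER);

   /* ... use the BOs ... */

   brw_bo_unreference(data_bo);
   brw_bo_unreference(shader_bo);
}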
struct brw_bo {
/**
* Size in bytes of the buffer object.
*
* The size may be larger than the size originally requested for the
* allocation, such as being aligned to page size.
*/
uint64_t size;
/** Buffer manager context associated with this buffer object */
struct brw_bufmgr *bufmgr;
/** The GEM handle for this buffer object. */
uint32_t gem_handle;
/**
* Offset of the buffer inside the Graphics Translation Table.
*
* This is effectively our GPU address for the buffer and we use it
* as our base for all state pointers into the buffer. However, since the
* kernel may be forced to move it around during the course of the
* buffer's lifetime, we can only know where the buffer was on the last
* execbuf. We presume, and are usually right, that the buffer will not
* move and so we use that last offset for the next batch and by doing
* so we can avoid having the kernel perform a relocation fixup pass as
* our pointers inside the batch will be using the correct base offset.
*
* Since we do use it as a base address for the next batch of pointers,
* the kernel treats our offset as a request, and if possible will
* arrange for the buffer to be placed at that address (trying to balance
* the cost of buffer migration versus the cost of performing
* relocations). Furthermore, we can force the kernel to place the buffer,
* or report a failure if we specified a conflicting offset, at our chosen
* offset by specifying EXEC_OBJECT_PINNED.
*
* Note the GTT may be either per context, or shared globally across the
* system. On a shared system, our buffers have to contend for address
* space with both aperture mappings and framebuffers and so are more
* likely to be moved. On a full ppGTT system, each batch exists in its
* own GTT, and so each buffer may have their own offset within each
* context.
*/
uint64_t gtt_offset;
/**
* The validation list index for this buffer, or -1 when not in a batch.
* Note that a single buffer may be in multiple batches (contexts), and
* this is a global field, which refers to the last batch using the BO.
* It should not be considered authoritative, but can be used to avoid a
* linear walk of the validation list in the common case by guessing that
* exec_bos[bo->index] == bo and confirming whether that's the case.
*/
unsigned index;
/**
* Boolean of whether the GPU is definitely not accessing the buffer.
*
* This is only valid when reusable, since non-reusable
* buffers are those that have been shared with other
* processes, so we don't know their state.
*/
bool idle;
int refcount;
const char *name;
uint64_t kflags;
/**
* Kernel-assigned global name for this object
*
* List contains both flink named and prime fd'd objects
*/
unsigned int global_name;
/**
* Current tiling mode
*/
uint32_t tiling_mode;
uint32_t swizzle_mode;
uint32_t stride;
time_t free_time;
/** Mapped address for the buffer, saved across map/unmap cycles */
void *map_cpu;
/** GTT virtual address for the buffer, saved across map/unmap cycles */
void *map_gtt;
/** WC CPU address for the buffer, saved across map/unmap cycles */
void *map_wc;
/** BO cache list */
struct list_head head;
/**
* List of GEM handle exports of this buffer (bo_export).
*
* Hold bufmgr->lock when using this list.
*/
struct list_head exports;
/**
* Boolean of whether this buffer can be re-used
*/
bool reusable;
/**
* Boolean of whether this buffer has been shared with an external client.
*/
bool external;
/**
* Boolean of whether this buffer is cache coherent
*/
bool cache_coherent;
};
#define BO_ALLOC_BUSY (1<<0)
#define BO_ALLOC_ZEROED (1<<1)
/**
* Allocate a buffer object.
*
* Buffer objects are not necessarily initially mapped into CPU virtual
* address space or graphics device aperture. They must be mapped
* using brw_bo_map() to be used by the CPU.
*/
struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
uint64_t size, enum brw_memory_zone memzone);
/**
* Allocate a tiled buffer object.
*
* Alignment for tiled objects is set automatically; the 'flags'
* argument provides a hint about how the object will be used initially.
*
* Valid tiling formats are:
* I915_TILING_NONE
* I915_TILING_X
* I915_TILING_Y
*/
struct brw_bo *brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr,
const char *name,
uint64_t size,
enum brw_memory_zone memzone,
uint32_t tiling_mode,
uint32_t pitch,
unsigned flags);
/**
* Allocate a tiled buffer object.
*
* Alignment for tiled objects is set automatically; the 'flags'
* argument provides a hint about how the object will be used initially.
*
* Valid tiling formats are:
* I915_TILING_NONE
* I915_TILING_X
* I915_TILING_Y
*
* Note the tiling format may be rejected; callers should check the
* 'tiling_mode' field on return, as well as the pitch value, which
* may have been rounded up to accommodate tiling restrictions.
*/
struct brw_bo *brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr,
const char *name,
int x, int y, int cpp,
enum brw_memory_zone memzone,
uint32_t tiling_mode,
uint32_t *pitch,
unsigned flags);
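A sketch of the call pattern the note above implies, reading back both the pitch and the possibly-rejected tiling mode (dimensions, the surface name, and the drm-uapi include path are assumptions for illustration):

#include "drm-uapi/i915_drm.h"   /* I915_TILING_* */
#include "brw_bufmgr.h"

static struct brw_bo *
alloc_xtiled_surface(struct brw_bufmgr *bufmgr, int width, int height, int cpp)
{
   uint32_t pitch = 0;
   struct brw_bo *bo =
      brw_bo_alloc_tiled_2d(bufmgr, "example surface", width, height, cpp,
                            BRW_MEMZONE_OTHER, I915_TILING_X, &pitch, 0);
   if (!bo)
      return NULL;

   /* The bufmgr may refuse the requested tiling and round the pitch up, so
    * always use what it reports rather than what was asked for. */
   if (bo->tiling_mode != I915_TILING_X) {
      /* handle the linear fallback here */
   }

   /* 'pitch' now holds the row stride in bytes to use for uploads. */
   return bo;
}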
/** Takes a reference on a buffer object */
static inline void
brw_bo_reference(struct brw_bo *bo)
{
p_atomic_inc(&bo->refcount);
}
/**
* Releases a reference on a buffer object, freeing the data if
* no references remain.
*/
void brw_bo_unreference(struct brw_bo *bo);
/* Must match MapBufferRange interface (for convenience) */
#define MAP_READ GL_MAP_READ_BIT
#define MAP_WRITE GL_MAP_WRITE_BIT
#define MAP_ASYNC GL_MAP_UNSYNCHRONIZED_BIT
#define MAP_PERSISTENT GL_MAP_PERSISTENT_BIT
#define MAP_COHERENT GL_MAP_COHERENT_BIT
/* internal */
#define MAP_INTERNAL_MASK (0xffu << 24)
#define MAP_RAW (0x01 << 24)
/**
* Maps the buffer into userspace.
*
* This function will block waiting for any existing execution on the
* buffer to complete, first. The resulting mapping is returned.
*/
MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags);
/**
* Reduces the refcount on the userspace mapping of the buffer
* object.
*/
static inline int brw_bo_unmap(UNUSED struct brw_bo *bo) { return 0; }
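A hedged sketch of the CPU upload pattern these entry points support (the brw_context pointer is assumed to come from the surrounding driver code; the MAP_* flags are the aliases defined just above):

#include <stdbool.h>
#include <string.h>
#include "brw_bufmgr.h"

static bool
upload_cpu_data(struct brw_context *brw, struct brw_bo *bo,
                const void *data, uint64_t size)
{
   /* MAP_WRITE | MAP_ASYNC skips waiting for outstanding GPU work, mirroring
    * GL_MAP_UNSYNCHRONIZED_BIT; drop MAP_ASYNC to block until the BO is idle. */
   void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_ASYNC);
   if (!map)
      return false;

   memcpy(map, data, size);
   brw_bo_unmap(bo);   /* currently a no-op, kept for symmetry */
   return true;
}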
/** Write data into an object. */
int brw_bo_subdata(struct brw_bo *bo, uint64_t offset,
uint64_t size, const void *data);
/**
* Waits for rendering to an object by the GPU to have completed.
*
* This is not required for any access to the BO by bo_map,
* bo_subdata, etc. It is merely a way for the driver to implement
* glFinish.
*/
void brw_bo_wait_rendering(struct brw_bo *bo);
/**
* Unref a buffer manager instance.
*/
void brw_bufmgr_unref(struct brw_bufmgr *bufmgr);
/**
* Get the current tiling (and resulting swizzling) mode for the bo.
*
* \param buf Buffer to get tiling mode for
* \param tiling_mode returned tiling mode
* \param swizzle_mode returned swizzling mode
*/
int brw_bo_get_tiling(struct brw_bo *bo, uint32_t *tiling_mode,
uint32_t *swizzle_mode);
/**
* Create a visible name for a buffer which can be used by other apps
*
* \param buf Buffer to create a name for
* \param name Returned name
*/
int brw_bo_flink(struct brw_bo *bo, uint32_t *name);
/**
* Returns 1 if mapping the buffer for write could cause the process
* to block, due to the object being active in the GPU.
*/
int brw_bo_busy(struct brw_bo *bo);
/**
* Specify the volatility of the buffer.
* \param bo Buffer to mark purgeable or required
* \param madv The purgeable status
*
* Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
* reclaimed under memory pressure. If you subsequently require the buffer,
* then you must pass I915_MADV_WILLNEED to mark the buffer as required.
*
* Returns 1 if the buffer was retained, or 0 if it was discarded whilst
* marked as I915_MADV_DONTNEED.
*/
int brw_bo_madvise(struct brw_bo *bo, int madv);
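A sketch of the purgeable-cache pattern described above, using the I915_MADV_* values from the kernel uAPI (the cache functions themselves are illustrative, not the bufmgr's real cache):

#include <stdbool.h>
#include "drm-uapi/i915_drm.h"
#include "brw_bufmgr.h"

/* Parking an idle BO: the kernel may reclaim its pages under memory pressure. */
static void cache_put(struct brw_bo *bo)
{
   brw_bo_madvise(bo, I915_MADV_DONTNEED);
   /* ... add bo to a free list ... */
}

/* Taking a BO back out: if it was purged, the old contents are gone and the
 * buffer must be treated as freshly allocated. */
static bool cache_get(struct brw_bo *bo)
{
   return brw_bo_madvise(bo, I915_MADV_WILLNEED) != 0;
}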
struct brw_bufmgr *brw_bufmgr_get_for_fd(struct intel_device_info *devinfo,
int fd, bool bo_reuse);
struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
const char *name,
unsigned int handle);
int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);
uint32_t brw_create_hw_context(struct brw_bufmgr *bufmgr);
int brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
uint32_t ctx_id,
int priority);
void brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id);
int brw_bufmgr_get_fd(struct brw_bufmgr *bufmgr);
int brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd);
struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr,
int prime_fd);
struct brw_bo *brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr,
int prime_fd,
uint32_t tiling_mode,
uint32_t stride);
uint32_t brw_bo_export_gem_handle(struct brw_bo *bo);
/**
* Exports a bo as a GEM handle into a given DRM file descriptor
* \param bo Buffer to export
* \param drm_fd File descriptor where the new handle is created
* \param out_handle Pointer to store the new handle
*
* Returns 0 if the buffer was successfully exported, a non-zero error code
* otherwise.
*/
int brw_bo_export_gem_handle_for_device(struct brw_bo *bo, int drm_fd,
uint32_t *out_handle);
int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset,
uint64_t *result);
bool brw_using_softpin(struct brw_bufmgr *bufmgr);
/** @{ */
#if defined(__cplusplus)
}
#endif
#endif /* BRW_BUFMGR_H */

View file

@@ -1,302 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* Copyright 2009, 2012 Intel Corporation.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "main/condrender.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
#include "brw_batch.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"
#include "brw_context.h"
#include "brw_blorp.h"
#include "brw_defines.h"
#define FILE_DEBUG_FLAG DEBUG_BLIT
static const char *buffer_names[] = {
[BUFFER_FRONT_LEFT] = "front",
[BUFFER_BACK_LEFT] = "back",
[BUFFER_FRONT_RIGHT] = "front right",
[BUFFER_BACK_RIGHT] = "back right",
[BUFFER_DEPTH] = "depth",
[BUFFER_STENCIL] = "stencil",
[BUFFER_ACCUM] = "accum",
[BUFFER_COLOR0] = "color0",
[BUFFER_COLOR1] = "color1",
[BUFFER_COLOR2] = "color2",
[BUFFER_COLOR3] = "color3",
[BUFFER_COLOR4] = "color4",
[BUFFER_COLOR5] = "color5",
[BUFFER_COLOR6] = "color6",
[BUFFER_COLOR7] = "color7",
};
static void
debug_mask(const char *name, GLbitfield mask)
{
GLuint i;
if (INTEL_DEBUG(DEBUG_BLIT)) {
DBG("%s clear:", name);
for (i = 0; i < BUFFER_COUNT; i++) {
if (mask & (1 << i))
DBG(" %s", buffer_names[i]);
}
DBG("\n");
}
}
/**
* Returns true if the scissor is a noop (cuts out nothing).
*/
static bool
noop_scissor(struct gl_framebuffer *fb)
{
return fb->_Xmin <= 0 &&
fb->_Ymin <= 0 &&
fb->_Xmax >= fb->Width &&
fb->_Ymax >= fb->Height;
}
/**
* Implements fast depth clears on gfx6+.
*
* Fast clears basically work by setting a flag in each of the subspans
* represented in the HiZ buffer that says "When you need the depth values for
* this subspan, it's the hardware's current clear value." Then later rendering
* can just use the static clear value instead of referencing memory.
*
* The tricky part of the implementation is that you have to have the clear
* value that was used on the depth buffer in place for all further rendering,
* at least until a resolve to the real depth buffer happens.
*/
static bool
brw_fast_clear_depth(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct brw_renderbuffer *depth_irb =
brw_get_renderbuffer(fb, BUFFER_DEPTH);
struct brw_mipmap_tree *mt = depth_irb->mt;
struct gl_renderbuffer_attachment *depth_att = &fb->Attachment[BUFFER_DEPTH];
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (INTEL_DEBUG(DEBUG_NO_FAST_CLEAR))
return false;
if (devinfo->ver < 6)
return false;
if (!brw_renderbuffer_has_hiz(depth_irb))
return false;
/* We only handle full buffer clears -- otherwise you'd have to track whether
* a previous clear had happened at a different clear value and resolve it
* first.
*/
if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(fb)) {
perf_debug("Failed to fast clear %dx%d depth because of scissors. "
"Possible 5%% performance win if avoided.\n",
mt->surf.logical_level0_px.width,
mt->surf.logical_level0_px.height);
return false;
}
switch (mt->format) {
case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
case MESA_FORMAT_Z24_UNORM_S8_UINT:
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
* enabled (the legacy method of clearing must be performed):
*
* - If the depth buffer format is D32_FLOAT_S8X24_UINT or
* D24_UNORM_S8_UINT.
*/
return false;
case MESA_FORMAT_Z_UNORM16:
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
* enabled (the legacy method of clearing must be performed):
*
* - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
* width of the map (LOD0) is not multiple of 16, fast clear
* optimization must be disabled.
*/
if (devinfo->ver == 6 &&
(minify(mt->surf.phys_level0_sa.width,
depth_irb->mt_level - mt->first_level) % 16) != 0)
return false;
break;
default:
break;
}
/* Quantize the clear value to what can be stored in the actual depth
* buffer. This makes the following check more accurate because it now
* checks if the actual depth bits will match. It also prevents us from
* getting a too-accurate depth value during depth testing or when sampling
* with HiZ enabled.
*/
float clear_value =
mt->format == MESA_FORMAT_Z_FLOAT32 ? ctx->Depth.Clear :
_mesa_lroundeven(ctx->Depth.Clear * fb->_DepthMax) / (float)(fb->_DepthMax);
const uint32_t num_layers = depth_att->Layered ? depth_irb->layer_count : 1;
/* If we're clearing to a new clear value, then we need to resolve any clear
* flags out of the HiZ buffer into the real depth buffer.
*/
if (mt->fast_clear_color.f32[0] != clear_value) {
for (uint32_t level = mt->first_level; level <= mt->last_level; level++) {
if (!brw_miptree_level_has_hiz(mt, level))
continue;
const unsigned level_layers = brw_get_num_logical_layers(mt, level);
for (uint32_t layer = 0; layer < level_layers; layer++) {
if (level == depth_irb->mt_level &&
layer >= depth_irb->mt_layer &&
layer < depth_irb->mt_layer + num_layers) {
/* We're going to clear this layer anyway. Leave it alone. */
continue;
}
enum isl_aux_state aux_state =
brw_miptree_get_aux_state(mt, level, layer);
if (aux_state != ISL_AUX_STATE_CLEAR &&
aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
/* This slice doesn't have any fast-cleared bits. */
continue;
}
/* If we got here, then the level may have fast-clear bits that
* use the old clear value. We need to do a depth resolve to get
* rid of their use of the clear value before we can change it.
* Fortunately, few applications ever change their depth clear
* value so this shouldn't happen often.
*/
brw_hiz_exec(brw, mt, level, layer, 1, ISL_AUX_OP_FULL_RESOLVE);
brw_miptree_set_aux_state(brw, mt, level, layer, 1,
ISL_AUX_STATE_RESOLVED);
}
}
const union isl_color_value clear_color = { .f32 = {clear_value, } };
brw_miptree_set_clear_color(brw, mt, clear_color);
}
for (unsigned a = 0; a < num_layers; a++) {
enum isl_aux_state aux_state =
brw_miptree_get_aux_state(mt, depth_irb->mt_level,
depth_irb->mt_layer + a);
if (aux_state != ISL_AUX_STATE_CLEAR) {
brw_hiz_exec(brw, mt, depth_irb->mt_level,
depth_irb->mt_layer + a, 1,
ISL_AUX_OP_FAST_CLEAR);
}
}
brw_miptree_set_aux_state(brw, mt, depth_irb->mt_level,
depth_irb->mt_layer, num_layers,
ISL_AUX_STATE_CLEAR);
return true;
}
/**
* Called by ctx->Driver.Clear.
*/
static void
brw_clear(struct gl_context *ctx, GLbitfield mask)
{
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
bool partial_clear = ctx->Scissor.EnableFlags && !noop_scissor(fb);
if (!_mesa_check_conditional_render(ctx))
return;
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
brw->front_buffer_dirty = true;
}
brw_prepare_render(brw);
brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask);
if (mask & BUFFER_BIT_DEPTH) {
if (brw_fast_clear_depth(ctx)) {
DBG("fast clear: depth\n");
mask &= ~BUFFER_BIT_DEPTH;
}
}
if (mask & BUFFER_BITS_COLOR) {
brw_blorp_clear_color(brw, fb, mask, partial_clear,
ctx->Color.sRGBEnabled);
debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
mask &= ~BUFFER_BITS_COLOR;
}
if (devinfo->ver >= 6 && (mask & BUFFER_BITS_DEPTH_STENCIL)) {
brw_blorp_clear_depth_stencil(brw, fb, mask, partial_clear);
debug_mask("blorp depth/stencil", mask & BUFFER_BITS_DEPTH_STENCIL);
mask &= ~BUFFER_BITS_DEPTH_STENCIL;
}
GLbitfield tri_mask = mask & (BUFFER_BIT_STENCIL |
BUFFER_BIT_DEPTH);
if (tri_mask) {
debug_mask("tri", tri_mask);
mask &= ~tri_mask;
_mesa_meta_glsl_Clear(&brw->ctx, tri_mask);
}
/* Any strange buffers get passed off to swrast. The only thing that
* should be left at this point is the accumulation buffer.
*/
assert((mask & ~BUFFER_BIT_ACCUM) == 0);
if (mask) {
debug_mask("swrast", mask);
_swrast_Clear(ctx, mask);
}
}
void
brw_init_clear_functions(struct dd_function_table *functions)
{
functions->Clear = brw_clear;
}

View file

@ -1,210 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "main/macros.h"
#include "main/enums.h"
#include "brw_batch.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_state.h"
#include "compiler/brw_eu.h"
#include "util/ralloc.h"
static void
compile_clip_prog(struct brw_context *brw, struct brw_clip_prog_key *key)
{
const unsigned *program;
void *mem_ctx;
unsigned program_size;
mem_ctx = ralloc_context(NULL);
struct brw_clip_prog_data prog_data;
program = brw_compile_clip(brw->screen->compiler, mem_ctx, key, &prog_data,
&brw->vue_map_geom_out, &program_size);
brw_upload_cache(&brw->cache,
BRW_CACHE_CLIP_PROG,
key, sizeof(*key),
program, program_size,
&prog_data, sizeof(prog_data),
&brw->clip.prog_offset, &brw->clip.prog_data);
ralloc_free(mem_ctx);
}
/* Calculate interpolants for triangle and line rasterization.
*/
void
brw_upload_clip_prog(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
struct brw_clip_prog_key key;
if (!brw_state_dirty(brw,
_NEW_BUFFERS |
_NEW_LIGHT |
_NEW_POLYGON |
_NEW_TRANSFORM,
BRW_NEW_BLORP |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_REDUCED_PRIMITIVE |
BRW_NEW_VUE_MAP_GEOM_OUT))
return;
memset(&key, 0, sizeof(key));
/* Populate the key:
*/
/* BRW_NEW_FS_PROG_DATA */
const struct brw_wm_prog_data *wm_prog_data =
brw_wm_prog_data(brw->wm.base.prog_data);
if (wm_prog_data) {
key.contains_flat_varying = wm_prog_data->contains_flat_varying;
key.contains_noperspective_varying =
wm_prog_data->contains_noperspective_varying;
STATIC_ASSERT(sizeof(key.interp_mode) ==
sizeof(wm_prog_data->interp_mode));
memcpy(key.interp_mode, wm_prog_data->interp_mode,
sizeof(key.interp_mode));
}
/* BRW_NEW_REDUCED_PRIMITIVE */
key.primitive = brw->reduced_primitive;
/* BRW_NEW_VUE_MAP_GEOM_OUT */
key.attrs = brw->vue_map_geom_out.slots_valid;
/* _NEW_LIGHT */
key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
/* _NEW_TRANSFORM (also part of VUE map)*/
if (ctx->Transform.ClipPlanesEnabled)
key.nr_userclip = util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
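/* Illustrative note (not in the original source): util_logbase2() of the
 * enable mask plus one is the index of the highest enabled plane plus one.
 * E.g. with planes 0 and 3 enabled (mask 0x9), util_logbase2(0x9) == 3, so
 * nr_userclip becomes 4, a count that spans planes 0-3 including any
 * disabled planes in between.
 */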
if (devinfo->ver == 5)
key.clip_mode = BRW_CLIP_MODE_KERNEL_CLIP;
else
key.clip_mode = BRW_CLIP_MODE_NORMAL;
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
if (ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
key.clip_mode = BRW_CLIP_MODE_REJECT_ALL;
else {
GLuint fill_front = BRW_CLIP_FILL_MODE_CULL;
GLuint fill_back = BRW_CLIP_FILL_MODE_CULL;
GLuint offset_front = 0;
GLuint offset_back = 0;
if (!ctx->Polygon.CullFlag ||
ctx->Polygon.CullFaceMode != GL_FRONT) {
switch (ctx->Polygon.FrontMode) {
case GL_FILL:
fill_front = BRW_CLIP_FILL_MODE_FILL;
offset_front = 0;
break;
case GL_LINE:
fill_front = BRW_CLIP_FILL_MODE_LINE;
offset_front = ctx->Polygon.OffsetLine;
break;
case GL_POINT:
fill_front = BRW_CLIP_FILL_MODE_POINT;
offset_front = ctx->Polygon.OffsetPoint;
break;
}
}
if (!ctx->Polygon.CullFlag ||
ctx->Polygon.CullFaceMode != GL_BACK) {
switch (ctx->Polygon.BackMode) {
case GL_FILL:
fill_back = BRW_CLIP_FILL_MODE_FILL;
offset_back = 0;
break;
case GL_LINE:
fill_back = BRW_CLIP_FILL_MODE_LINE;
offset_back = ctx->Polygon.OffsetLine;
break;
case GL_POINT:
fill_back = BRW_CLIP_FILL_MODE_POINT;
offset_back = ctx->Polygon.OffsetPoint;
break;
}
}
if (ctx->Polygon.BackMode != GL_FILL ||
ctx->Polygon.FrontMode != GL_FILL) {
key.do_unfilled = 1;
/* Most cases will be handled by the fixed-function units. Cases where
* one or more polygon faces are unfilled require help:
*/
key.clip_mode = BRW_CLIP_MODE_CLIP_NON_REJECTED;
if (offset_back || offset_front) {
/* _NEW_POLYGON, _NEW_BUFFERS */
key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2;
key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
key.offset_clamp = ctx->Polygon.OffsetClamp * ctx->DrawBuffer->_MRD;
}
if (!brw->polygon_front_bit) {
key.fill_ccw = fill_front;
key.fill_cw = fill_back;
key.offset_ccw = offset_front;
key.offset_cw = offset_back;
if (ctx->Light.Model.TwoSide &&
key.fill_cw != BRW_CLIP_FILL_MODE_CULL)
key.copy_bfc_cw = 1;
} else {
key.fill_cw = fill_front;
key.fill_ccw = fill_back;
key.offset_cw = offset_front;
key.offset_ccw = offset_back;
if (ctx->Light.Model.TwoSide &&
key.fill_ccw != BRW_CLIP_FILL_MODE_CULL)
key.copy_bfc_ccw = 1;
}
}
}
}
if (!brw_search_cache(&brw->cache, BRW_CACHE_CLIP_PROG, &key, sizeof(key),
&brw->clip.prog_offset, &brw->clip.prog_data, true)) {
compile_clip_prog( brw, &key );
}
}

View file

@ -1,151 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <sys/errno.h>
#include "main/condrender.h"
#include "main/mtypes.h"
#include "main/state.h"
#include "brw_context.h"
#include "brw_draw.h"
#include "brw_state.h"
#include "brw_batch.h"
#include "brw_buffer_objects.h"
#include "brw_defines.h"
static void
brw_dispatch_compute_common(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
bool fail_next;
if (!_mesa_check_conditional_render(ctx))
return;
if (ctx->NewState)
_mesa_update_state(ctx);
brw_validate_textures(brw);
brw_predraw_resolve_inputs(brw, false, NULL);
/* Flush the batch if the batch/state buffers are nearly full. We can
* grow them if needed, but this is not free, so we'd like to avoid it.
*/
brw_batch_require_space(brw, 600);
brw_require_statebuffer_space(brw, 2500);
brw_batch_save_state(brw);
fail_next = brw_batch_saved_state_is_empty(brw);
retry:
brw->batch.no_wrap = true;
brw_upload_compute_state(brw);
brw->vtbl.emit_compute_walker(brw);
brw->batch.no_wrap = false;
if (!brw_batch_has_aperture_space(brw, 0)) {
if (!fail_next) {
brw_batch_reset_to_saved(brw);
brw_batch_flush(brw);
fail_next = true;
goto retry;
} else {
int ret = brw_batch_flush(brw);
WARN_ONCE(ret == -ENOSPC,
"i965: Single compute shader dispatch "
"exceeded available aperture space\n");
}
}
/* Now that we know we haven't run out of aperture space, we can safely
* reset the dirty bits.
*/
brw_compute_state_finished(brw);
if (brw->always_flush_batch)
brw_batch_flush(brw);
brw_program_cache_check_size(brw);
/* Note: since compute shaders can't write to framebuffers, there's no need
* to call brw_postdraw_set_buffers_need_resolve().
*/
}
static void
brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
struct brw_context *brw = brw_context(ctx);
brw->compute.num_work_groups_bo = NULL;
brw->compute.num_work_groups = num_groups;
brw->compute.group_size = NULL;
ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
brw_dispatch_compute_common(ctx);
}
static void
brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
{
struct brw_context *brw = brw_context(ctx);
static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer;
struct brw_bo *bo =
brw_bufferobj_buffer(brw,
brw_buffer_object(indirect_buffer),
indirect, 3 * sizeof(GLuint), false);
brw->compute.num_work_groups_bo = bo;
brw->compute.num_work_groups_offset = indirect;
brw->compute.num_work_groups = indirect_group_counts;
brw->compute.group_size = NULL;
ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
brw_dispatch_compute_common(ctx);
}
static void
brw_dispatch_compute_group_size(struct gl_context *ctx,
const GLuint *num_groups,
const GLuint *group_size)
{
struct brw_context *brw = brw_context(ctx);
brw->compute.num_work_groups_bo = NULL;
brw->compute.num_work_groups = num_groups;
brw->compute.group_size = group_size;
ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
brw_dispatch_compute_common(ctx);
}
void
brw_init_compute_functions(struct dd_function_table *functions)
{
functions->DispatchCompute = brw_dispatch_compute;
functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
functions->DispatchComputeGroupSize = brw_dispatch_compute_group_size;
}

View file

@ -1,193 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Neil Roberts <neil@linux.intel.com>
*/
/** @file brw_conditional_render.c
*
* Support for conditional rendering based on query objects
* (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gfx7+.
*/
#include "main/condrender.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_batch.h"
static void
set_predicate_enable(struct brw_context *brw,
bool value)
{
if (value)
brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
else
brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER;
}
static void
set_predicate_for_overflow_query(struct brw_context *brw,
struct brw_query_object *query,
int stream_start, int count)
{
if (!can_do_mi_math_and_lrr(brw->screen)) {
brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY;
return;
}
brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
/* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
* command when loading the values into the predicate source registers for
* conditional rendering.
*/
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
hsw_overflow_result_to_gpr0(brw, query, count);
brw_load_register_reg64(brw, MI_PREDICATE_SRC0, HSW_CS_GPR(0));
brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull);
}
static void
set_predicate_for_occlusion_query(struct brw_context *brw,
struct brw_query_object *query)
{
if (!brw->predicate.supported) {
brw->predicate.state = BRW_PREDICATE_STATE_STALL_FOR_QUERY;
return;
}
brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
/* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
* command when loading the values into the predicate source registers for
* conditional rendering.
*/
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query->bo, 0 /* offset */);
brw_load_register_mem64(brw, MI_PREDICATE_SRC1, query->bo, 8 /* offset */);
}
static void
set_predicate_for_result(struct brw_context *brw,
struct brw_query_object *query,
bool inverted)
{
int load_op;
assert(query->bo != NULL);
switch (query->Base.Target) {
case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
set_predicate_for_overflow_query(brw, query, 0, 1);
break;
case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
set_predicate_for_overflow_query(brw, query, 0, MAX_VERTEX_STREAMS);
break;
default:
set_predicate_for_occlusion_query(brw, query);
}
if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) {
if (inverted)
load_op = MI_PREDICATE_LOADOP_LOAD;
else
load_op = MI_PREDICATE_LOADOP_LOADINV;
BEGIN_BATCH(1);
OUT_BATCH(GFX7_MI_PREDICATE |
load_op |
MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
ADVANCE_BATCH();
}
}
static void
brw_begin_conditional_render(struct gl_context *ctx,
struct gl_query_object *q,
GLenum mode)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *) q;
bool inverted;
switch (mode) {
case GL_QUERY_WAIT:
case GL_QUERY_NO_WAIT:
case GL_QUERY_BY_REGION_WAIT:
case GL_QUERY_BY_REGION_NO_WAIT:
inverted = false;
break;
case GL_QUERY_WAIT_INVERTED:
case GL_QUERY_NO_WAIT_INVERTED:
case GL_QUERY_BY_REGION_WAIT_INVERTED:
case GL_QUERY_BY_REGION_NO_WAIT_INVERTED:
inverted = true;
break;
default:
unreachable("Unexpected conditional render mode");
}
/* If there are already samples from a BLT operation or if the query object
* is ready then we can avoid looking at the values in the buffer and just
* decide whether to draw using the CPU without stalling.
*/
if (query->Base.Result || query->Base.Ready)
set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted);
else
set_predicate_for_result(brw, query, inverted);
}
static void
brw_end_conditional_render(struct gl_context *ctx,
struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
/* When there is no longer a conditional render in progress it should
* always render.
*/
brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
}
void
brw_init_conditional_render_functions(struct dd_function_table *functions)
{
functions->BeginConditionalRender = brw_begin_conditional_render;
functions->EndConditionalRender = brw_end_conditional_render;
}
bool
brw_check_conditional_render(struct brw_context *brw)
{
if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) {
perf_debug("Conditional rendering is implemented in software and may "
"stall.\n");
return _mesa_check_conditional_render(&brw->ctx);
}
return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1,139 +0,0 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2014 Intel Corporation All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Jason Ekstrand <jason.ekstrand@intel.com>
*/
#include "brw_blorp.h"
#include "brw_fbo.h"
#include "brw_tex.h"
#include "brw_blit.h"
#include "brw_mipmap_tree.h"
#include "main/formats.h"
#include "main/teximage.h"
#include "drivers/common/meta.h"
static void
copy_miptrees(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
int src_x, int src_y, int src_z, unsigned src_level,
struct brw_mipmap_tree *dst_mt,
int dst_x, int dst_y, int dst_z, unsigned dst_level,
int src_width, int src_height)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (devinfo->ver <= 5) {
/* On gfx4-5, try BLT first.
*
* Gfx4-5 have a single ring for both 3D and BLT operations, so there are
* no inter-ring synchronization issues like on Gfx6+. It is apparently
* faster than using the 3D pipeline. Original Gfx4 also has to rebase
* and copy miptree slices in order to render to unaligned locations.
*/
if (brw_miptree_copy(brw, src_mt, src_level, src_z, src_x, src_y,
dst_mt, dst_level, dst_z, dst_x, dst_y,
src_width, src_height))
return;
}
brw_blorp_copy_miptrees(brw,
src_mt, src_level, src_z,
dst_mt, dst_level, dst_z,
src_x, src_y, dst_x, dst_y,
src_width, src_height);
}
static void
brw_copy_image_sub_data(struct gl_context *ctx,
struct gl_texture_image *src_image,
struct gl_renderbuffer *src_renderbuffer,
int src_x, int src_y, int src_z,
struct gl_texture_image *dst_image,
struct gl_renderbuffer *dst_renderbuffer,
int dst_x, int dst_y, int dst_z,
int src_width, int src_height)
{
struct brw_context *brw = brw_context(ctx);
struct brw_mipmap_tree *src_mt, *dst_mt;
unsigned src_level, dst_level;
if (src_image) {
src_mt = brw_texture_image(src_image)->mt;
src_level = src_image->Level + src_image->TexObject->Attrib.MinLevel;
/* Cube maps actually have different images per face */
if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
src_z = src_image->Face;
src_z += src_image->TexObject->Attrib.MinLayer;
} else {
assert(src_renderbuffer);
src_mt = brw_renderbuffer(src_renderbuffer)->mt;
src_image = src_renderbuffer->TexImage;
src_level = 0;
}
if (dst_image) {
dst_mt = brw_texture_image(dst_image)->mt;
dst_level = dst_image->Level + dst_image->TexObject->Attrib.MinLevel;
/* Cube maps actually have different images per face */
if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
dst_z = dst_image->Face;
dst_z += dst_image->TexObject->Attrib.MinLayer;
} else {
assert(dst_renderbuffer);
dst_mt = brw_renderbuffer(dst_renderbuffer)->mt;
dst_image = dst_renderbuffer->TexImage;
dst_level = 0;
}
copy_miptrees(brw, src_mt, src_x, src_y, src_z, src_level,
dst_mt, dst_x, dst_y, dst_z, dst_level,
src_width, src_height);
/* CopyImage only works for equal formats, texture view equivalence
* classes, and a couple special cases for compressed textures.
*
* Notably, GL_DEPTH_STENCIL does not appear in any equivalence
* classes, so we know the formats must be the same, and thus both
* will either have stencil, or not. They can't be mismatched.
*/
assert((src_mt->stencil_mt != NULL) == (dst_mt->stencil_mt != NULL));
if (dst_mt->stencil_mt) {
copy_miptrees(brw, src_mt->stencil_mt, src_x, src_y, src_z, src_level,
dst_mt->stencil_mt, dst_x, dst_y, dst_z, dst_level,
src_width, src_height);
}
}
void
brw_init_copy_image_functions(struct dd_function_table *functions)
{
functions->CopyImageSubData = brw_copy_image_sub_data;
}

View file

@ -1,220 +0,0 @@
/*
* Copyright (c) 2014 - 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "util/ralloc.h"
#include "brw_context.h"
#include "brw_cs.h"
#include "brw_wm.h"
#include "brw_mipmap_tree.h"
#include "brw_state.h"
#include "brw_batch.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "compiler/glsl/ir_uniform.h"
static void
assign_cs_binding_table_offsets(const struct intel_device_info *devinfo,
const struct gl_program *prog,
struct brw_cs_prog_data *prog_data)
{
uint32_t next_binding_table_offset = 0;
/* May not be used if the gl_NumWorkGroups variable is not accessed. */
prog_data->binding_table.work_groups_start = next_binding_table_offset;
next_binding_table_offset++;
brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
next_binding_table_offset);
}
static bool
brw_codegen_cs_prog(struct brw_context *brw,
struct brw_program *cp,
struct brw_cs_prog_key *key)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const GLuint *program;
void *mem_ctx = ralloc_context(NULL);
struct brw_cs_prog_data prog_data;
bool start_busy = false;
double start_time = 0;
nir_shader *nir = nir_shader_clone(mem_ctx, cp->program.nir);
memset(&prog_data, 0, sizeof(prog_data));
if (cp->program.info.shared_size > 64 * 1024) {
cp->program.sh.data->LinkStatus = LINKING_FAILURE;
const char *error_str =
"Compute shader used more than 64KB of shared variables";
ralloc_strcat(&cp->program.sh.data->InfoLog, error_str);
_mesa_problem(NULL, "Failed to link compute shader: %s\n", error_str);
ralloc_free(mem_ctx);
return false;
}
assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data);
brw_nir_setup_glsl_uniforms(mem_ctx, nir,
&cp->program, &prog_data.base, true);
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
brw_bo_busy(brw->batch.last_bo));
start_time = get_time();
}
brw_nir_lower_cs_intrinsics(nir);
struct brw_compile_cs_params params = {
.nir = nir,
.key = key,
.prog_data = &prog_data,
.log_data = brw,
};
if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
params.shader_time = true;
params.shader_time_index =
brw_get_shader_time_index(brw, &cp->program, ST_CS, true);
}
program = brw_compile_cs(brw->screen->compiler, mem_ctx, &params);
if (program == NULL) {
cp->program.sh.data->LinkStatus = LINKING_FAILURE;
ralloc_strcat(&cp->program.sh.data->InfoLog, params.error_str);
_mesa_problem(NULL, "Failed to compile compute shader: %s\n", params.error_str);
ralloc_free(mem_ctx);
return false;
}
if (unlikely(brw->perf_debug)) {
if (cp->compiled_once) {
brw_debug_recompile(brw, MESA_SHADER_COMPUTE, cp->program.Id,
&key->base);
}
cp->compiled_once = true;
if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
perf_debug("CS compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
}
brw_alloc_stage_scratch(brw, &brw->cs.base, prog_data.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.param);
ralloc_steal(NULL, prog_data.base.pull_param);
brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
key, sizeof(*key),
program, prog_data.base.program_size,
&prog_data, sizeof(prog_data),
&brw->cs.base.prog_offset, &brw->cs.base.prog_data);
ralloc_free(mem_ctx);
return true;
}
void
brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_COMPUTE_PROGRAM */
const struct brw_program *cp =
(struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
memset(key, 0, sizeof(*key));
/* _NEW_TEXTURE */
brw_populate_base_prog_key(ctx, cp, &key->base);
}
void
brw_upload_cs_prog(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_cs_prog_key key;
struct brw_program *cp =
(struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
if (!cp)
return;
if (!brw_state_dirty(brw, _NEW_TEXTURE, BRW_NEW_COMPUTE_PROGRAM))
return;
brw->cs.base.sampler_count =
util_last_bit(ctx->ComputeProgram._Current->SamplersUsed);
brw_cs_populate_key(brw, &key);
if (brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG, &key, sizeof(key),
&brw->cs.base.prog_offset, &brw->cs.base.prog_data,
true))
return;
if (brw_disk_cache_upload_program(brw, MESA_SHADER_COMPUTE))
return;
cp = (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
cp->id = key.base.program_string_id;
ASSERTED bool success = brw_codegen_cs_prog(brw, cp, &key);
assert(success);
}
void
brw_cs_populate_default_key(const struct brw_compiler *compiler,
struct brw_cs_prog_key *key,
struct gl_program *prog)
{
const struct intel_device_info *devinfo = compiler->devinfo;
memset(key, 0, sizeof(*key));
brw_populate_default_base_prog_key(devinfo, brw_program(prog), &key->base);
}
bool
brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
struct brw_cs_prog_key key;
struct brw_program *bcp = brw_program(prog);
brw_cs_populate_default_key(brw->screen->compiler, &key, prog);
uint32_t old_prog_offset = brw->cs.base.prog_offset;
struct brw_stage_prog_data *old_prog_data = brw->cs.base.prog_data;
bool success = brw_codegen_cs_prog(brw, bcp, &key);
brw->cs.base.prog_offset = old_prog_offset;
brw->cs.base.prog_data = old_prog_data;
return success;
}

View file

@ -1,46 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_CS_H
#define BRW_CS_H
#ifdef __cplusplus
extern "C" {
#endif
void
brw_upload_cs_prog(struct brw_context *brw);
void
brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key);
void
brw_cs_populate_default_key(const struct brw_compiler *compiler,
struct brw_cs_prog_key *key,
struct gl_program *prog);
#ifdef __cplusplus
}
#endif
#endif /* BRW_CS_H */

View file

@ -1,356 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
/** @file brw_curbe.c
*
* Push constant handling for gfx4/5.
*
* Push constants are constant values (such as GLSL uniforms) that are
* pre-loaded into a shader stage's register space at thread spawn time. On
* gfx4 and gfx5, we create a blob in memory containing all the push constants
* for all the stages in order. At CMD_CONST_BUFFER time that blob is loaded
* into URB space as a constant URB entry (CURBE) so that it can be accessed
* quickly at thread setup time. Each individual fixed function unit's state
* (brw_vs_state.c for example) tells the hardware which subset of the CURBE
* it wants in its register space, and we calculate those areas here under the
* BRW_NEW_PUSH_CONSTANT_ALLOCATION state flag. The brw_urb.c allocation will control
* how many CURBEs can be loaded into the hardware at once before a pipeline
* stall occurs at CMD_CONST_BUFFER time.
*
* On gfx6+, constant handling becomes a much simpler set of per-unit state.
* See gfx6_upload_vec4_push_constants() in gfx6_vs_state.c for that code.
*/
#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_statevars.h"
#include "util/bitscan.h"
#include "brw_batch.h"
#include "brw_buffer_objects.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_util.h"
#include "util/u_math.h"
/**
* Partition the CURBE between the various users of constant values.
*
* If the users all fit within the previous allocation, we avoid changing
* the layout because that means reuploading all unit state and uploading new
* constant buffers.
*/
static void calculate_curbe_offsets( struct brw_context *brw )
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FS_PROG_DATA */
const GLuint nr_fp_regs = (brw->wm.base.prog_data->nr_params + 15) / 16;
/* BRW_NEW_VS_PROG_DATA */
const GLuint nr_vp_regs = (brw->vs.base.prog_data->nr_params + 15) / 16;
GLuint nr_clip_regs = 0;
GLuint total_regs;
/* _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled) {
GLuint nr_planes = 6 + util_bitcount(ctx->Transform.ClipPlanesEnabled);
nr_clip_regs = (nr_planes * 4 + 15) / 16;
}
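/* Worked example (added for illustration): each register counted here holds
 * 16 floats. With two user clip planes enabled, nr_planes is 6 + 2 = 8,
 * which takes 8 * 4 = 32 floats, so nr_clip_regs is (32 + 15) / 16 = 2.
 */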
total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
/* The CURBE allocation size is limited to 32 512-bit units (128 EU
* registers, or 1024 floats). See CS_URB_STATE in the gfx4 or gfx5
* (volume 1, part 1) PRMs.
*
* Note that in brw_fs.cpp we're only loading up to 16 EU registers of
* values as push constants before spilling to pull constants, and in
* brw_vec4.cpp we're loading up to 32 registers of push constants. An EU
* register is 1/2 of one of these URB entry units, so that leaves us 16 EU
* regs for clip.
*/
assert(total_regs <= 32);
/* Lazy resize:
*/
if (nr_fp_regs > brw->curbe.wm_size ||
nr_vp_regs > brw->curbe.vs_size ||
nr_clip_regs != brw->curbe.clip_size ||
(total_regs < brw->curbe.total_size / 4 &&
brw->curbe.total_size > 16)) {
GLuint reg = 0;
/* Calculate a new layout:
*/
reg = 0;
brw->curbe.wm_start = reg;
brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
brw->curbe.clip_start = reg;
brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
brw->curbe.vs_start = reg;
brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
brw->curbe.total_size = reg;
if (0)
fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
brw->curbe.wm_start,
brw->curbe.wm_size,
brw->curbe.clip_start,
brw->curbe.clip_size,
brw->curbe.vs_start,
brw->curbe.vs_size );
brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
}
}
const struct brw_tracked_state brw_curbe_offsets = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_BLORP |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_VS_PROG_DATA,
},
.emit = calculate_curbe_offsets
};
/** Uploads the CS_URB_STATE packet.
*
* Just like brw_vs_state.c and brw_wm_state.c define a URB entry size and
* number of entries for their stages, constant buffers do so using this state
* packet. Having multiple CURBEs in the URB at the same time allows the
* hardware to avoid a pipeline stall between primitives using different
* constant buffer contents.
*/
void brw_upload_cs_urb_state(struct brw_context *brw)
{
BEGIN_BATCH(2);
OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
/* BRW_NEW_URB_FENCE */
if (brw->urb.csize == 0) {
OUT_BATCH(0);
} else {
/* BRW_NEW_URB_FENCE */
assert(brw->urb.nr_cs_entries);
OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
}
ADVANCE_BATCH();
}
static const GLfloat fixed_plane[6][4] = {
{ 0, 0, -1, 1 },
{ 0, 0, 1, 1 },
{ 0, -1, 0, 1 },
{ 0, 1, 0, 1 },
{-1, 0, 0, 1 },
{ 1, 0, 0, 1 }
};
/**
* Gathers together all the uniform values into a block of memory to be
* uploaded into the CURBE, then emits the state packet telling the hardware
* the new location.
*/
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
const GLuint sz = brw->curbe.total_size;
const GLuint bufsz = sz * 16 * sizeof(GLfloat);
gl_constant_value *buf;
GLuint i;
gl_clip_plane *clip_planes;
/* BRW_NEW_FRAGMENT_PROGRAM */
struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
/* BRW_NEW_VERTEX_PROGRAM */
struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
if (sz == 0) {
goto emit;
}
buf = brw_upload_space(&brw->upload, bufsz, 64,
&brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
/* fragment shader constants */
if (brw->curbe.wm_size) {
_mesa_load_state_parameters(ctx, fp->Parameters);
/* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
GLuint offset = brw->curbe.wm_start * 16;
/* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
brw_populate_constant_data(brw, fp, &brw->wm.base, &buf[offset],
brw->wm.base.prog_data->param,
brw->wm.base.prog_data->nr_params);
}
/* clipper constants */
if (brw->curbe.clip_size) {
GLuint offset = brw->curbe.clip_start * 16;
GLbitfield mask;
/* If any planes are going this way, send them all this way:
*/
for (i = 0; i < 6; i++) {
buf[offset + i * 4 + 0].f = fixed_plane[i][0];
buf[offset + i * 4 + 1].f = fixed_plane[i][1];
buf[offset + i * 4 + 2].f = fixed_plane[i][2];
buf[offset + i * 4 + 3].f = fixed_plane[i][3];
}
/* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
* clip-space:
*/
clip_planes = brw_select_clip_planes(ctx);
mask = ctx->Transform.ClipPlanesEnabled;
while (mask) {
const int j = u_bit_scan(&mask);
buf[offset + i * 4 + 0].f = clip_planes[j][0];
buf[offset + i * 4 + 1].f = clip_planes[j][1];
buf[offset + i * 4 + 2].f = clip_planes[j][2];
buf[offset + i * 4 + 3].f = clip_planes[j][3];
i++;
}
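/* Note (illustrative, not original): since i keeps incrementing from 6 while
 * u_bit_scan() walks the enable mask, the enabled user planes are packed
 * contiguously after the six fixed planes. E.g. with only planes 0 and 2
 * enabled, their values land in slots 6 and 7 of this block.
 */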
}
/* vertex shader constants */
if (brw->curbe.vs_size) {
_mesa_load_state_parameters(ctx, vp->Parameters);
GLuint offset = brw->curbe.vs_start * 16;
/* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
brw_populate_constant_data(brw, vp, &brw->vs.base, &buf[offset],
brw->vs.base.prog_data->param,
brw->vs.base.prog_data->nr_params);
}
if (0) {
for (i = 0; i < sz*16; i+=4)
fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
}
/* Because this provokes an action (i.e. copying the constants into the
* URB), it shouldn't be short-circuited if identical to the
* previous time - because e.g. the URB destination may have
* changed, or the URB contents may differ from last time.
*
* Note that the data referred to is actually copied internally,
* not just used in place according to passed pointer.
*
* It appears that the CS unit takes care of using each available
* URB entry (Const URB Entry == CURBE) in turn, and issuing
* flushes as necessary when doublebuffering of CURBEs isn't
* possible.
*/
emit:
/* BRW_NEW_URB_FENCE: From the gfx4 PRM, volume 1, section 3.9.8
* (CONSTANT_BUFFER (CURBE Load)):
*
* "Modifying the CS URB allocation via URB_FENCE invalidates any
* previous CURBE entries. Therefore software must subsequently
* [re]issue a CONSTANT_BUFFER command before CURBE data can be used
* in the pipeline."
*/
BEGIN_BATCH(2);
if (brw->curbe.total_size == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
OUT_BATCH(0);
} else {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
OUT_RELOC(brw->curbe.curbe_bo, 0,
(brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
}
ADVANCE_BATCH();
/* Work around a Broadwater/Crestline depth interpolator bug. The
* following sequence will cause GPU hangs:
*
* 1. Change state so that all depth related fields in CC_STATE are
* disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
* 2. Emit a CONSTANT_BUFFER packet.
* 3. Draw via 3DPRIMITIVE.
*
* The recommended workaround is to emit a non-pipelined state change after
* emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
*
* We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),
* and always emit it when "PS Use Source Depth" is set. We could be more
* precise, but the additional complexity is probably not worth it.
*
* BRW_NEW_FRAGMENT_PROGRAM
*/
if (devinfo->verx10 == 40 &&
BITSET_TEST(fp->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
BRW_NEW_URB_FENCE |
BRW_NEW_VS_PROG_DATA,
},
.emit = brw_upload_constant_buffer,
};

File diff suppressed because it is too large

View file

@ -1,417 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "compiler/glsl/ir_uniform.h"
#include "compiler/glsl/shader_cache.h"
#include "main/mtypes.h"
#include "util/blob.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "compiler/brw_eu.h"
#include "dev/intel_debug.h"
#include "brw_context.h"
#include "brw_program.h"
#include "brw_cs.h"
#include "brw_gs.h"
#include "brw_state.h"
#include "brw_vs.h"
#include "brw_wm.h"
static bool
debug_enabled_for_stage(gl_shader_stage stage)
{
static const uint64_t stage_debug_flags[] = {
DEBUG_VS, DEBUG_TCS, DEBUG_TES, DEBUG_GS, DEBUG_WM, DEBUG_CS,
};
assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_debug_flags));
return INTEL_DEBUG(stage_debug_flags[stage]);
}
static void
intel_shader_sha1(struct gl_program *prog, gl_shader_stage stage,
void *key, unsigned char *out_sha1)
{
char sha1_buf[41];
unsigned char sha1[20];
char manifest[256];
int offset = 0;
_mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
offset += snprintf(manifest, sizeof(manifest), "program: %s\n", sha1_buf);
_mesa_sha1_compute(key, brw_prog_key_size(stage), sha1);
_mesa_sha1_format(sha1_buf, sha1);
offset += snprintf(manifest + offset, sizeof(manifest) - offset,
"%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage),
sha1_buf);
_mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
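/* Example of the resulting manifest (illustrative): for a vertex shader it
 * reads "program: <40-char sha1>\nVS_key: <40-char sha1>\n"; the disk cache
 * key is then the SHA-1 of that manifest string.
 */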
}
static bool
read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
gl_shader_stage stage, const uint8_t **program,
struct brw_stage_prog_data *prog_data)
{
return
brw_read_blob_program_data(binary, prog, stage, program, prog_data) &&
(binary->current == binary->end);
}
static bool
read_and_upload(struct brw_context *brw, struct disk_cache *cache,
struct gl_program *prog, gl_shader_stage stage)
{
unsigned char binary_sha1[20];
union brw_any_prog_key prog_key;
switch (stage) {
case MESA_SHADER_VERTEX:
brw_vs_populate_key(brw, &prog_key.vs);
break;
case MESA_SHADER_TESS_CTRL:
brw_tcs_populate_key(brw, &prog_key.tcs);
break;
case MESA_SHADER_TESS_EVAL:
brw_tes_populate_key(brw, &prog_key.tes);
break;
case MESA_SHADER_GEOMETRY:
brw_gs_populate_key(brw, &prog_key.gs);
break;
case MESA_SHADER_FRAGMENT:
brw_wm_populate_key(brw, &prog_key.wm);
break;
case MESA_SHADER_COMPUTE:
brw_cs_populate_key(brw, &prog_key.cs);
break;
default:
unreachable("Unsupported stage!");
}
/* We don't care what instance of the program it is for the disk cache hash
* lookup, so set the id to 0 for the sha1 hashing. program_string_id will
* be set below.
*/
prog_key.base.program_string_id = 0;
intel_shader_sha1(prog, stage, &prog_key, binary_sha1);
size_t buffer_size;
uint8_t *buffer = disk_cache_get(cache, binary_sha1, &buffer_size);
if (buffer == NULL) {
if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
char sha1_buf[41];
_mesa_sha1_format(sha1_buf, binary_sha1);
fprintf(stderr, "No cached %s binary found for: %s\n",
_mesa_shader_stage_to_abbrev(stage), sha1_buf);
}
return false;
}
if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
char sha1_buf[41];
_mesa_sha1_format(sha1_buf, binary_sha1);
fprintf(stderr, "attempting to populate bo cache with binary: %s\n",
sha1_buf);
}
struct blob_reader binary;
blob_reader_init(&binary, buffer, buffer_size);
const uint8_t *program;
struct brw_stage_prog_data *prog_data =
ralloc_size(NULL, sizeof(union brw_any_prog_data));
if (!read_blob_program_data(&binary, prog, stage, &program, prog_data)) {
/* Something very bad has gone wrong; discard the item from the cache and
* rebuild from source.
*/
if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
fprintf(stderr, "Error reading program from cache (invalid i965 "
"cache item)\n");
}
disk_cache_remove(cache, binary_sha1);
ralloc_free(prog_data);
free(buffer);
return false;
}
enum brw_cache_id cache_id;
struct brw_stage_state *stage_state;
switch (stage) {
case MESA_SHADER_VERTEX:
cache_id = BRW_CACHE_VS_PROG;
stage_state = &brw->vs.base;
break;
case MESA_SHADER_TESS_CTRL:
cache_id = BRW_CACHE_TCS_PROG;
stage_state = &brw->tcs.base;
break;
case MESA_SHADER_TESS_EVAL:
cache_id = BRW_CACHE_TES_PROG;
stage_state = &brw->tes.base;
break;
case MESA_SHADER_GEOMETRY:
cache_id = BRW_CACHE_GS_PROG;
stage_state = &brw->gs.base;
break;
case MESA_SHADER_FRAGMENT:
cache_id = BRW_CACHE_FS_PROG;
stage_state = &brw->wm.base;
break;
case MESA_SHADER_COMPUTE:
cache_id = BRW_CACHE_CS_PROG;
stage_state = &brw->cs.base;
break;
default:
unreachable("Unsupported stage!");
}
prog_key.base.program_string_id = brw_program(prog)->id;
brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch);
if (unlikely(debug_enabled_for_stage(stage))) {
fprintf(stderr, "NIR for %s program %d loaded from disk shader cache:\n",
_mesa_shader_stage_to_abbrev(stage), brw_program(prog)->id);
brw_program_deserialize_driver_blob(&brw->ctx, prog, stage);
nir_shader *nir = prog->nir;
nir_print_shader(nir, stderr);
fprintf(stderr, "Native code for %s %s shader %s from disk cache:\n",
nir->info.label ? nir->info.label : "unnamed",
_mesa_shader_stage_to_string(nir->info.stage), nir->info.name);
brw_disassemble_with_labels(&brw->screen->devinfo, program, 0,
prog_data->program_size, stderr);
}
brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
program, prog_data->program_size, prog_data,
brw_prog_data_size(stage), &stage_state->prog_offset,
&stage_state->prog_data);
prog->program_written_to_cache = true;
ralloc_free(prog_data);
free(buffer);
return true;
}
bool
brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage)
{
struct disk_cache *cache = brw->ctx.Cache;
if (cache == NULL)
return false;
struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
if (prog == NULL)
return false;
if (prog->sh.data->spirv)
return false;
if (brw->ctx._Shader->Flags & GLSL_CACHE_FALLBACK)
goto fail;
if (!read_and_upload(brw, cache, prog, stage))
goto fail;
if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
fprintf(stderr, "read gen program from cache\n");
}
return true;
fail:
prog->program_written_to_cache = false;
if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
fprintf(stderr, "falling back to nir %s.\n",
_mesa_shader_stage_to_abbrev(prog->info.stage));
}
brw_program_deserialize_driver_blob(&brw->ctx, prog, stage);
return false;
}
static void
write_program_data(struct brw_context *brw, struct gl_program *prog,
void *key, struct brw_stage_prog_data *prog_data,
uint32_t prog_offset, struct disk_cache *cache,
gl_shader_stage stage)
{
struct blob binary;
blob_init(&binary);
const void *program_map = brw->cache.map + prog_offset;
/* TODO: Improve perf for non-LLC. It would be best to save it at program
* generation time, when the program is still in normal, CPU-cacheable
* memory. Another, easier change would be to use
* _mesa_streaming_load_memcpy to read from the mapped program memory. */
brw_write_blob_program_data(&binary, stage, program_map, prog_data);
unsigned char sha1[20];
char buf[41];
intel_shader_sha1(prog, stage, key, sha1);
_mesa_sha1_format(buf, sha1);
if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
fprintf(stderr, "putting binary in cache: %s\n", buf);
}
disk_cache_put(cache, sha1, binary.data, binary.size, NULL);
prog->program_written_to_cache = true;
blob_finish(&binary);
}
void
brw_disk_cache_write_render_programs(struct brw_context *brw)
{
struct disk_cache *cache = brw->ctx.Cache;
if (cache == NULL)
return;
struct gl_program *prog;
gl_shader_stage stage;
for (stage = MESA_SHADER_VERTEX; stage <= MESA_SHADER_FRAGMENT; stage++) {
prog = brw->ctx._Shader->CurrentProgram[stage];
if (prog && prog->sh.data->spirv)
return;
}
prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
if (prog && !prog->program_written_to_cache) {
struct brw_vs_prog_key vs_key;
brw_vs_populate_key(brw, &vs_key);
vs_key.base.program_string_id = 0;
write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data,
brw->vs.base.prog_offset, cache,
MESA_SHADER_VERTEX);
}
prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
if (prog && !prog->program_written_to_cache) {
struct brw_tcs_prog_key tcs_key;
brw_tcs_populate_key(brw, &tcs_key);
tcs_key.base.program_string_id = 0;
write_program_data(brw, prog, &tcs_key, brw->tcs.base.prog_data,
brw->tcs.base.prog_offset, cache,
MESA_SHADER_TESS_CTRL);
}
prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
if (prog && !prog->program_written_to_cache) {
struct brw_tes_prog_key tes_key;
brw_tes_populate_key(brw, &tes_key);
tes_key.base.program_string_id = 0;
write_program_data(brw, prog, &tes_key, brw->tes.base.prog_data,
brw->tes.base.prog_offset, cache,
MESA_SHADER_TESS_EVAL);
}
prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
if (prog && !prog->program_written_to_cache) {
struct brw_gs_prog_key gs_key;
brw_gs_populate_key(brw, &gs_key);
gs_key.base.program_string_id = 0;
write_program_data(brw, prog, &gs_key, brw->gs.base.prog_data,
brw->gs.base.prog_offset, cache,
MESA_SHADER_GEOMETRY);
}
prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
if (prog && !prog->program_written_to_cache) {
struct brw_wm_prog_key wm_key;
brw_wm_populate_key(brw, &wm_key);
wm_key.base.program_string_id = 0;
write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data,
brw->wm.base.prog_offset, cache,
MESA_SHADER_FRAGMENT);
}
}
void
brw_disk_cache_write_compute_program(struct brw_context *brw)
{
struct disk_cache *cache = brw->ctx.Cache;
if (cache == NULL)
return;
struct gl_program *prog =
brw->ctx._Shader->CurrentProgram[MESA_SHADER_COMPUTE];
if (prog && prog->sh.data->spirv)
return;
if (prog && !prog->program_written_to_cache) {
struct brw_cs_prog_key cs_key;
brw_cs_populate_key(brw, &cs_key);
cs_key.base.program_string_id = 0;
write_program_data(brw, prog, &cs_key, brw->cs.base.prog_data,
brw->cs.base.prog_offset, cache,
MESA_SHADER_COMPUTE);
}
}
void
brw_disk_cache_init(struct brw_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
if (INTEL_DEBUG(DEBUG_DISK_CACHE_DISABLE_MASK))
return;
/* array length: print length + null char + 1 extra to verify it is unused */
char renderer[11];
ASSERTED int len = snprintf(renderer, sizeof(renderer), "i965_%04x",
screen->deviceID);
assert(len == sizeof(renderer) - 2);
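/* Illustrative example (not part of the original code): for an Ivybridge GT2
 * with deviceID 0x0166 this produces "i965_0166" -- nine characters plus the
 * terminating NUL, which is why len must equal sizeof(renderer) - 2.
 */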
const struct build_id_note *note =
build_id_find_nhdr_for_addr(brw_disk_cache_init);
assert(note && build_id_length(note) == 20 /* sha1 */);
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
const uint64_t driver_flags =
brw_get_compiler_config_value(screen->compiler);
screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}

File diff suppressed because it is too large

View file

@ -1,87 +0,0 @@
/*
* Copyright 2005 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_DRAW_H
#define BRW_DRAW_H
#include "main/mtypes.h"
#include "brw_bufmgr.h"
struct brw_context;
uint32_t *
brw_emit_vertex_buffer_state(struct brw_context *brw,
unsigned buffer_nr,
struct brw_bo *bo,
unsigned start_offset,
unsigned end_offset,
unsigned stride,
unsigned step_rate,
uint32_t *__map);
#define EMIT_VERTEX_BUFFER_STATE(...) __map = \
brw_emit_vertex_buffer_state(__VA_ARGS__, __map)
void brw_draw_prims(struct gl_context *ctx,
const struct _mesa_prim *prims,
unsigned nr_prims,
const struct _mesa_index_buffer *ib,
bool index_bounds_valid,
bool primitive_restart,
unsigned restart_index,
unsigned min_index,
unsigned max_index,
unsigned num_instances,
unsigned base_instance);
void brw_init_draw_functions(struct dd_function_table *functions);
void brw_draw_init( struct brw_context *brw );
void brw_draw_destroy( struct brw_context *brw );
void brw_prepare_shader_draw_parameters(struct brw_context *);
/* brw_primitive_restart.c */
GLboolean
brw_handle_primitive_restart(struct gl_context *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint num_instances, GLuint base_instance,
bool primitive_restart,
unsigned restart_index);
void
brw_draw_indirect_prims(struct gl_context *ctx,
GLuint mode,
struct gl_buffer_object *indirect_data,
GLsizeiptr indirect_offset,
unsigned draw_count,
unsigned stride,
struct gl_buffer_object *indirect_params,
GLsizeiptr indirect_params_offset,
const struct _mesa_index_buffer *ib,
bool primitive_restart,
unsigned restart_index);
#endif

View file

@ -1,801 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/arrayobj.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/glformats.h"
#include "nir.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_batch.h"
#include "brw_buffer_objects.h"
static const GLuint double_types_float[5] = {
0,
ISL_FORMAT_R64_FLOAT,
ISL_FORMAT_R64G64_FLOAT,
ISL_FORMAT_R64G64B64_FLOAT,
ISL_FORMAT_R64G64B64A64_FLOAT
};
static const GLuint double_types_passthru[5] = {
0,
ISL_FORMAT_R64_PASSTHRU,
ISL_FORMAT_R64G64_PASSTHRU,
ISL_FORMAT_R64G64B64_PASSTHRU,
ISL_FORMAT_R64G64B64A64_PASSTHRU
};
static const GLuint float_types[5] = {
0,
ISL_FORMAT_R32_FLOAT,
ISL_FORMAT_R32G32_FLOAT,
ISL_FORMAT_R32G32B32_FLOAT,
ISL_FORMAT_R32G32B32A32_FLOAT
};
static const GLuint half_float_types[5] = {
0,
ISL_FORMAT_R16_FLOAT,
ISL_FORMAT_R16G16_FLOAT,
ISL_FORMAT_R16G16B16_FLOAT,
ISL_FORMAT_R16G16B16A16_FLOAT
};
static const GLuint fixed_point_types[5] = {
0,
ISL_FORMAT_R32_SFIXED,
ISL_FORMAT_R32G32_SFIXED,
ISL_FORMAT_R32G32B32_SFIXED,
ISL_FORMAT_R32G32B32A32_SFIXED,
};
static const GLuint uint_types_direct[5] = {
0,
ISL_FORMAT_R32_UINT,
ISL_FORMAT_R32G32_UINT,
ISL_FORMAT_R32G32B32_UINT,
ISL_FORMAT_R32G32B32A32_UINT
};
static const GLuint uint_types_norm[5] = {
0,
ISL_FORMAT_R32_UNORM,
ISL_FORMAT_R32G32_UNORM,
ISL_FORMAT_R32G32B32_UNORM,
ISL_FORMAT_R32G32B32A32_UNORM
};
static const GLuint uint_types_scale[5] = {
0,
ISL_FORMAT_R32_USCALED,
ISL_FORMAT_R32G32_USCALED,
ISL_FORMAT_R32G32B32_USCALED,
ISL_FORMAT_R32G32B32A32_USCALED
};
static const GLuint int_types_direct[5] = {
0,
ISL_FORMAT_R32_SINT,
ISL_FORMAT_R32G32_SINT,
ISL_FORMAT_R32G32B32_SINT,
ISL_FORMAT_R32G32B32A32_SINT
};
static const GLuint int_types_norm[5] = {
0,
ISL_FORMAT_R32_SNORM,
ISL_FORMAT_R32G32_SNORM,
ISL_FORMAT_R32G32B32_SNORM,
ISL_FORMAT_R32G32B32A32_SNORM
};
static const GLuint int_types_scale[5] = {
0,
ISL_FORMAT_R32_SSCALED,
ISL_FORMAT_R32G32_SSCALED,
ISL_FORMAT_R32G32B32_SSCALED,
ISL_FORMAT_R32G32B32A32_SSCALED
};
static const GLuint ushort_types_direct[5] = {
0,
ISL_FORMAT_R16_UINT,
ISL_FORMAT_R16G16_UINT,
ISL_FORMAT_R16G16B16_UINT,
ISL_FORMAT_R16G16B16A16_UINT
};
static const GLuint ushort_types_norm[5] = {
0,
ISL_FORMAT_R16_UNORM,
ISL_FORMAT_R16G16_UNORM,
ISL_FORMAT_R16G16B16_UNORM,
ISL_FORMAT_R16G16B16A16_UNORM
};
static const GLuint ushort_types_scale[5] = {
0,
ISL_FORMAT_R16_USCALED,
ISL_FORMAT_R16G16_USCALED,
ISL_FORMAT_R16G16B16_USCALED,
ISL_FORMAT_R16G16B16A16_USCALED
};
static const GLuint short_types_direct[5] = {
0,
ISL_FORMAT_R16_SINT,
ISL_FORMAT_R16G16_SINT,
ISL_FORMAT_R16G16B16_SINT,
ISL_FORMAT_R16G16B16A16_SINT
};
static const GLuint short_types_norm[5] = {
0,
ISL_FORMAT_R16_SNORM,
ISL_FORMAT_R16G16_SNORM,
ISL_FORMAT_R16G16B16_SNORM,
ISL_FORMAT_R16G16B16A16_SNORM
};
static const GLuint short_types_scale[5] = {
0,
ISL_FORMAT_R16_SSCALED,
ISL_FORMAT_R16G16_SSCALED,
ISL_FORMAT_R16G16B16_SSCALED,
ISL_FORMAT_R16G16B16A16_SSCALED
};
static const GLuint ubyte_types_direct[5] = {
0,
ISL_FORMAT_R8_UINT,
ISL_FORMAT_R8G8_UINT,
ISL_FORMAT_R8G8B8_UINT,
ISL_FORMAT_R8G8B8A8_UINT
};
static const GLuint ubyte_types_norm[5] = {
0,
ISL_FORMAT_R8_UNORM,
ISL_FORMAT_R8G8_UNORM,
ISL_FORMAT_R8G8B8_UNORM,
ISL_FORMAT_R8G8B8A8_UNORM
};
static const GLuint ubyte_types_scale[5] = {
0,
ISL_FORMAT_R8_USCALED,
ISL_FORMAT_R8G8_USCALED,
ISL_FORMAT_R8G8B8_USCALED,
ISL_FORMAT_R8G8B8A8_USCALED
};
static const GLuint byte_types_direct[5] = {
0,
ISL_FORMAT_R8_SINT,
ISL_FORMAT_R8G8_SINT,
ISL_FORMAT_R8G8B8_SINT,
ISL_FORMAT_R8G8B8A8_SINT
};
static const GLuint byte_types_norm[5] = {
0,
ISL_FORMAT_R8_SNORM,
ISL_FORMAT_R8G8_SNORM,
ISL_FORMAT_R8G8B8_SNORM,
ISL_FORMAT_R8G8B8A8_SNORM
};
static const GLuint byte_types_scale[5] = {
0,
ISL_FORMAT_R8_SSCALED,
ISL_FORMAT_R8G8_SSCALED,
ISL_FORMAT_R8G8B8_SSCALED,
ISL_FORMAT_R8G8B8A8_SSCALED
};
static GLuint
double_types(int size, GLboolean doubles)
{
/* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
* "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
* 64-bit components are stored in the URB without any conversion."
* Also included on BDW PRM, Volume 7, page 470, table "Source Element
* Formats Supported in VF Unit"
*
* Previous PRMs don't include those references, so for gfx7 we can't use
* PASSTHRU formats directly. But in any case, we prefer to return passthru
* even in that case, because that reflects what we want to achieve, even
* if we would need to workaround on gen < 8.
*/
return (doubles
? double_types_passthru[size]
: double_types_float[size]);
}
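/*
* Illustrative example: for a dvec3 vertex attribute, size == 3, so
* double_types(3, true) yields ISL_FORMAT_R64G64B64_PASSTHRU while
* double_types(3, false) yields ISL_FORMAT_R64G64B64_FLOAT.
*/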
/**
* Given vertex array type/size/format/normalized info, return
* the appropriate hardware surface type.
* Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
*/
unsigned
brw_get_vertex_surface_type(struct brw_context *brw,
const struct gl_vertex_format *glformat)
{
int size = glformat->Size;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const bool is_ivybridge_or_older =
devinfo->verx10 < 70 || devinfo->platform == INTEL_PLATFORM_IVB;
if (INTEL_DEBUG(DEBUG_VERTS))
fprintf(stderr, "type %s size %d normalized %d\n",
_mesa_enum_to_string(glformat->Type),
glformat->Size, glformat->Normalized);
if (glformat->Integer) {
assert(glformat->Format == GL_RGBA); /* sanity check */
switch (glformat->Type) {
case GL_INT: return int_types_direct[size];
case GL_SHORT:
if (is_ivybridge_or_older && size == 3)
return short_types_direct[4];
else
return short_types_direct[size];
case GL_BYTE:
if (is_ivybridge_or_older && size == 3)
return byte_types_direct[4];
else
return byte_types_direct[size];
case GL_UNSIGNED_INT: return uint_types_direct[size];
case GL_UNSIGNED_SHORT:
if (is_ivybridge_or_older && size == 3)
return ushort_types_direct[4];
else
return ushort_types_direct[size];
case GL_UNSIGNED_BYTE:
if (is_ivybridge_or_older && size == 3)
return ubyte_types_direct[4];
else
return ubyte_types_direct[size];
default: unreachable("not reached");
}
} else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) {
return ISL_FORMAT_R11G11B10_FLOAT;
} else if (glformat->Normalized) {
switch (glformat->Type) {
case GL_DOUBLE: return double_types(size, glformat->Doubles);
case GL_FLOAT: return float_types[size];
case GL_HALF_FLOAT:
case GL_HALF_FLOAT_OES:
if (devinfo->ver < 6 && size == 3)
return half_float_types[4];
else
return half_float_types[size];
case GL_INT: return int_types_norm[size];
case GL_SHORT: return short_types_norm[size];
case GL_BYTE: return byte_types_norm[size];
case GL_UNSIGNED_INT: return uint_types_norm[size];
case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
case GL_UNSIGNED_BYTE:
if (glformat->Format == GL_BGRA) {
/* See GL_EXT_vertex_array_bgra */
assert(size == 4);
return ISL_FORMAT_B8G8R8A8_UNORM;
}
else {
return ubyte_types_norm[size];
}
case GL_FIXED:
if (devinfo->verx10 >= 75)
return fixed_point_types[size];
/* This produces GL_FIXED inputs as values between INT32_MIN and
* INT32_MAX, which will be scaled down by 1/65536 by the VS.
*/
return int_types_scale[size];
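/* Worked example (illustrative): GL_FIXED is 16.16 fixed point, so an
* input of 1.5 arrives as 0x00018000 == 98304; multiplying by
* 1.0f / 65536.0f in the VS recovers 1.5.
*/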
/* See GL_ARB_vertex_type_2_10_10_10_rev.
* W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
* like to use here, so upload everything as UINT and fix
* it in the shader
*/
case GL_INT_2_10_10_10_REV:
assert(size == 4);
if (devinfo->verx10 >= 75) {
return glformat->Format == GL_BGRA
? ISL_FORMAT_B10G10R10A2_SNORM
: ISL_FORMAT_R10G10B10A2_SNORM;
}
return ISL_FORMAT_R10G10B10A2_UINT;
case GL_UNSIGNED_INT_2_10_10_10_REV:
assert(size == 4);
if (devinfo->verx10 >= 75) {
return glformat->Format == GL_BGRA
? ISL_FORMAT_B10G10R10A2_UNORM
: ISL_FORMAT_R10G10B10A2_UNORM;
}
return ISL_FORMAT_R10G10B10A2_UINT;
default: unreachable("not reached");
}
}
else {
/* See GL_ARB_vertex_type_2_10_10_10_rev.
* W/A: the hardware doesn't really support the formats we'd
* like to use here, so upload everything as UINT and fix
* it in the shader
*/
if (glformat->Type == GL_INT_2_10_10_10_REV) {
assert(size == 4);
if (devinfo->verx10 >= 75) {
return glformat->Format == GL_BGRA
? ISL_FORMAT_B10G10R10A2_SSCALED
: ISL_FORMAT_R10G10B10A2_SSCALED;
}
return ISL_FORMAT_R10G10B10A2_UINT;
} else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
assert(size == 4);
if (devinfo->verx10 >= 75) {
return glformat->Format == GL_BGRA
? ISL_FORMAT_B10G10R10A2_USCALED
: ISL_FORMAT_R10G10B10A2_USCALED;
}
return ISL_FORMAT_R10G10B10A2_UINT;
}
assert(glformat->Format == GL_RGBA); /* sanity check */
switch (glformat->Type) {
case GL_DOUBLE: return double_types(size, glformat->Doubles);
case GL_FLOAT: return float_types[size];
case GL_HALF_FLOAT:
case GL_HALF_FLOAT_OES:
if (devinfo->ver < 6 && size == 3)
return half_float_types[4];
else
return half_float_types[size];
case GL_INT: return int_types_scale[size];
case GL_SHORT: return short_types_scale[size];
case GL_BYTE: return byte_types_scale[size];
case GL_UNSIGNED_INT: return uint_types_scale[size];
case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
case GL_FIXED:
if (devinfo->verx10 >= 75)
return fixed_point_types[size];
/* This produces GL_FIXED inputs as values between INT32_MIN and
* INT32_MAX, which will be scaled down by 1/65536 by the VS.
*/
return int_types_scale[size];
default: unreachable("not reached");
}
}
}
static void
copy_array_to_vbo_array(struct brw_context *brw,
const uint8_t *const ptr, const int src_stride,
int min, int max,
struct brw_vertex_buffer *buffer,
GLuint dst_stride)
{
const unsigned char *src = ptr + min * src_stride;
int count = max - min + 1;
GLuint size = count * dst_stride;
uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride,
&buffer->bo, &buffer->offset);
/* The GL 4.5 spec says:
* "If any enabled arrays buffer binding is zero when DrawArrays or
* one of the other drawing commands defined in section 10.4 is called,
* the result is undefined."
*
* In this case, leave the dst with undefined values.
*/
if (ptr != NULL) {
if (dst_stride == src_stride) {
memcpy(dst, src, size);
} else {
while (count--) {
memcpy(dst, src, dst_stride);
src += src_stride;
dst += dst_stride;
}
}
}
buffer->stride = dst_stride;
buffer->size = size;
}
void
brw_prepare_vertices(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_VERTEX_PROGRAM */
const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
/* BRW_NEW_VS_PROG_DATA */
const struct brw_vs_prog_data *vs_prog_data =
brw_vs_prog_data(brw->vs.base.prog_data);
const uint64_t vs_inputs64 =
nir_get_single_slot_attribs_mask(vs_prog_data->inputs_read,
vp->DualSlotInputs);
assert((vs_inputs64 & ~(uint64_t)VERT_BIT_ALL) == 0);
unsigned vs_inputs = (unsigned)vs_inputs64;
unsigned int min_index = brw->vb.min_index + brw->basevertex;
unsigned int max_index = brw->vb.max_index + brw->basevertex;
int delta, j;
/* _NEW_POLYGON
*
* On gfx6+, edge flags don't end up in the VUE (either in or out of the
* VS). Instead, they're uploaded as the last vertex element, and the data
* is passed sideband through the fixed function units. So, we need to
* prepare the vertex buffer for it, but it's not present in inputs_read.
*/
if (devinfo->ver >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL)) {
vs_inputs |= VERT_BIT_EDGEFLAG;
}
if (0)
fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);
/* Accumulate the list of enabled arrays. */
brw->vb.nr_enabled = 0;
unsigned mask = vs_inputs;
while (mask) {
const gl_vert_attrib attr = u_bit_scan(&mask);
struct brw_vertex_element *input = &brw->vb.inputs[attr];
brw->vb.enabled[brw->vb.nr_enabled++] = input;
}
assert(brw->vb.nr_enabled <= VERT_ATTRIB_MAX);
if (brw->vb.nr_enabled == 0)
return;
if (brw->vb.nr_buffers)
return;
j = 0;
const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
unsigned vbomask = vs_inputs & _mesa_draw_vbo_array_bits(ctx);
while (vbomask) {
const struct gl_vertex_buffer_binding *const glbinding =
_mesa_draw_buffer_binding(vao, ffs(vbomask) - 1);
const GLsizei stride = glbinding->Stride;
assert(glbinding->BufferObj);
/* Accumulate the range of a single vertex, start with inverted range */
uint32_t vertex_range_start = ~(uint32_t)0;
uint32_t vertex_range_end = 0;
const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
unsigned attrmask = vbomask & boundmask;
/* Mark those attributes as processed */
vbomask ^= attrmask;
/* We can assume that we have an array for the binding */
assert(attrmask);
/* Walk attributes belonging to the binding */
while (attrmask) {
const gl_vert_attrib attr = u_bit_scan(&attrmask);
const struct gl_array_attributes *const glattrib =
_mesa_draw_array_attrib(vao, attr);
const uint32_t rel_offset =
_mesa_draw_attributes_relative_offset(glattrib);
const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;
vertex_range_start = MIN2(vertex_range_start, rel_offset);
vertex_range_end = MAX2(vertex_range_end, rel_end);
struct brw_vertex_element *input = &brw->vb.inputs[attr];
input->glformat = &glattrib->Format;
input->buffer = j;
input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
input->offset = rel_offset;
}
assert(vertex_range_start <= vertex_range_end);
struct brw_buffer_object *intel_buffer =
brw_buffer_object(glbinding->BufferObj);
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
const uint32_t offset = _mesa_draw_binding_offset(glbinding);
/* If nothing else is known take the buffer size and offset as a bound */
uint32_t start = vertex_range_start;
uint32_t range = intel_buffer->Base.Size - offset - vertex_range_start;
/* Check if we can get a more narrow range */
if (glbinding->InstanceDivisor) {
if (brw->num_instances) {
const uint32_t vertex_size = vertex_range_end - vertex_range_start;
start = vertex_range_start + stride * brw->baseinstance;
range = (stride * ((brw->num_instances - 1) /
glbinding->InstanceDivisor) +
vertex_size);
}
} else {
if (brw->vb.index_bounds_valid) {
const uint32_t vertex_size = vertex_range_end - vertex_range_start;
start = vertex_range_start + stride * min_index;
range = (stride * (max_index - min_index) +
vertex_size);
/**
* Unreal Engine 4 has a bug in usage of glDrawRangeElements,
* causing it to be called with a number of vertices in place
* of "end" parameter (which specifies the maximum array index
* contained in indices).
*
* Since an unknown number of games are affected and we
* cannot identify whether a game is built with UE4, we are
* forced to make a blanket workaround, disregarding max_index
* in range calculations. Fortunately all such calls look like:
* glDrawRangeElements(GL_TRIANGLES, 0, 3, 3, ...);
* So we are able to narrow down this workaround.
*
* See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2917
*/
if (unlikely(max_index == 3 && min_index == 0 &&
brw->draw.derived_params.is_indexed_draw)) {
range = intel_buffer->Base.Size - offset - start;
}
}
}
buffer->offset = offset;
buffer->size = start + range;
buffer->stride = stride;
buffer->step_rate = glbinding->InstanceDivisor;
buffer->bo = brw_bufferobj_buffer(brw, intel_buffer, offset + start,
range, false);
brw_bo_reference(buffer->bo);
j++;
}
/* If we need to upload all the arrays, then we can trim those arrays to
* only the used elements [min_index, max_index] so long as we adjust all
* the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
*/
brw->vb.start_vertex_bias = 0;
delta = min_index;
if ((vs_inputs & _mesa_draw_vbo_array_bits(ctx)) == 0) {
brw->vb.start_vertex_bias = -delta;
delta = 0;
}
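/* Illustrative example: if only user-space arrays are enabled and
* min_index == 100, the copies below start at element 100 and
* start_vertex_bias is set to -100, so hardware vertex 100 fetches
* element 0 of the trimmed upload.
*/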
unsigned usermask = vs_inputs & _mesa_draw_user_array_bits(ctx);
while (usermask) {
const struct gl_vertex_buffer_binding *const glbinding =
_mesa_draw_buffer_binding(vao, ffs(usermask) - 1);
const GLsizei stride = glbinding->Stride;
assert(!glbinding->BufferObj);
assert(brw->vb.index_bounds_valid);
/* Accumulate the range of a single vertex, start with inverted range */
uint32_t vertex_range_start = ~(uint32_t)0;
uint32_t vertex_range_end = 0;
const unsigned boundmask = _mesa_draw_bound_attrib_bits(glbinding);
unsigned attrmask = usermask & boundmask;
/* Mark those attributes as processed */
usermask ^= attrmask;
/* We can assume that we have an array for the binding */
assert(attrmask);
/* Walk attributes belonging to the binding */
while (attrmask) {
const gl_vert_attrib attr = u_bit_scan(&attrmask);
const struct gl_array_attributes *const glattrib =
_mesa_draw_array_attrib(vao, attr);
const uint32_t rel_offset =
_mesa_draw_attributes_relative_offset(glattrib);
const uint32_t rel_end = rel_offset + glattrib->Format._ElementSize;
vertex_range_start = MIN2(vertex_range_start, rel_offset);
vertex_range_end = MAX2(vertex_range_end, rel_end);
struct brw_vertex_element *input = &brw->vb.inputs[attr];
input->glformat = &glattrib->Format;
input->buffer = j;
input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
input->offset = rel_offset;
}
assert(vertex_range_start <= vertex_range_end);
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
const uint8_t *ptr = (const uint8_t*)_mesa_draw_binding_offset(glbinding);
ptr += vertex_range_start;
const uint32_t vertex_size = vertex_range_end - vertex_range_start;
if (glbinding->Stride == 0) {
/* If the source stride is zero, we just want to upload the current
* attribute once and set the buffer's stride to 0. There's no need
* to replicate it out.
*/
copy_array_to_vbo_array(brw, ptr, 0, 0, 0, buffer, vertex_size);
} else if (glbinding->InstanceDivisor == 0) {
copy_array_to_vbo_array(brw, ptr, stride, min_index,
max_index, buffer, vertex_size);
} else {
/* This is an instanced attribute, since its InstanceDivisor
* is not zero. Therefore, its data will be stepped after the
* instanced draw has been run InstanceDivisor times.
*/
uint32_t instanced_attr_max_index =
(brw->num_instances - 1) / glbinding->InstanceDivisor;
copy_array_to_vbo_array(brw, ptr, stride, 0,
instanced_attr_max_index, buffer, vertex_size);
}
buffer->offset -= delta * buffer->stride + vertex_range_start;
buffer->size += delta * buffer->stride + vertex_range_start;
buffer->step_rate = glbinding->InstanceDivisor;
j++;
}
/* Upload the current values */
unsigned curmask = vs_inputs & _mesa_draw_current_bits(ctx);
if (curmask) {
/* For each attribute, upload the maximum possible size. */
uint8_t data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4];
uint8_t *cursor = data;
do {
const gl_vert_attrib attr = u_bit_scan(&curmask);
const struct gl_array_attributes *const glattrib =
_mesa_draw_current_attrib(ctx, attr);
const unsigned size = glattrib->Format._ElementSize;
const unsigned alignment = align(size, sizeof(GLdouble));
memcpy(cursor, glattrib->Ptr, size);
if (alignment != size)
memset(cursor + size, 0, alignment - size);
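/* Illustrative example: a 3-component float attribute has size 12,
* which align(12, sizeof(GLdouble)) rounds up to 16, so 4 bytes of
* zero padding are written after the value.
*/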
struct brw_vertex_element *input = &brw->vb.inputs[attr];
input->glformat = &glattrib->Format;
input->buffer = j;
input->is_dual_slot = (vp->DualSlotInputs & BITFIELD64_BIT(attr)) != 0;
input->offset = cursor - data;
cursor += alignment;
} while (curmask);
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
const unsigned size = cursor - data;
brw_upload_data(&brw->upload, data, size, size,
&buffer->bo, &buffer->offset);
buffer->stride = 0;
buffer->size = size;
buffer->step_rate = 0;
j++;
}
brw->vb.nr_buffers = j;
}
void
brw_prepare_shader_draw_parameters(struct brw_context *brw)
{
const struct brw_vs_prog_data *vs_prog_data =
brw_vs_prog_data(brw->vs.base.prog_data);
/* For non-indirect draws, upload the shader draw parameters */
if ((vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) &&
brw->draw.draw_params_bo == NULL) {
brw_upload_data(&brw->upload,
&brw->draw.params, sizeof(brw->draw.params), 4,
&brw->draw.draw_params_bo,
&brw->draw.draw_params_offset);
}
if (vs_prog_data->uses_drawid || vs_prog_data->uses_is_indexed_draw) {
brw_upload_data(&brw->upload,
&brw->draw.derived_params, sizeof(brw->draw.derived_params), 4,
&brw->draw.derived_draw_params_bo,
&brw->draw.derived_draw_params_offset);
}
}
static void
brw_upload_indices(struct brw_context *brw)
{
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
GLuint ib_size;
struct brw_bo *old_bo = brw->ib.bo;
struct gl_buffer_object *bufferobj;
GLuint offset;
GLuint ib_type_size;
if (index_buffer == NULL)
return;
ib_type_size = 1 << index_buffer->index_size_shift;
ib_size = index_buffer->count ? ib_type_size * index_buffer->count :
index_buffer->obj->Size;
bufferobj = index_buffer->obj;
/* Turn into a proper VBO:
*/
if (!bufferobj) {
/* Get new bufferobj, offset:
*/
brw_upload_data(&brw->upload, index_buffer->ptr, ib_size, ib_type_size,
&brw->ib.bo, &offset);
brw->ib.size = brw->ib.bo->size;
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
struct brw_bo *bo =
brw_bufferobj_buffer(brw, brw_buffer_object(bufferobj),
offset, ib_size, false);
if (bo != brw->ib.bo) {
brw_bo_unreference(brw->ib.bo);
brw->ib.bo = bo;
brw->ib.size = bufferobj->Size;
brw_bo_reference(bo);
}
}
/* Use 3DPRIMITIVE's start_vertex_offset to avoid re-uploading
* the index buffer state when we're just moving the start index
* of our drawing.
*/
brw->ib.start_vertex_offset = offset / ib_type_size;
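/* Illustrative example: with 16-bit indices (ib_type_size == 2) and a byte
* offset of 12 into the buffer, start_vertex_offset == 6.
*/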
if (brw->ib.bo != old_bo)
brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
unsigned index_size = 1 << index_buffer->index_size_shift;
if (index_size != brw->ib.index_size) {
brw->ib.index_size = index_size;
brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
}
/* We need to re-emit the index buffer state each time
* the cut index flag changes
*/
if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) {
brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index;
brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
}
}
const struct brw_tracked_state brw_indices = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BLORP |
BRW_NEW_INDICES,
},
.emit = brw_upload_indices,
};

View file

@@ -1,404 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/version.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_batch.h"
/**
* Initializes potential list of extensions if ctx == NULL, or actually enables
* extensions for a context.
*/
void
brw_init_extensions(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(devinfo->ver >= 4);
ctx->Extensions.ARB_arrays_of_arrays = true;
ctx->Extensions.ARB_buffer_storage = true;
ctx->Extensions.ARB_clear_texture = true;
ctx->Extensions.ARB_clip_control = true;
ctx->Extensions.ARB_copy_image = true;
ctx->Extensions.ARB_depth_buffer_float = true;
ctx->Extensions.ARB_depth_clamp = true;
ctx->Extensions.ARB_depth_texture = true;
ctx->Extensions.ARB_draw_elements_base_vertex = true;
ctx->Extensions.ARB_draw_instanced = true;
ctx->Extensions.ARB_ES2_compatibility = true;
ctx->Extensions.ARB_explicit_attrib_location = true;
ctx->Extensions.ARB_explicit_uniform_location = true;
ctx->Extensions.ARB_fragment_coord_conventions = true;
ctx->Extensions.ARB_fragment_program = true;
ctx->Extensions.ARB_fragment_program_shadow = true;
ctx->Extensions.ARB_fragment_shader = true;
ctx->Extensions.ARB_framebuffer_object = true;
ctx->Extensions.ARB_half_float_vertex = true;
ctx->Extensions.ARB_instanced_arrays = true;
ctx->Extensions.ARB_internalformat_query = true;
ctx->Extensions.ARB_internalformat_query2 = true;
ctx->Extensions.ARB_map_buffer_range = true;
ctx->Extensions.ARB_occlusion_query = true;
ctx->Extensions.ARB_occlusion_query2 = true;
ctx->Extensions.ARB_point_sprite = true;
ctx->Extensions.ARB_polygon_offset_clamp = true;
ctx->Extensions.ARB_seamless_cube_map = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_draw_parameters = true;
ctx->Extensions.ARB_shader_group_vote = true;
ctx->Extensions.ARB_shader_texture_lod = true;
ctx->Extensions.ARB_shading_language_packing = true;
ctx->Extensions.ARB_shadow = true;
ctx->Extensions.ARB_sync = true;
ctx->Extensions.ARB_texture_border_clamp = true;
ctx->Extensions.ARB_texture_compression_rgtc = true;
ctx->Extensions.ARB_texture_cube_map = true;
ctx->Extensions.ARB_texture_env_combine = true;
ctx->Extensions.ARB_texture_env_crossbar = true;
ctx->Extensions.ARB_texture_env_dot3 = true;
ctx->Extensions.ARB_texture_filter_anisotropic = true;
ctx->Extensions.ARB_texture_float = true;
ctx->Extensions.ARB_texture_mirror_clamp_to_edge = true;
ctx->Extensions.ARB_texture_non_power_of_two = true;
ctx->Extensions.ARB_texture_rg = true;
ctx->Extensions.ARB_texture_rgb10_a2ui = true;
ctx->Extensions.ARB_vertex_program = true;
ctx->Extensions.ARB_vertex_shader = true;
ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true;
ctx->Extensions.ARB_vertex_type_10f_11f_11f_rev = true;
ctx->Extensions.EXT_blend_color = true;
ctx->Extensions.EXT_blend_equation_separate = true;
ctx->Extensions.EXT_blend_func_separate = true;
ctx->Extensions.EXT_blend_minmax = true;
ctx->Extensions.EXT_color_buffer_half_float = true;
ctx->Extensions.EXT_draw_buffers2 = true;
ctx->Extensions.EXT_EGL_image_storage = true;
ctx->Extensions.EXT_float_blend = true;
ctx->Extensions.EXT_framebuffer_sRGB = true;
ctx->Extensions.EXT_gpu_program_parameters = true;
ctx->Extensions.EXT_packed_float = true;
ctx->Extensions.EXT_pixel_buffer_object = true;
ctx->Extensions.EXT_point_parameters = true;
ctx->Extensions.EXT_provoking_vertex = true;
ctx->Extensions.EXT_render_snorm = true;
ctx->Extensions.EXT_sRGB = true;
ctx->Extensions.EXT_stencil_two_side = true;
ctx->Extensions.EXT_texture_array = true;
ctx->Extensions.EXT_texture_env_dot3 = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.EXT_texture_integer = true;
ctx->Extensions.EXT_texture_norm16 = true;
ctx->Extensions.EXT_texture_shared_exponent = true;
ctx->Extensions.EXT_texture_snorm = true;
ctx->Extensions.EXT_texture_sRGB = true;
ctx->Extensions.EXT_texture_sRGB_decode = true;
ctx->Extensions.EXT_texture_sRGB_R8 = true;
ctx->Extensions.EXT_texture_swizzle = true;
ctx->Extensions.EXT_texture_type_2_10_10_10_REV = true;
ctx->Extensions.EXT_vertex_array_bgra = true;
ctx->Extensions.KHR_robustness = true;
ctx->Extensions.AMD_seamless_cubemap_per_texture = true;
ctx->Extensions.APPLE_object_purgeable = true;
ctx->Extensions.ATI_texture_env_combine3 = true;
ctx->Extensions.MESA_framebuffer_flip_y = true;
ctx->Extensions.NV_conditional_render = true;
ctx->Extensions.NV_fog_distance = true;
ctx->Extensions.NV_primitive_restart = true;
ctx->Extensions.NV_texture_barrier = true;
ctx->Extensions.NV_texture_env_combine4 = true;
ctx->Extensions.NV_texture_rectangle = true;
ctx->Extensions.TDFX_texture_compression_FXT1 = true;
ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true;
ctx->Extensions.OES_draw_texture = true;
ctx->Extensions.OES_EGL_image = true;
ctx->Extensions.OES_EGL_image_external = true;
ctx->Extensions.OES_standard_derivatives = true;
ctx->Extensions.OES_texture_float = true;
ctx->Extensions.OES_texture_float_linear = true;
ctx->Extensions.OES_texture_half_float = true;
ctx->Extensions.OES_texture_half_float_linear = true;
if (devinfo->ver >= 8)
ctx->Const.GLSLVersion = 460;
else if (devinfo->platform == INTEL_PLATFORM_HSW &&
can_do_pipelined_register_writes(brw->screen))
ctx->Const.GLSLVersion = 450;
else if (devinfo->ver >= 7 && can_do_pipelined_register_writes(brw->screen))
ctx->Const.GLSLVersion = 420;
else if (devinfo->ver >= 6)
ctx->Const.GLSLVersion = 330;
else
ctx->Const.GLSLVersion = 120;
if (devinfo->ver >= 6)
ctx->Const.GLSLVersionCompat = 130;
else
ctx->Const.GLSLVersionCompat = 120;
_mesa_override_glsl_version(&ctx->Const);
ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130;
ctx->Extensions.MESA_shader_integer_functions = ctx->Const.GLSLVersion >= 130;
if (devinfo->verx10 >= 45) {
ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent = true;
ctx->Extensions.KHR_blend_equation_advanced = true;
}
if (devinfo->ver >= 5) {
ctx->Extensions.ARB_texture_query_levels = ctx->Const.GLSLVersion >= 130;
ctx->Extensions.ARB_texture_query_lod = true;
ctx->Extensions.EXT_timer_query = true;
}
if (devinfo->ver == 6)
ctx->Extensions.ARB_transform_feedback2 = true;
if (devinfo->ver >= 6) {
ctx->Extensions.ARB_blend_func_extended =
!driQueryOptionb(&brw->screen->optionCache, "disable_blend_func_extended");
ctx->Extensions.ARB_conditional_render_inverted = true;
ctx->Extensions.ARB_cull_distance = true;
ctx->Extensions.ARB_draw_buffers_blend = true;
if (ctx->API != API_OPENGL_COMPAT ||
ctx->Const.AllowHigherCompatVersion)
ctx->Extensions.ARB_enhanced_layouts = true;
ctx->Extensions.ARB_ES3_compatibility = true;
ctx->Extensions.ARB_fragment_layer_viewport = true;
ctx->Extensions.ARB_pipeline_statistics_query = true;
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shading_language_420pack = true;
if (ctx->API != API_OPENGL_COMPAT ||
ctx->Const.AllowHigherCompatVersion) {
ctx->Extensions.ARB_texture_buffer_object = true;
ctx->Extensions.ARB_texture_buffer_object_rgb32 = true;
ctx->Extensions.ARB_texture_buffer_range = true;
}
ctx->Extensions.ARB_texture_cube_map_array = true;
ctx->Extensions.ARB_texture_gather = true;
ctx->Extensions.ARB_texture_multisample = true;
ctx->Extensions.ARB_uniform_buffer_object = true;
ctx->Extensions.EXT_gpu_shader4 = true;
ctx->Extensions.EXT_texture_shadow_lod = true;
if (ctx->API != API_OPENGL_COMPAT ||
ctx->Const.AllowHigherCompatVersion)
ctx->Extensions.AMD_vertex_shader_layer = true;
ctx->Extensions.EXT_framebuffer_multisample = true;
ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
ctx->Extensions.EXT_transform_feedback = true;
ctx->Extensions.ARB_transform_feedback_overflow_query = true;
ctx->Extensions.OES_depth_texture_cube_map = true;
ctx->Extensions.OES_sample_variables = true;
ctx->Extensions.ARB_timer_query = brw->screen->hw_has_timestamp;
ctx->Extensions.EXT_disjoint_timer_query =
ctx->Extensions.ARB_timer_query;
/* Only enable this in core profile because geometry shaders are
* required, and Mesa only supports geometry shaders in OpenGL 3.2 and
* later. In this driver, that currently means Core profile.
*/
if (ctx->API == API_OPENGL_CORE ||
ctx->Const.AllowHigherCompatVersion) {
ctx->Extensions.ARB_shader_viewport_layer_array = true;
ctx->Extensions.ARB_viewport_array = true;
ctx->Extensions.AMD_vertex_shader_viewport_index = true;
}
}
brw->predicate.supported = false;
if (devinfo->ver >= 7) {
ctx->Extensions.ARB_conservative_depth = true;
ctx->Extensions.ARB_derivative_control = true;
ctx->Extensions.ARB_framebuffer_no_attachments = true;
if (ctx->API != API_OPENGL_COMPAT ||
ctx->Const.AllowHigherCompatVersion) {
ctx->Extensions.ARB_gpu_shader5 = true;
ctx->Extensions.ARB_gpu_shader_fp64 = true;
}
ctx->Extensions.ARB_shader_atomic_counters = true;
ctx->Extensions.ARB_shader_atomic_counter_ops = true;
ctx->Extensions.ARB_shader_clock = true;
ctx->Extensions.ARB_shader_image_load_store = true;
ctx->Extensions.ARB_shader_image_size = true;
ctx->Extensions.ARB_shader_precision = true;
ctx->Extensions.ARB_shader_texture_image_samples = true;
if (ctx->API != API_OPENGL_COMPAT ||
ctx->Const.AllowHigherCompatVersion)
ctx->Extensions.ARB_tessellation_shader = true;
ctx->Extensions.ARB_texture_compression_bptc = true;
ctx->Extensions.ARB_texture_view = true;
ctx->Extensions.ARB_shader_storage_buffer_object = true;
ctx->Extensions.ARB_vertex_attrib_64bit = true;
ctx->Extensions.EXT_shader_samples_identical = true;
ctx->Extensions.OES_primitive_bounding_box = true;
ctx->Extensions.OES_texture_buffer = true;
if (can_do_pipelined_register_writes(brw->screen)) {
ctx->Extensions.ARB_draw_indirect = true;
ctx->Extensions.ARB_transform_feedback2 = true;
ctx->Extensions.ARB_transform_feedback3 = true;
ctx->Extensions.ARB_transform_feedback_instanced = true;
if (can_do_compute_dispatch(brw->screen) &&
ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) {
ctx->Extensions.ARB_compute_shader = true;
ctx->Extensions.ARB_ES3_1_compatibility =
devinfo->verx10 >= 75;
ctx->Extensions.NV_compute_shader_derivatives = true;
ctx->Extensions.ARB_compute_variable_group_size = true;
}
if (can_do_predicate_writes(brw->screen)) {
brw->predicate.supported = true;
ctx->Extensions.ARB_indirect_parameters = true;
}
}
ctx->Extensions.ARB_gl_spirv = true;
ctx->Extensions.ARB_spirv_extensions = true;
}
if (devinfo->verx10 >= 75) {
ctx->Extensions.ARB_stencil_texturing = true;
ctx->Extensions.ARB_texture_stencil8 = true;
ctx->Extensions.OES_geometry_shader = true;
ctx->Extensions.OES_texture_cube_map_array = true;
ctx->Extensions.OES_viewport_array = true;
}
if (devinfo->verx10 >= 75 || devinfo->platform == INTEL_PLATFORM_BYT) {
ctx->Extensions.ARB_robust_buffer_access_behavior = true;
}
if (can_do_mi_math_and_lrr(brw->screen)) {
ctx->Extensions.ARB_query_buffer_object = true;
}
if (devinfo->ver >= 8 || devinfo->platform == INTEL_PLATFORM_BYT) {
/* For now, we can't enable OES_texture_view on Gen 7 because of
* some piglit failures coming from
* piglit/tests/spec/arb_texture_view/rendering-formats.c that need
* investigation.
*/
ctx->Extensions.OES_texture_view = true;
}
if (devinfo->ver >= 7) {
/* We can safely enable OES_copy_image on Gen 7, since we emulate
* the ETC2 support using the shadow_miptree to store the
* compressed data.
*/
ctx->Extensions.OES_copy_image = true;
}
/* Gen < 6 still uses the blitter. It's somewhat annoying to add support
* for blackhole there... Does anybody actually care anymore anyway?
*/
if (devinfo->ver >= 6)
ctx->Extensions.INTEL_blackhole_render = true;
if (devinfo->ver >= 8) {
ctx->Extensions.ARB_gpu_shader_int64 = true;
/* requires ARB_gpu_shader_int64 */
ctx->Extensions.ARB_shader_ballot = true;
ctx->Extensions.ARB_ES3_2_compatibility = true;
/* Currently only implemented in the scalar backend, so only enable for
* Gfx8+. Eventually Gfx6+ could be supported.
*/
ctx->Extensions.INTEL_shader_integer_functions2 = true;
}
if (devinfo->ver >= 9) {
ctx->Extensions.ANDROID_extension_pack_es31a = true;
ctx->Extensions.AMD_depth_clamp_separate = true;
ctx->Extensions.ARB_post_depth_coverage = true;
ctx->Extensions.ARB_shader_stencil_export = true;
ctx->Extensions.EXT_shader_framebuffer_fetch = true;
ctx->Extensions.INTEL_conservative_rasterization = true;
ctx->Extensions.INTEL_shader_atomic_float_minmax = true;
ctx->Extensions.KHR_blend_equation_advanced_coherent = true;
ctx->Extensions.KHR_texture_compression_astc_ldr = true;
ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;
/*
* From the Skylake PRM Vol. 7 (Memory Fence Message, page 221):
* "A memory fence message issued by a thread causes further messages
* issued by the thread to be blocked until all previous data port
* messages have completed, or the results can be globally observed from
* the point of view of other threads in the system."
*
* From the Haswell PRM Vol. 7 (Memory Fence, page 256):
* "A memory fence message issued by a thread causes further messages
* issued by the thread to be blocked until all previous messages issued
* by the thread to that data port (data cache or render cache) have
* been globally observed from the point of view of other threads in the
* system."
*
* Summarized: For ARB_fragment_shader_interlock to work, we need to
* ensure memory access ordering for all messages to the dataport from
* all threads. Memory fence messages prior to SKL only provide memory
* access ordering for messages from the same thread, so we can only
* support the feature from Gfx9 onwards.
*
*/
ctx->Extensions.ARB_fragment_shader_interlock = true;
}
if (intel_device_info_is_9lp(devinfo))
ctx->Extensions.KHR_texture_compression_astc_hdr = true;
if (devinfo->ver >= 6)
ctx->Extensions.INTEL_performance_query = true;
if (ctx->API != API_OPENGL_COMPAT ||
ctx->Const.AllowHigherCompatVersion)
ctx->Extensions.ARB_base_instance = true;
if (ctx->API != API_OPENGL_CORE)
ctx->Extensions.ARB_color_buffer_float = true;
ctx->Extensions.EXT_texture_compression_s3tc = true;
ctx->Extensions.EXT_texture_compression_s3tc_srgb = true;
ctx->Extensions.ANGLE_texture_compression_dxt = true;
ctx->Extensions.EXT_demote_to_helper_invocation = true;
ctx->Const.PrimitiveRestartFixedIndex = true;
if (devinfo->ver >= 7) {
ctx->Extensions.EXT_memory_object_fd = true;
ctx->Extensions.EXT_memory_object = true;
ctx->Extensions.EXT_semaphore = true;
ctx->Extensions.EXT_semaphore_fd = true;
}
}

File diff suppressed because it is too large

View file

@@ -1,255 +0,0 @@
/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_FBO_H
#define BRW_FBO_H
#include <stdbool.h>
#include <assert.h>
#include "main/formats.h"
#include "main/macros.h"
#include "brw_context.h"
#include "brw_mipmap_tree.h"
#include "brw_screen.h"
#ifdef __cplusplus
extern "C" {
#endif
struct brw_mipmap_tree;
/**
* Intel renderbuffer, derived from gl_renderbuffer.
*/
struct brw_renderbuffer
{
struct swrast_renderbuffer Base;
/**
* The real renderbuffer storage.
*
* This is multisampled if NumSamples is > 1.
*/
struct brw_mipmap_tree *mt;
/**
* Downsampled contents for window-system MSAA renderbuffers.
*
* For window system MSAA color buffers, the singlesample_mt is shared with
* other processes in DRI2 (and in DRI3, it's the image buffer managed by
* glx_dri3.c), while mt is private to our process. To do a swapbuffers,
* we have to downsample out of mt into singlesample_mt. For depth and
* stencil buffers, the singlesample_mt is also private, and since we don't
* expect to need to do resolves (except if someone does a glReadPixels()
* or glCopyTexImage()), we just temporarily allocate singlesample_mt when
* asked to map the renderbuffer.
*/
struct brw_mipmap_tree *singlesample_mt;
/* Gen < 6 doesn't have a layer specifier for render targets or depth. The
* driver needs to manually offset surfaces to the correct level/layer. There
* are, however, alignment restrictions to respect as well, and in some cases
* the only option is to use a temporary single-slice surface which the driver
* copies to the full miptree after rendering.
*
* See brw_renderbuffer_move_to_temp().
*/
struct brw_mipmap_tree *align_wa_mt;
/**
* \name Miptree view
* \{
*
* Multiple renderbuffers may simultaneously wrap a single texture and each
* provide a different view into that texture. The fields below indicate
* which miptree slice is wrapped by this renderbuffer. The fields' values
* are consistent with the 'level' and 'layer' parameters of
* glFramebufferTextureLayer().
*
* For renderbuffers not created with glFramebufferTexture*(), mt_level and
* mt_layer are 0.
*/
unsigned int mt_level;
unsigned int mt_layer;
/* The number of attached logical layers. */
unsigned int layer_count;
/** \} */
GLuint draw_x, draw_y; /**< Offset of drawing within the region */
/**
* Set to true at every draw call, to indicate if a window-system
* renderbuffer needs to be downsampled before using singlesample_mt.
*/
bool need_downsample;
/**
* Set to true when doing a brw_renderbuffer_map()/unmap() that requires
* an upsample at the end.
*/
bool need_map_upsample;
/**
* Set to true if singlesample_mt is temporary storage that persists only
* for the duration of a mapping.
*/
bool singlesample_mt_is_tmp;
/**
* Set to true when application specifically asked for a sRGB visual.
*/
bool need_srgb;
};
/**
* gl_renderbuffer is a base class which we subclass. The Class field
* is used for simple run-time type checking.
*/
#define INTEL_RB_CLASS 0x12345678
/**
* Return a gl_renderbuffer ptr casted to brw_renderbuffer.
* NULL will be returned if the rb isn't really a brw_renderbuffer.
* This is determined by checking the ClassID.
*/
static inline struct brw_renderbuffer *
brw_renderbuffer(struct gl_renderbuffer *rb)
{
struct brw_renderbuffer *irb = (struct brw_renderbuffer *) rb;
if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS)
return irb;
else
return NULL;
}
static inline struct brw_mipmap_tree *
brw_renderbuffer_get_mt(struct brw_renderbuffer *irb)
{
if (!irb)
return NULL;
return (irb->align_wa_mt) ? irb->align_wa_mt : irb->mt;
}
/**
* \brief Return the framebuffer attachment specified by attIndex.
*
* If the framebuffer lacks the specified attachment, then return null.
*
* If the attached renderbuffer is a wrapper, then return the wrapped
* renderbuffer.
*/
static inline struct brw_renderbuffer *
brw_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex)
{
struct gl_renderbuffer *rb;
assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment));
rb = fb->Attachment[attIndex].Renderbuffer;
if (!rb)
return NULL;
return brw_renderbuffer(rb);
}
static inline mesa_format
brw_rb_format(const struct brw_renderbuffer *rb)
{
return rb->Base.Base.Format;
}
extern struct brw_renderbuffer *
brw_create_winsys_renderbuffer(struct brw_screen *screen,
mesa_format format, unsigned num_samples);
struct brw_renderbuffer *
brw_create_private_renderbuffer(struct brw_screen *screen,
mesa_format format, unsigned num_samples);
struct gl_renderbuffer*
brw_create_wrapped_renderbuffer(struct gl_context *ctx,
int width, int height,
mesa_format format);
extern void
brw_fbo_init(struct brw_context *brw);
void
brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb);
static inline uint32_t
brw_renderbuffer_get_tile_offsets(struct brw_renderbuffer *irb,
uint32_t *tile_x,
uint32_t *tile_y)
{
if (irb->align_wa_mt) {
*tile_x = 0;
*tile_y = 0;
return 0;
}
return brw_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer,
tile_x, tile_y);
}
bool
brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb);
void brw_renderbuffer_move_to_temp(struct brw_context *brw,
struct brw_renderbuffer *irb,
bool invalidate);
void
brw_renderbuffer_downsample(struct brw_context *brw,
struct brw_renderbuffer *irb);
void
brw_renderbuffer_upsample(struct brw_context *brw,
struct brw_renderbuffer *irb);
void brw_cache_sets_clear(struct brw_context *brw);
void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo);
void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
enum isl_format format,
enum isl_aux_usage aux_usage);
void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);
void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
enum isl_format format,
enum isl_aux_usage aux_usage);
void brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo);
unsigned
brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples);
#ifdef __cplusplus
}
#endif
#endif /* BRW_FBO_H */

View file

@@ -1,178 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "main/macros.h"
#include "main/enums.h"
#include "main/transformfeedback.h"
#include "brw_batch.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_state.h"
#include "brw_ff_gs.h"
#include "util/ralloc.h"
static void
compile_ff_gs_prog(struct brw_context *brw,
struct brw_ff_gs_prog_key *key)
{
const GLuint *program;
void *mem_ctx;
GLuint program_size;
mem_ctx = ralloc_context(NULL);
struct brw_ff_gs_prog_data prog_data;
program = brw_compile_ff_gs_prog(brw->screen->compiler, mem_ctx, key,
&prog_data,
&brw_vue_prog_data(brw->vs.base.prog_data)->vue_map,
&program_size);
brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG,
key, sizeof(*key),
program, program_size,
&prog_data, sizeof(prog_data),
&brw->ff_gs.prog_offset, &brw->ff_gs.prog_data);
ralloc_free(mem_ctx);
}
static bool
brw_ff_gs_state_dirty(const struct brw_context *brw)
{
return brw_state_dirty(brw,
_NEW_LIGHT,
BRW_NEW_PRIMITIVE |
BRW_NEW_TRANSFORM_FEEDBACK |
BRW_NEW_VS_PROG_DATA);
}
static void
brw_ff_gs_populate_key(struct brw_context *brw,
struct brw_ff_gs_prog_key *key)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
static const unsigned swizzle_for_offset[4] = {
BRW_SWIZZLE4(0, 1, 2, 3),
BRW_SWIZZLE4(1, 2, 3, 3),
BRW_SWIZZLE4(2, 3, 3, 3),
BRW_SWIZZLE4(3, 3, 3, 3)
};
struct gl_context *ctx = &brw->ctx;
assert(devinfo->ver < 7);
memset(key, 0, sizeof(*key));
/* BRW_NEW_VS_PROG_DATA (part of VUE map) */
key->attrs = brw_vue_prog_data(brw->vs.base.prog_data)->vue_map.slots_valid;
/* BRW_NEW_PRIMITIVE */
key->primitive = brw->primitive;
/* _NEW_LIGHT */
key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) {
/* Provide consistent primitive order with brw_set_prim's
* optimization of single quads to trifans.
*/
key->pv_first = true;
}
if (devinfo->ver == 6) {
/* On Gfx6, GS is used for transform feedback. */
/* BRW_NEW_TRANSFORM_FEEDBACK */
if (_mesa_is_xfb_active_and_unpaused(ctx)) {
const struct gl_program *prog =
ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
const struct gl_transform_feedback_info *linked_xfb_info =
prog->sh.LinkedTransformFeedback;
int i;
/* Make sure that the VUE slots won't overflow the unsigned chars in
* key->transform_feedback_bindings[].
*/
STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
/* Make sure that we don't need more binding table entries than we've
* set aside for use in transform feedback. (We shouldn't, since we
* set aside enough binding table entries to have one per component).
*/
assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
key->need_gs_prog = true;
key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
key->transform_feedback_bindings[i] =
linked_xfb_info->Outputs[i].OutputRegister;
key->transform_feedback_swizzles[i] =
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
}
}
} else {
/* Pre-gfx6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
* into simpler primitives.
*/
key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
brw->primitive == _3DPRIM_QUADSTRIP ||
brw->primitive == _3DPRIM_LINELOOP);
}
}
/* Calculate interpolants for triangle and line rasterization.
*/
void
brw_upload_ff_gs_prog(struct brw_context *brw)
{
struct brw_ff_gs_prog_key key;
if (!brw_ff_gs_state_dirty(brw))
return;
/* Populate the key:
*/
brw_ff_gs_populate_key(brw, &key);
if (brw->ff_gs.prog_active != key.need_gs_prog) {
brw->ctx.NewDriverState |= BRW_NEW_FF_GS_PROG_DATA;
brw->ff_gs.prog_active = key.need_gs_prog;
}
if (brw->ff_gs.prog_active) {
if (!brw_search_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, &key,
sizeof(key), &brw->ff_gs.prog_offset,
&brw->ff_gs.prog_data, true)) {
compile_ff_gs_prog(brw, &key);
}
}
}

View file

@@ -1,42 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#ifndef BRW_GS_H
#define BRW_GS_H
#include "brw_context.h"
#include "compiler/brw_eu.h"
void
brw_upload_ff_gs_prog(struct brw_context *brw);
#endif

View file

@@ -1,119 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_context.h"
#include "brw_state.h"
#include "main/context.h"
#include "main/formatquery.h"
#include "main/glformats.h"
static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
GLenum internalFormat, int samples[16])
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
(void) target;
(void) internalFormat;
switch (devinfo->ver) {
case 11:
case 10:
case 9:
samples[0] = 16;
samples[1] = 8;
samples[2] = 4;
samples[3] = 2;
return 4;
case 8:
samples[0] = 8;
samples[1] = 4;
samples[2] = 2;
return 3;
case 7:
if (internalFormat == GL_RGBA32F && _mesa_is_gles(ctx)) {
/* For GLES, we are allowed to return a smaller number of samples for
* GL_RGBA32F. See OpenGLES 3.2 spec, section 20.3.1 Internal Format
* Query Parameters, under SAMPLES:
*
* "A value less than or equal to the value of MAX_SAMPLES, if
* internalformat is RGBA16F, R32F, RG32F, or RGBA32F."
*
* In brw_render_target_supported, we prevent formats with a size
* greater than 8 bytes from using 8x MSAA on gfx7.
*/
samples[0] = 4;
return 1;
} else {
samples[0] = 8;
samples[1] = 4;
return 2;
}
case 6:
samples[0] = 4;
return 1;
default:
assert(devinfo->ver < 6);
samples[0] = 1;
return 1;
}
}
void
brw_query_internal_format(struct gl_context *ctx, GLenum target,
GLenum internalFormat, GLenum pname, GLint *params)
{
/* The Mesa layer gives us a temporary params buffer that is guaranteed
* to be non-NULL and to have at least 16 elements.
*/
assert(params != NULL);
switch (pname) {
case GL_SAMPLES:
brw_query_samples_for_format(ctx, target, internalFormat, params);
break;
case GL_NUM_SAMPLE_COUNTS: {
size_t num_samples;
GLint dummy_buffer[16];
num_samples = brw_query_samples_for_format(ctx, target, internalFormat,
dummy_buffer);
params[0] = (GLint) num_samples;
break;
}
default:
/* By default, we call the driver hook's fallback function from the frontend,
* which has a generic implementation for all pnames.
*/
_mesa_query_internal_format_default(ctx, target, internalFormat, pname,
params);
break;
}
}

View file

@@ -1,144 +0,0 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "main/mipmap.h"
#include "main/teximage.h"
#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_tex.h"
#include "drivers/common/meta.h"
#define FILE_DEBUG_FLAG DEBUG_BLORP
/**
* The GenerateMipmap() driver hook.
*/
void
brw_generate_mipmap(struct gl_context *ctx, GLenum target,
struct gl_texture_object *tex_obj)
{
struct brw_context *brw = brw_context(ctx);
struct intel_device_info *devinfo = &brw->screen->devinfo;
struct brw_texture_object *intel_obj = brw_texture_object(tex_obj);
const unsigned base_level = tex_obj->Attrib.BaseLevel;
unsigned last_level, first_layer, last_layer;
/* Blorp doesn't handle combined depth/stencil surfaces on Gfx4-5 yet. */
if (devinfo->ver <= 5 &&
(tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_COMPONENT ||
tex_obj->Image[0][base_level]->_BaseFormat == GL_DEPTH_STENCIL)) {
_mesa_meta_GenerateMipmap(ctx, target, tex_obj);
return;
}
/* find expected last mipmap level to generate */
last_level = _mesa_compute_num_levels(ctx, tex_obj, target) - 1;
if (last_level == 0)
return;
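/* Illustrative example: for a 64x16 2D base image, _mesa_compute_num_levels()
* returns 7, so last_level == 6 and the loop below produces the 32x8, 16x4,
* 8x2, 4x1, 2x1 and 1x1 levels.
*/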
/* The texture isn't in a "complete" state yet so set the expected
* last_level here; we're not going through normal texture validation.
*/
intel_obj->_MaxLevel = last_level;
if (!tex_obj->Immutable) {
_mesa_prepare_mipmap_levels(ctx, tex_obj, base_level, last_level);
/* At this point, memory for all the texture levels has been
* allocated. However, the base level image may be in one resource
* while the subsequent/smaller levels may be in another resource.
* Finalizing the texture will copy the base images from the former
* resource to the latter.
*
* After this, we'll have all mipmap levels in one resource.
*/
brw_finalize_mipmap_tree(brw, tex_obj);
}
struct brw_mipmap_tree *mt = intel_obj->mt;
if (!mt) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "mipmap generation");
return;
}
const mesa_format format = intel_obj->_Format;
/* Fall back to the CPU for non-renderable cases.
*
* TODO: 3D textures require blending data from multiple slices,
* which means we need custom shaders. For now, fall back.
*/
if (!brw->mesa_format_supports_render[format] || target == GL_TEXTURE_3D) {
_mesa_generate_mipmap(ctx, target, tex_obj);
return;
}
const struct isl_extent4d *base_size = &mt->surf.logical_level0_px;
if (mt->target == GL_TEXTURE_CUBE_MAP) {
first_layer = _mesa_tex_target_to_face(target);
last_layer = first_layer;
} else {
first_layer = 0;
last_layer = base_size->array_len - 1;
}
/* The GL_EXT_texture_sRGB_decode extension's issues section says:
*
* "10) How is mipmap generation of sRGB textures affected by the
* TEXTURE_SRGB_DECODE_EXT parameter?
*
* RESOLVED: When the TEXTURE_SRGB_DECODE parameter is DECODE_EXT
* for an sRGB texture, mipmap generation should decode sRGB texels
* to a linear RGB color space, perform downsampling, then encode
* back to an sRGB color space. (Issue 24 in the EXT_texture_sRGB
* specification provides a rationale for why.) When the parameter
* is SKIP_DECODE_EXT instead, mipmap generation skips the encode
* and decode steps during mipmap generation. By skipping the
* encode and decode steps, sRGB mipmap generation should match
* the mipmap generation for a non-sRGB texture."
*/
bool do_srgb = tex_obj->Sampler.Attrib.sRGBDecode == GL_DECODE_EXT;
for (unsigned dst_level = base_level + 1;
dst_level <= last_level;
dst_level++) {
const unsigned src_level = dst_level - 1;
for (unsigned layer = first_layer; layer <= last_layer; layer++) {
brw_blorp_blit_miptrees(brw, mt, src_level, layer, format,
SWIZZLE_XYZW, mt, dst_level, layer, format,
0, 0,
minify(base_size->width, src_level),
minify(base_size->height, src_level),
0, 0,
minify(base_size->width, dst_level),
minify(base_size->height, dst_level),
GL_LINEAR, false, false,
do_srgb, do_srgb);
}
}
}
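/* Illustrative GL-side sketch (application code, not part of this file):
 * selecting the decode-filter-encode path quoted from the
 * EXT_texture_sRGB_decode issues list above before generating mipmaps.
 * The texture object and target are hypothetical.
 */
static void
example_generate_srgb_mipmaps(GLuint tex)
{
   glBindTexture(GL_TEXTURE_2D, tex);
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_SRGB_DECODE_EXT, GL_DECODE_EXT);
   glGenerateMipmap(GL_TEXTURE_2D);
}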

View file

@ -1,256 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_gs.c
*
* State atom for client-programmable geometry shaders, and support code.
*/
#include "brw_gs.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_ff_gs.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "compiler/glsl/ir_uniform.h"
static void
assign_gs_binding_table_offsets(const struct intel_device_info *devinfo,
const struct gl_program *prog,
struct brw_gs_prog_data *prog_data)
{
/* In gfx6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
* feedback surfaces.
*/
uint32_t reserved = devinfo->ver == 6 ? BRW_MAX_SOL_BINDINGS : 0;
brw_assign_common_binding_table_offsets(devinfo, prog,
&prog_data->base.base, reserved);
}
static void
brw_gfx6_xfb_setup(const struct gl_transform_feedback_info *linked_xfb_info,
struct brw_gs_prog_data *gs_prog_data)
{
static const unsigned swizzle_for_offset[4] = {
BRW_SWIZZLE4(0, 1, 2, 3),
BRW_SWIZZLE4(1, 2, 3, 3),
BRW_SWIZZLE4(2, 3, 3, 3),
BRW_SWIZZLE4(3, 3, 3, 3)
};
int i;
/* Make sure that the VUE slots won't overflow the unsigned chars in
* prog_data->transform_feedback_bindings[].
*/
STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
/* Make sure that we don't need more binding table entries than we've
* set aside for use in transform feedback. (We shouldn't, since we
* set aside enough binding table entries to have one per component).
*/
assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
gs_prog_data->transform_feedback_bindings[i] =
linked_xfb_info->Outputs[i].OutputRegister;
gs_prog_data->transform_feedback_swizzles[i] =
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
}
}
static bool
brw_codegen_gs_prog(struct brw_context *brw,
struct brw_program *gp,
struct brw_gs_prog_key *key)
{
struct brw_compiler *compiler = brw->screen->compiler;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct brw_stage_state *stage_state = &brw->gs.base;
struct brw_gs_prog_data prog_data;
bool start_busy = false;
double start_time = 0;
memset(&prog_data, 0, sizeof(prog_data));
void *mem_ctx = ralloc_context(NULL);
nir_shader *nir = nir_shader_clone(mem_ctx, gp->program.nir);
assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data);
brw_nir_setup_glsl_uniforms(mem_ctx, nir, &gp->program,
&prog_data.base.base,
compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
if (brw->can_push_ubos) {
brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
prog_data.base.base.ubo_ranges);
}
uint64_t outputs_written = nir->info.outputs_written;
brw_compute_vue_map(devinfo,
&prog_data.base.vue_map, outputs_written,
gp->program.info.separate_shader, 1);
if (devinfo->ver == 6)
brw_gfx6_xfb_setup(gp->program.sh.LinkedTransformFeedback,
&prog_data);
int st_index = -1;
if (INTEL_DEBUG(DEBUG_SHADER_TIME))
st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true);
if (unlikely(brw->perf_debug)) {
start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
start_time = get_time();
}
char *error_str;
const unsigned *program =
brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key,
&prog_data, nir, st_index,
NULL, &error_str);
if (program == NULL) {
ralloc_strcat(&gp->program.sh.data->InfoLog, error_str);
_mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str);
ralloc_free(mem_ctx);
return false;
}
if (unlikely(brw->perf_debug)) {
if (gp->compiled_once) {
brw_debug_recompile(brw, MESA_SHADER_GEOMETRY, gp->program.Id,
&key->base);
}
if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
perf_debug("GS compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
gp->compiled_once = true;
}
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
prog_data.base.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.base.param);
ralloc_steal(NULL, prog_data.base.base.pull_param);
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
key, sizeof(*key),
program, prog_data.base.base.program_size,
&prog_data, sizeof(prog_data),
&stage_state->prog_offset, &brw->gs.base.prog_data);
ralloc_free(mem_ctx);
return true;
}
static bool
brw_gs_state_dirty(const struct brw_context *brw)
{
return brw_state_dirty(brw,
_NEW_TEXTURE,
BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_TRANSFORM_FEEDBACK);
}
void
brw_gs_populate_key(struct brw_context *brw,
struct brw_gs_prog_key *key)
{
struct gl_context *ctx = &brw->ctx;
struct brw_program *gp =
(struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
memset(key, 0, sizeof(*key));
brw_populate_base_prog_key(ctx, gp, &key->base);
}
void
brw_upload_gs_prog(struct brw_context *brw)
{
struct brw_stage_state *stage_state = &brw->gs.base;
struct brw_gs_prog_key key;
/* BRW_NEW_GEOMETRY_PROGRAM */
struct brw_program *gp =
(struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
if (!brw_gs_state_dirty(brw))
return;
brw_gs_populate_key(brw, &key);
if (brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, &key, sizeof(key),
&stage_state->prog_offset, &brw->gs.base.prog_data,
true))
return;
if (brw_disk_cache_upload_program(brw, MESA_SHADER_GEOMETRY))
return;
gp = (struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
gp->id = key.base.program_string_id;
ASSERTED bool success = brw_codegen_gs_prog(brw, gp, &key);
assert(success);
}
void
brw_gs_populate_default_key(const struct brw_compiler *compiler,
struct brw_gs_prog_key *key,
struct gl_program *prog)
{
const struct intel_device_info *devinfo = compiler->devinfo;
memset(key, 0, sizeof(*key));
brw_populate_default_base_prog_key(devinfo, brw_program(prog),
&key->base);
}
bool
brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
struct brw_gs_prog_key key;
uint32_t old_prog_offset = brw->gs.base.prog_offset;
struct brw_stage_prog_data *old_prog_data = brw->gs.base.prog_data;
bool success;
struct brw_program *bgp = brw_program(prog);
brw_gs_populate_default_key(brw->screen->compiler, &key, prog);
success = brw_codegen_gs_prog(brw, bgp, &key);
brw->gs.base.prog_offset = old_prog_offset;
brw->gs.base.prog_data = old_prog_data;
return success;
}

View file

@ -1,52 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_VEC4_GS_H
#define BRW_VEC4_GS_H
#include <stdbool.h>
#include "brw_context.h"
#ifdef __cplusplus
extern "C" {
#endif
struct gl_shader_program;
void
brw_upload_gs_prog(struct brw_context *brw);
void
brw_gs_populate_key(struct brw_context *brw,
struct brw_gs_prog_key *key);
void
brw_gs_populate_default_key(const struct brw_compiler *compiler,
struct brw_gs_prog_key *key,
struct gl_program *prog);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* BRW_VEC4_GS_H */

View file

@ -1,117 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "main/shaderapi.h"
#include "brw_context.h"
#include "brw_state.h"
/* Creates a new GS constant buffer reflecting the current GS program's
* constants, if needed by the GS program.
*
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
static void
brw_upload_gs_pull_constants(struct brw_context *brw)
{
struct brw_stage_state *stage_state = &brw->gs.base;
/* BRW_NEW_GEOMETRY_PROGRAM */
struct brw_program *gp =
(struct brw_program *) brw->programs[MESA_SHADER_GEOMETRY];
if (!gp)
return;
/* BRW_NEW_GS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
_mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
/* _NEW_PROGRAM_CONSTANTS */
brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program,
stage_state, prog_data);
}
const struct brw_tracked_state brw_gs_pull_constants = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = BRW_NEW_BATCH |
BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_GS_PROG_DATA,
},
.emit = brw_upload_gs_pull_constants,
};
static void
brw_upload_gs_ubo_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* _NEW_PROGRAM */
struct gl_program *prog =
ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
/* BRW_NEW_GS_PROG_DATA */
struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
brw_upload_ubo_surfaces(brw, prog, &brw->gs.base, prog_data);
}
const struct brw_tracked_state brw_gs_ubo_surfaces = {
.dirty = {
.mesa = _NEW_PROGRAM,
.brw = BRW_NEW_BATCH |
BRW_NEW_GS_PROG_DATA |
BRW_NEW_UNIFORM_BUFFER,
},
.emit = brw_upload_gs_ubo_surfaces,
};
static void
brw_upload_gs_image_surfaces(struct brw_context *brw)
{
/* BRW_NEW_GEOMETRY_PROGRAM */
const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
if (gp) {
/* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, gp, &brw->gs.base,
brw->gs.base.prog_data);
}
}
const struct brw_tracked_state brw_gs_image_surfaces = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_AUX_STATE |
BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_GS_PROG_DATA |
BRW_NEW_IMAGE_UNITS,
},
.emit = brw_upload_gs_image_surfaces,
};

View file

@ -1,122 +0,0 @@
/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_IMAGE_H
#define BRW_IMAGE_H
/** @file brw_image.h
*
* Structure definitions and prototypes for __DRIimage, the driver-private
* structure backing EGLImage or a drawable in DRI3.
*
* The __DRIimage is passed around the loader code (src/glx and src/egl), but
* it's opaque to that code and may only be accessed by loader extensions
* (mostly located in brw_screen.c).
*/
#include <stdbool.h>
#include <xf86drm.h>
#include "main/mtypes.h"
#include "brw_bufmgr.h"
#include <GL/internal/dri_interface.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Used with images created with image_from_names
* to help support planar images.
*/
struct brw_image_format {
int fourcc;
int components;
int nplanes;
struct {
int buffer_index;
int width_shift;
int height_shift;
uint32_t dri_format;
int cpp;
} planes[3];
float scaling_factor;
};
struct __DRIimageRec {
struct brw_screen *screen;
struct brw_bo *bo;
uint32_t pitch; /**< in bytes */
GLenum internal_format;
uint32_t dri_format;
GLuint format; /**< mesa_format or mesa_array_format */
uint64_t modifier; /**< fb modifier (fourcc) */
uint32_t offset;
/*
* Need to save these here between calls to
* image_from_names and calls to image_from_planar.
*/
uint32_t strides[3];
uint32_t offsets[3];
const struct brw_image_format *planar_format;
/* particular miptree level */
GLuint width;
GLuint height;
GLuint tile_x;
GLuint tile_y;
bool has_depthstencil;
bool imported_dmabuf;
/** Offset of the auxiliary compression surface in the bo. */
uint32_t aux_offset;
/** Pitch of the auxiliary compression surface. */
uint32_t aux_pitch;
/** Total size in bytes of the auxiliary compression surface. */
uint32_t aux_size;
/**
* Provided by EGL_EXT_image_dma_buf_import.
* \{
*/
enum __DRIYUVColorSpace yuv_color_space;
enum __DRISampleRange sample_range;
enum __DRIChromaSiting horizontal_siting;
enum __DRIChromaSiting vertical_siting;
/* \} */
__DRIscreen *driScrnPriv;
void *loader_private;
};
#ifdef __cplusplus
}
#endif
#endif

View file

@ -1,401 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/gl_nir_linker.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/ir_optimization.h"
#include "compiler/glsl/program.h"
#include "compiler/nir/nir_serialize.h"
#include "program/program.h"
#include "main/glspirv.h"
#include "main/mtypes.h"
#include "main/shaderapi.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
/**
* Performs a compile of the shader stages even when we don't know
* what non-orthogonal state will be set, in the hope that it reflects
 * the eventual NOS used, which lets us surface failures at link time.
*/
static bool
brw_shader_precompile(struct gl_context *ctx,
struct gl_shader_program *sh_prog)
{
struct gl_linked_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
struct gl_linked_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
struct gl_linked_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
struct gl_linked_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct gl_linked_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
struct gl_linked_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
if (fs && !brw_fs_precompile(ctx, fs->Program))
return false;
if (gs && !brw_gs_precompile(ctx, gs->Program))
return false;
if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
return false;
if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program))
return false;
if (vs && !brw_vs_precompile(ctx, vs->Program))
return false;
if (cs && !brw_cs_precompile(ctx, cs->Program))
return false;
return true;
}
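/* Illustrative application-side sketch (not part of this file): because the
 * stages are precompiled at link time, a shader that cannot be compiled for
 * the assumed non-orthogonal state surfaces as a link failure that the
 * application can read back.  The program object is hypothetical.
 */
static bool
example_check_link_status(GLuint prog)
{
   GLint ok = GL_FALSE;
   char log[1024];

   glLinkProgram(prog);
   glGetProgramiv(prog, GL_LINK_STATUS, &ok);
   if (!ok)
      glGetProgramInfoLog(prog, sizeof(log), NULL, log);

   return ok == GL_TRUE;
}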
static void
brw_lower_packing_builtins(struct brw_context *brw,
exec_list *ir)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
/* Gens < 7 don't have instructions to convert to or from half-precision,
* and Gens < 6 don't expose that functionality.
*/
if (devinfo->ver != 6)
return;
lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
}
static void
process_glsl_ir(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_linked_shader *shader)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
/* Temporary memory context for any new IR. */
void *mem_ctx = ralloc_context(NULL);
ralloc_adopt(mem_ctx, shader->ir);
if (shader->Stage == MESA_SHADER_FRAGMENT) {
lower_blend_equation_advanced(
shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
}
/* lower_packing_builtins() inserts arithmetic instructions, so it
* must precede lower_instructions().
*/
brw_lower_packing_builtins(brw, shader->ir);
do_mat_op_to_vec(shader->ir);
unsigned instructions_to_lower = (DIV_TO_MUL_RCP |
SUB_TO_ADD_NEG |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
DFREXP_DLDEXP_TO_ARITH);
if (devinfo->ver < 7) {
instructions_to_lower |= BIT_COUNT_TO_MATH |
EXTRACT_TO_SHIFTS |
INSERT_TO_SHIFTS |
REVERSE_TO_SHIFTS;
}
lower_instructions(shader->ir, instructions_to_lower);
/* Pre-gfx6 HW can only nest if-statements 16 deep. Beyond this,
* if-statements need to be flattened.
*/
if (devinfo->ver < 6)
lower_if_to_cond_assign(shader->Stage, shader->ir, 16);
do_vec_index_to_cond_assign(shader->ir);
lower_vector_insert(shader->ir, true);
lower_offset_arrays(shader->ir);
lower_quadop_vector(shader->ir, false);
validate_ir_tree(shader->ir);
/* Now that we've finished altering the linked IR, reparent any live IR back
* to the permanent memory context, and free the temporary one (discarding any
* junk we optimized away).
*/
reparent_ir(shader->ir, shader->ir);
ralloc_free(mem_ctx);
if (ctx->_Shader->Flags & GLSL_DUMP) {
fprintf(stderr, "\n");
if (shader->ir) {
fprintf(stderr, "GLSL IR for linked %s program %d:\n",
_mesa_shader_stage_to_string(shader->Stage),
shader_prog->Name);
_mesa_print_ir(stderr, shader->ir, NULL);
} else {
fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be "
"from cache)\n", _mesa_shader_stage_to_string(shader->Stage),
shader_prog->Name);
}
fprintf(stderr, "\n");
}
}
static void
unify_interfaces(struct shader_info **infos)
{
struct shader_info *prev_info = NULL;
for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {
if (!infos[i])
continue;
if (prev_info) {
prev_info->outputs_written |= infos[i]->inputs_read &
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
infos[i]->inputs_read |= prev_info->outputs_written &
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
prev_info->patch_outputs_written |= infos[i]->patch_inputs_read;
infos[i]->patch_inputs_read |= prev_info->patch_outputs_written;
}
prev_info = infos[i];
}
}
static void
update_xfb_info(struct gl_transform_feedback_info *xfb_info,
struct shader_info *info)
{
if (!xfb_info)
return;
for (unsigned i = 0; i < xfb_info->NumOutputs; i++) {
struct gl_transform_feedback_output *output = &xfb_info->Outputs[i];
/* The VUE header contains three scalar fields packed together:
* - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
* - gl_Layer is stored in VARYING_SLOT_PSIZ.y
* - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
*/
switch (output->OutputRegister) {
case VARYING_SLOT_LAYER:
assert(output->NumComponents == 1);
output->OutputRegister = VARYING_SLOT_PSIZ;
output->ComponentOffset = 1;
break;
case VARYING_SLOT_VIEWPORT:
assert(output->NumComponents == 1);
output->OutputRegister = VARYING_SLOT_PSIZ;
output->ComponentOffset = 2;
break;
case VARYING_SLOT_PSIZ:
assert(output->NumComponents == 1);
output->ComponentOffset = 3;
break;
}
info->outputs_written |= 1ull << output->OutputRegister;
}
}
extern "C" GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
struct brw_context *brw = brw_context(ctx);
const struct brw_compiler *compiler = brw->screen->compiler;
unsigned int stage;
struct shader_info *infos[MESA_SHADER_STAGES] = { 0, };
if (shProg->data->LinkStatus == LINKING_SKIPPED)
return GL_TRUE;
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
if (!shader)
continue;
struct gl_program *prog = shader->Program;
prog->Parameters = _mesa_new_parameter_list();
if (!shader->spirv_data)
process_glsl_ir(brw, shProg, shader);
_mesa_copy_linked_program_data(shProg, shader);
prog->ShadowSamplers = shader->shadow_samplers;
bool debug_enabled =
INTEL_DEBUG(intel_debug_flag_for_shader_stage(shader->Stage));
if (debug_enabled && shader->ir) {
fprintf(stderr, "GLSL IR for native %s shader %d:\n",
_mesa_shader_stage_to_string(shader->Stage), shProg->Name);
_mesa_print_ir(stderr, shader->ir, NULL);
fprintf(stderr, "\n\n");
}
prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
compiler->scalar_stage[stage]);
}
/* TODO: Verify whether it's feasible to split the NIR linking work into a
 * per-stage part (that fills out the information we need for the passes) and
 * an actual linking part, so that brw_nir_lower_resources could be folded
 * back into brw_create_nir.
*/
/* SPIR-V programs use a NIR linker */
if (shProg->data->spirv) {
static const gl_nir_linker_options opts = {
.fill_parameters = false,
};
if (!gl_nir_link_spirv(ctx, shProg, &opts))
return GL_FALSE;
}
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
if (!shader)
continue;
struct gl_program *prog = shader->Program;
brw_nir_lower_resources(prog->nir, shProg, prog, &brw->screen->devinfo);
NIR_PASS_V(prog->nir, brw_nir_lower_gl_images, prog);
}
/* Determine first and last stage. */
unsigned first = MESA_SHADER_STAGES;
unsigned last = 0;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (!shProg->_LinkedShaders[i])
continue;
if (first == MESA_SHADER_STAGES)
first = i;
last = i;
}
/* Linking the stages in the opposite order (from fragment to vertex)
* ensures that inter-shader outputs written to in an earlier stage
* are eliminated if they are (transitively) not used in a later
* stage.
*
* TODO: Look into Shadow of Mordor regressions on HSW and enable this for
* all platforms. See: https://bugs.freedesktop.org/show_bug.cgi?id=103537
*/
if (first != last && brw->screen->devinfo.ver >= 8) {
int next = last;
for (int i = next - 1; i >= 0; i--) {
if (shProg->_LinkedShaders[i] == NULL)
continue;
brw_nir_link_shaders(compiler,
shProg->_LinkedShaders[i]->Program->nir,
shProg->_LinkedShaders[next]->Program->nir);
next = i;
}
}
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
if (!shader)
continue;
struct gl_program *prog = shader->Program;
_mesa_update_shader_textures_used(shProg, prog);
brw_shader_gather_info(prog->nir, prog);
NIR_PASS_V(prog->nir, gl_nir_lower_atomics, shProg, false);
NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo);
nir_sweep(prog->nir);
infos[stage] = &prog->nir->info;
update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);
/* Make a pass over the IR to add state references for any built-in
* uniforms that are used. This has to be done now (during linking).
* Code generation doesn't happen until the first time this shader is
* used for rendering. Waiting until then to generate the parameters is
* too late. At that point, the values for the built-in uniforms won't
* get sent to the shader.
*/
nir_foreach_uniform_variable(var, prog->nir) {
const nir_state_slot *const slots = var->state_slots;
for (unsigned int i = 0; i < var->num_state_slots; i++) {
assert(slots != NULL);
_mesa_add_state_reference(prog->Parameters, slots[i].tokens);
}
}
}
/* The linker tries to dead code eliminate unused varying components,
* and make sure interfaces match. But it isn't able to do so in all
* cases. So, explicitly make the interfaces match by OR'ing together
* the inputs_read/outputs_written bitfields of adjacent stages.
*/
if (!shProg->SeparateShader)
unify_interfaces(infos);
if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
for (unsigned i = 0; i < shProg->NumShaders; i++) {
const struct gl_shader *sh = shProg->Shaders[i];
if (!sh)
continue;
fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
_mesa_shader_stage_to_string(sh->Stage),
i, shProg->Name);
fprintf(stderr, "%s", sh->Source);
fprintf(stderr, "\n");
}
}
if (brw->precompile && !brw_shader_precompile(ctx, shProg))
return GL_FALSE;
/* SPIR-V programs build their resource lists from the linked NIR shaders. */
if (!shProg->data->spirv)
build_program_resource_list(ctx, shProg, false);
else
nir_build_program_resource_list(ctx, shProg, true);
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
if (!shader)
continue;
/* The GLSL IR won't be needed anymore. */
ralloc_free(shader->ir);
shader->ir = NULL;
}
return GL_TRUE;
}

View file

@ -1,422 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_fbo.h"
#include "brw_meta_util.h"
#include "brw_state.h"
#include "main/blend.h"
#include "main/fbobject.h"
#include "util/format_srgb.h"
/**
* Helper function for handling mirror image blits.
*
* If coord0 > coord1, swap them and invert the "mirror" boolean.
*/
static inline void
fixup_mirroring(bool *mirror, float *coord0, float *coord1)
{
if (*coord0 > *coord1) {
*mirror = !*mirror;
float tmp = *coord0;
*coord0 = *coord1;
*coord1 = tmp;
}
}
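/* Illustrative sketch (hypothetical values, not in the original file): a
 * blit specified "backwards" on one axis is normalized by fixup_mirroring();
 * the coordinates are swapped and the mirror flag is flipped.
 */
static bool
example_normalize_x_axis(void)
{
   bool mirror_x = false;
   float srcX0 = 100.0f, srcX1 = 20.0f;

   fixup_mirroring(&mirror_x, &srcX0, &srcX1);

   /* Now srcX0 == 20.0f, srcX1 == 100.0f and mirror_x == true. */
   return mirror_x;
}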
/**
* Compute the number of pixels to clip for each side of a rect
*
* \param x0 The rect's left coordinate
* \param y0 The rect's bottom coordinate
* \param x1 The rect's right coordinate
* \param y1 The rect's top coordinate
* \param min_x The clipping region's left coordinate
* \param min_y The clipping region's bottom coordinate
* \param max_x The clipping region's right coordinate
* \param max_y The clipping region's top coordinate
* \param clipped_x0 The number of pixels to clip from the left side
* \param clipped_y0 The number of pixels to clip from the bottom side
* \param clipped_x1 The number of pixels to clip from the right side
* \param clipped_y1 The number of pixels to clip from the top side
*
* \return false if we clip everything away, true otherwise
*/
static inline bool
compute_pixels_clipped(float x0, float y0, float x1, float y1,
float min_x, float min_y, float max_x, float max_y,
float *clipped_x0, float *clipped_y0, float *clipped_x1, float *clipped_y1)
{
/* If we are going to clip everything away, stop. */
if (!(min_x <= max_x &&
min_y <= max_y &&
x0 <= max_x &&
y0 <= max_y &&
min_x <= x1 &&
min_y <= y1 &&
x0 <= x1 &&
y0 <= y1)) {
return false;
}
if (x0 < min_x)
*clipped_x0 = min_x - x0;
else
*clipped_x0 = 0;
if (max_x < x1)
*clipped_x1 = x1 - max_x;
else
*clipped_x1 = 0;
if (y0 < min_y)
*clipped_y0 = min_y - y0;
else
*clipped_y0 = 0;
if (max_y < y1)
*clipped_y1 = y1 - max_y;
else
*clipped_y1 = 0;
return true;
}
/**
* Clips a coordinate (left, right, top or bottom) for the src or dst rect
* (whichever requires the largest clip) and adjusts the coordinate
* for the other rect accordingly.
*
* \param mirror true if mirroring is required
* \param src the source rect coordinate (for example srcX0)
* \param dst0 the dst rect coordinate (for example dstX0)
* \param dst1 the opposite dst rect coordinate (for example dstX1)
* \param clipped_src0 number of pixels to clip from the src coordinate
* \param clipped_dst0 number of pixels to clip from the dst coordinate
* \param clipped_dst1 number of pixels to clip from the opposite dst coordinate
* \param scale the src vs dst scale involved for that coordinate
* \param isLeftOrBottom true if we are clipping the left or bottom sides
* of the rect.
*/
static inline void
clip_coordinates(bool mirror,
float *src, float *dst0, float *dst1,
float clipped_src0,
float clipped_dst0,
float clipped_dst1,
float scale,
bool isLeftOrBottom)
{
/* When clipping we need to add or subtract pixels from the original
* coordinates depending on whether we are acting on the left/bottom
* or right/top sides of the rect respectively. We assume we have to
* add them in the code below, and multiply by -1 when we should
* subtract.
*/
int mult = isLeftOrBottom ? 1 : -1;
if (!mirror) {
if (clipped_src0 >= clipped_dst0 * scale) {
*src += clipped_src0 * mult;
*dst0 += clipped_src0 / scale * mult;
} else {
*dst0 += clipped_dst0 * mult;
*src += clipped_dst0 * scale * mult;
}
} else {
if (clipped_src0 >= clipped_dst1 * scale) {
*src += clipped_src0 * mult;
*dst1 -= clipped_src0 / scale * mult;
} else {
*dst1 -= clipped_dst1 * mult;
*src += clipped_dst1 * scale * mult;
}
}
}
bool
brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
const struct gl_framebuffer *read_fb,
const struct gl_framebuffer *draw_fb,
GLfloat *srcX0, GLfloat *srcY0,
GLfloat *srcX1, GLfloat *srcY1,
GLfloat *dstX0, GLfloat *dstY0,
GLfloat *dstX1, GLfloat *dstY1,
bool *mirror_x, bool *mirror_y)
{
*mirror_x = false;
*mirror_y = false;
/* Detect if the blit needs to be mirrored */
fixup_mirroring(mirror_x, srcX0, srcX1);
fixup_mirroring(mirror_x, dstX0, dstX1);
fixup_mirroring(mirror_y, srcY0, srcY1);
fixup_mirroring(mirror_y, dstY0, dstY1);
/* Compute number of pixels to clip for each side of both rects. Return
* early if we are going to clip everything away.
*/
float clip_src_x0;
float clip_src_x1;
float clip_src_y0;
float clip_src_y1;
float clip_dst_x0;
float clip_dst_x1;
float clip_dst_y0;
float clip_dst_y1;
if (!compute_pixels_clipped(*srcX0, *srcY0, *srcX1, *srcY1,
0, 0, read_fb->Width, read_fb->Height,
&clip_src_x0, &clip_src_y0, &clip_src_x1, &clip_src_y1))
return true;
if (!compute_pixels_clipped(*dstX0, *dstY0, *dstX1, *dstY1,
draw_fb->_Xmin, draw_fb->_Ymin, draw_fb->_Xmax, draw_fb->_Ymax,
&clip_dst_x0, &clip_dst_y0, &clip_dst_x1, &clip_dst_y1))
return true;
/* When clipping either of the two rects we need to adjust the coordinates in
* the other rect considering the scaling factor involved. To obtain the best
* precision we want to make sure that we only clip once per side to avoid
* accumulating errors due to the scaling adjustment.
*
* For example, if srcX0 and dstX0 need both to be clipped we want to avoid
* the situation where we clip srcX0 first, then adjust dstX0 accordingly
* but then we realize that the resulting dstX0 still needs to be clipped,
* so we clip dstX0 and adjust srcX0 again. Because we are applying scaling
* factors to adjust the coordinates in each clipping pass we lose some
* precision and that can affect the results of the blorp blit operation
* slightly. What we want to do here is detect the rect that we should
* clip first for each side so that when we adjust the other rect we ensure
* the resulting coordinate does not need to be clipped again.
*
* The code below implements this by comparing the number of pixels that
* we need to clip for each side of both rects considering the scales
* involved. For example, clip_src_x0 represents the number of pixels to be
* clipped for the src rect's left side, so if clip_src_x0 = 5,
* clip_dst_x0 = 4 and scaleX = 2 it means that we are clipping more from
* the dst rect so we should clip dstX0 only and adjust srcX0. This is
* because clipping 4 pixels in the dst is equivalent to clipping
* 4 * 2 = 8 > 5 in the src.
*/
if (*srcX0 == *srcX1 || *srcY0 == *srcY1
|| *dstX0 == *dstX1 || *dstY0 == *dstY1)
return true;
float scaleX = (float) (*srcX1 - *srcX0) / (*dstX1 - *dstX0);
float scaleY = (float) (*srcY1 - *srcY0) / (*dstY1 - *dstY0);
/* Clip left side */
clip_coordinates(*mirror_x,
srcX0, dstX0, dstX1,
clip_src_x0, clip_dst_x0, clip_dst_x1,
scaleX, true);
/* Clip right side */
clip_coordinates(*mirror_x,
srcX1, dstX1, dstX0,
clip_src_x1, clip_dst_x1, clip_dst_x0,
scaleX, false);
/* Clip bottom side */
clip_coordinates(*mirror_y,
srcY0, dstY0, dstY1,
clip_src_y0, clip_dst_y0, clip_dst_y1,
scaleY, true);
/* Clip top side */
clip_coordinates(*mirror_y,
srcY1, dstY1, dstY0,
clip_src_y1, clip_dst_y1, clip_dst_y0,
scaleY, false);
/* Account for the fact that in the system framebuffer, the origin is at
* the lower left.
*/
if (read_fb->FlipY) {
GLint tmp = read_fb->Height - *srcY0;
*srcY0 = read_fb->Height - *srcY1;
*srcY1 = tmp;
*mirror_y = !*mirror_y;
}
if (draw_fb->FlipY) {
GLint tmp = draw_fb->Height - *dstY0;
*dstY0 = draw_fb->Height - *dstY1;
*dstY1 = tmp;
*mirror_y = !*mirror_y;
}
/* Check for invalid bounds
* Can't blit for 0-dimensions
*/
return *srcX0 == *srcX1 || *srcY0 == *srcY1
|| *dstX0 == *dstX1 || *dstY0 == *dstY1;
}
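/* Illustrative sketch (hypothetical values, not in the original file): a
 * worked instance of the example in the comment above — clip_src_x0 = 5,
 * clip_dst_x0 = 4, scaleX = 2 — showing that the dst side is clipped and
 * the src side is adjusted by the scaled amount.
 */
static float
example_clip_left_side(void)
{
   float srcX0 = 10.0f, dstX0 = 20.0f, dstX1 = 120.0f;

   /* Clipping 4 dst pixels covers 4 * 2 = 8 src pixels, which is more than
    * the 5 src pixels that need clipping, so clip_coordinates() clips the
    * dst coordinate and adjusts the src one: dstX0 += 4, srcX0 += 4 * 2.
    */
   clip_coordinates(false /* mirror */, &srcX0, &dstX0, &dstX1,
                    5.0f /* clipped_src0 */, 4.0f /* clipped_dst0 */,
                    0.0f /* clipped_dst1 */, 2.0f /* scale */,
                    true /* isLeftOrBottom */);

   return srcX0; /* 18.0f; dstX0 is now 24.0f */
}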
/**
* Determine if fast color clear supports the given clear color.
*
* Fast color clear can only clear to color values of 1.0 or 0.0. At the
* moment we only support floating point, unorm, and snorm buffers.
*/
bool
brw_is_color_fast_clear_compatible(struct brw_context *brw,
const struct brw_mipmap_tree *mt,
const union gl_color_union *color)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const struct gl_context *ctx = &brw->ctx;
/* If we're mapping the render format to a different format than the
* format we use for texturing then it is a bit questionable whether it
* should be possible to use a fast clear. Although we only actually
* render using a renderable format, without the override workaround it
* wouldn't be possible to have a non-renderable surface in a fast clear
* state so the hardware probably legitimately doesn't need to support
* this case. At least on Gfx9 this really does seem to cause problems.
*/
if (devinfo->ver >= 9 &&
brw_isl_format_for_mesa_format(mt->format) !=
brw->mesa_to_isl_render_format[mt->format])
return false;
const mesa_format format = _mesa_get_render_format(ctx, mt->format);
if (_mesa_is_format_integer_color(format)) {
if (devinfo->ver >= 8) {
perf_debug("Integer fast clear not enabled for (%s)",
_mesa_get_format_name(format));
}
return false;
}
for (int i = 0; i < 4; i++) {
if (!_mesa_format_has_color_component(format, i)) {
continue;
}
if (devinfo->ver < 9 &&
color->f[i] != 0.0f && color->f[i] != 1.0f) {
return false;
}
}
return true;
}
/**
* Convert the given color to a bitfield suitable for ORing into DWORD 7 of
* SURFACE_STATE (DWORD 12-15 on SKL+).
*/
union isl_color_value
brw_meta_convert_fast_clear_color(const struct brw_context *brw,
const struct brw_mipmap_tree *mt,
const union gl_color_union *color)
{
union isl_color_value override_color = {
.u32 = {
color->ui[0],
color->ui[1],
color->ui[2],
color->ui[3],
},
};
/* The sampler doesn't look at the format of the surface when the fast
* clear color is used so we need to implement luminance, intensity and
* missing components manually.
*/
switch (_mesa_get_format_base_format(mt->format)) {
case GL_INTENSITY:
override_color.u32[3] = override_color.u32[0];
FALLTHROUGH;
case GL_LUMINANCE:
case GL_LUMINANCE_ALPHA:
override_color.u32[1] = override_color.u32[0];
override_color.u32[2] = override_color.u32[0];
break;
default:
for (int i = 0; i < 3; i++) {
if (!_mesa_format_has_color_component(mt->format, i))
override_color.u32[i] = 0;
}
break;
}
switch (_mesa_get_format_datatype(mt->format)) {
case GL_UNSIGNED_NORMALIZED:
for (int i = 0; i < 4; i++)
override_color.f32[i] = SATURATE(override_color.f32[i]);
break;
case GL_SIGNED_NORMALIZED:
for (int i = 0; i < 4; i++)
override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f);
break;
case GL_UNSIGNED_INT:
for (int i = 0; i < 4; i++) {
unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
if (bits < 32) {
uint32_t max = (1u << bits) - 1;
override_color.u32[i] = MIN2(override_color.u32[i], max);
}
}
break;
case GL_INT:
for (int i = 0; i < 4; i++) {
unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
if (bits < 32) {
int32_t max = (1 << (bits - 1)) - 1;
int32_t min = -(1 << (bits - 1));
override_color.i32[i] = CLAMP(override_color.i32[i], min, max);
}
}
break;
case GL_FLOAT:
if (!_mesa_is_format_signed(mt->format)) {
for (int i = 0; i < 4; i++)
override_color.f32[i] = MAX2(override_color.f32[i], 0.0f);
}
break;
}
if (!_mesa_format_has_color_component(mt->format, 3)) {
if (_mesa_is_format_integer_color(mt->format))
override_color.u32[3] = 1;
else
override_color.f32[3] = 1.0f;
}
/* Handle linear to SRGB conversion */
if (brw->ctx.Color.sRGBEnabled &&
_mesa_get_srgb_format_linear(mt->format) != mt->format) {
for (int i = 0; i < 3; i++) {
override_color.f32[i] =
util_format_linear_to_srgb_float(override_color.f32[i]);
}
}
return override_color;
}
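/* Illustrative worked example (hypothetical helper, not in the original
 * file): the GL_UNSIGNED_INT clamp performed above, for a 10-bit channel —
 * max = (1u << 10) - 1 = 1023, so a requested clear value of 2000 is stored
 * as 1023.
 */
static uint32_t
example_clamp_uint_clear_channel(uint32_t value, unsigned bits)
{
   if (bits < 32) {
      const uint32_t max = (1u << bits) - 1;
      value = MIN2(value, max);
   }
   return value; /* example_clamp_uint_clear_channel(2000, 10) == 1023 */
}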

View file

@ -1,59 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_META_UTIL_H
#define BRW_META_UTIL_H
#include <stdbool.h>
#include "main/mtypes.h"
#include "brw_mipmap_tree.h"
#ifdef __cplusplus
extern "C" {
#endif
bool
brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
const struct gl_framebuffer *read_fb,
const struct gl_framebuffer *draw_fb,
GLfloat *srcX0, GLfloat *srcY0,
GLfloat *srcX1, GLfloat *srcY1,
GLfloat *dstX0, GLfloat *dstY0,
GLfloat *dstX1, GLfloat *dstY1,
bool *mirror_x, bool *mirror_y);
union isl_color_value
brw_meta_convert_fast_clear_color(const struct brw_context *brw,
const struct brw_mipmap_tree *mt,
const union gl_color_union *color);
bool
brw_is_color_fast_clear_compatible(struct brw_context *brw,
const struct brw_mipmap_tree *mt,
const union gl_color_union *color);
#ifdef __cplusplus
}
#endif
#endif /* BRW_META_UTIL_H */

File diff suppressed because it is too large

View file

@ -1,741 +0,0 @@
/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** @file brw_mipmap_tree.h
*
* This file defines the structure that wraps a BO and describes how the
* mipmap levels and slices of a texture are laid out.
*
* The hardware has a fixed layout of a texture depending on parameters such
* as the target/type (2D, 3D, CUBE), width, height, pitch, and number of
* mipmap levels. The individual level/layer slices are each 2D rectangles of
* pixels at some x/y offset from the start of the brw_bo.
*
* Original OpenGL allowed texture miplevels to be specified in arbitrary
* order, and a texture may change size over time. Thus, each
* brw_texture_image has a reference to a miptree that contains the pixel
* data sized appropriately for it, which will later be referenced by/copied
* to the brw_texture_object at draw time (brw_finalize_mipmap_tree()) so
* that there's a single miptree for the complete texture.
*/
#ifndef BRW_MIPMAP_TREE_H
#define BRW_MIPMAP_TREE_H
#include <assert.h>
#include "main/mtypes.h"
#include "isl/isl.h"
#include "blorp/blorp.h"
#include "brw_bufmgr.h"
#include "brw_context.h"
#include <GL/internal/dri_interface.h>
#ifdef __cplusplus
extern "C" {
#endif
struct brw_context;
struct brw_renderbuffer;
struct brw_texture_image;
/**
* This bit extends the set of GL_MAP_*_BIT enums.
*
* When calling brw_miptree_map() on an ETC-transcoded-to-RGB miptree or a
* depthstencil-split-to-separate-stencil miptree, we'll normally make a
* temporary and recreate the kind of data requested by Mesa core, since we're
* satisfying some glGetTexImage() request or something.
*
* However, occasionally you want to actually map the miptree's current data
* without transcoding back. This flag to brw_miptree_map() gets you that.
*/
#define BRW_MAP_DIRECT_BIT 0x80000000
struct brw_miptree_map {
/** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */
GLbitfield mode;
/** Region of interest for the map. */
int x, y, w, h;
/** Possibly malloced temporary buffer for the mapping. */
void *buffer;
/** Possible pointer to a temporary linear miptree for the mapping. */
struct brw_mipmap_tree *linear_mt;
/** Pointer to the start of (map_x, map_y) returned by the mapping. */
void *ptr;
/** Stride of the mapping. */
int stride;
void (*unmap)(struct brw_context *brw,
struct brw_mipmap_tree *mt,
struct brw_miptree_map *map,
unsigned int level,
unsigned int slice);
};
/**
* Describes the location of each texture image within a miptree.
*/
struct brw_mipmap_level
{
/** Offset to this miptree level, used in computing x_offset. */
GLuint level_x;
/** Offset to this miptree level, used in computing y_offset. */
GLuint level_y;
/**
* \brief Is HiZ enabled for this level?
*
* If \c mt->level[l].has_hiz is set, then (1) \c mt->hiz_mt has been
* allocated and (2) the HiZ memory for the slices in this level reside at
* \c mt->hiz_mt->level[l].
*/
bool has_hiz;
/**
* \brief List of 2D images in this mipmap level.
*
* This may be a list of cube faces, array slices in 2D array texture, or
* layers in a 3D texture. The list's length is \c depth.
*/
struct brw_mipmap_slice {
/**
* Mapping information. Persistent for the duration of
* brw_miptree_map/unmap on this slice.
*/
struct brw_miptree_map *map;
} *slice;
};
/**
* Miptree aux buffer. These buffers are associated with a miptree, but the
* format is managed by the hardware.
*
* For Gfx7+, we always give the hardware the start of the buffer, and let it
* handle all accesses to the buffer. Therefore we don't need the full miptree
* layout structure for this buffer.
*/
struct brw_miptree_aux_buffer
{
struct isl_surf surf;
/**
* Buffer object containing the pixel data.
*
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
* @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress
*/
struct brw_bo *bo;
/**
* Offset into bo where the surface starts.
*
* @see brw_mipmap_aux_buffer::bo
*
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
* @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
* @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
*/
uint32_t offset;
/**
* Buffer object containing the indirect clear color.
*
* @see create_ccs_buf_for_image
* @see RENDER_SURFACE_STATE.ClearValueAddress
*/
struct brw_bo *clear_color_bo;
/**
* Offset into bo where the clear color can be found.
*
* @see create_ccs_buf_for_image
* @see RENDER_SURFACE_STATE.ClearValueAddress
*/
uint32_t clear_color_offset;
};
struct brw_mipmap_tree
{
struct isl_surf surf;
/**
* Buffer object containing the surface.
*
* @see brw_mipmap_tree::offset
* @see RENDER_SURFACE_STATE.SurfaceBaseAddress
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
* @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
* @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
*/
struct brw_bo *bo;
/**
* @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc.
*
* @see RENDER_SURFACE_STATE.SurfaceType
* @see RENDER_SURFACE_STATE.SurfaceArray
* @see 3DSTATE_DEPTH_BUFFER.SurfaceType
*/
GLenum target;
/**
* Generally, this is just the same as the gl_texture_image->TexFormat or
* gl_renderbuffer->Format.
*
* However, for textures and renderbuffers with packed depth/stencil formats
* on hardware where we want or need to use separate stencil, there will be
* two miptrees for storing the data. If the depthstencil texture or rb is
* MESA_FORMAT_Z32_FLOAT_S8X24_UINT, then mt->format will be
* MESA_FORMAT_Z_FLOAT32, otherwise for MESA_FORMAT_Z24_UNORM_S8_UINT objects it will be
* MESA_FORMAT_Z24_UNORM_X8_UINT.
*
* @see RENDER_SURFACE_STATE.SurfaceFormat
* @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat
*/
mesa_format format;
GLuint first_level;
GLuint last_level;
/** Bytes per pixel (or bytes per block if compressed) */
GLuint cpp;
bool compressed;
/* Includes image offset tables: */
struct brw_mipmap_level level[MAX_TEXTURE_LEVELS];
/**
* Offset into bo where the surface starts.
*
* @see brw_mipmap_tree::bo
*
* @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
* @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
* @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
* @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
*/
uint32_t offset;
/**
* \brief The type of auxiliary compression used by this miptree.
*
* This describes the type of auxiliary compression that is intended to be
* used by this miptree. An aux usage of ISL_AUX_USAGE_NONE means that
* auxiliary compression is permanently disabled. An aux usage other than
* ISL_AUX_USAGE_NONE does not imply that the auxiliary buffer has actually
* been allocated nor does it imply that auxiliary compression will always
* be enabled for this surface. For instance, with CCS_D, we may allocate
* the CCS on-the-fly and it may not be used for texturing if the miptree
* is fully resolved.
*/
enum isl_aux_usage aux_usage;
/**
* \brief Whether or not this miptree supports fast clears.
*/
bool supports_fast_clear;
/**
* \brief Maps miptree slices to their current aux state
*
* This two-dimensional array is indexed as [level][layer] and stores an
* aux state for each slice.
*/
enum isl_aux_state **aux_state;
/**
* \brief Stencil miptree for depthstencil textures.
*
* This miptree is used for depthstencil textures and renderbuffers that
* require separate stencil. It always has the true copy of the stencil
* bits, regardless of mt->format.
*
* \see 3DSTATE_STENCIL_BUFFER
* \see brw_miptree_map_depthstencil()
* \see brw_miptree_unmap_depthstencil()
*/
struct brw_mipmap_tree *stencil_mt;
/**
 * \brief Shadow miptree for sampling when the main surface isn't supported by the HW.
*
* To workaround various sampler bugs and limitations, we blit the main
* texture into a new texture that can be sampled.
*
* This miptree may be used for:
* - Stencil texturing (pre-BDW) as required by GL_ARB_stencil_texturing.
* - To store the decompressed ETC/EAC data in case we emulate the ETC
* compression on Gen 7 or earlier GPUs.
*/
struct brw_mipmap_tree *shadow_mt;
bool shadow_needs_update;
/**
* \brief CCS, MCS, or HiZ auxiliary buffer.
*
* NULL if no auxiliary buffer is in use for this surface.
*
* For single-sampled color miptrees:
* This buffer contains the Color Control Surface, which stores the
* necessary information to implement lossless color compression (CCS_E)
* and "fast color clear" (CCS_D) behaviour.
*
* For multi-sampled color miptrees:
* This buffer contains the Multisample Control Surface, which stores the
* necessary information to implement compressed MSAA
* (INTEL_MSAA_FORMAT_CMS).
*
* For depth miptrees:
* This buffer contains the Hierarchical Depth Buffer, which stores the
* necessary information to implement lossless depth compression and fast
* depth clear behavior.
*
* To determine if HiZ is enabled, do not check this pointer. Instead,
* use brw_miptree_level_has_hiz().
*/
struct brw_miptree_aux_buffer *aux_buf;
/**
* Planes 1 and 2 in case this is a planar surface.
*/
struct brw_mipmap_tree *plane[2];
/**
* Fast clear color for this surface. For depth surfaces, the clear value
* is stored as a float32 in the red component.
*/
union isl_color_value fast_clear_color;
/**
* For external surfaces, this is DRM format modifier that was used to
* create or import the surface. For internal surfaces, this will always
* be DRM_FORMAT_MOD_INVALID.
*/
uint64_t drm_modifier;
/* These are also refcounted:
*/
GLuint refcount;
};
bool
brw_miptree_alloc_aux(struct brw_context *brw,
struct brw_mipmap_tree *mt);
enum brw_miptree_create_flags {
/** No miptree create flags */
MIPTREE_CREATE_DEFAULT = 0,
/** Miptree creation should try to allocate a currently busy BO
*
* This may be advantageous if we know the next thing to touch the BO will
* be the GPU because the BO will likely already be in the GTT and maybe
* even in some caches. If there is a chance that the next thing to touch
* the miptree BO will be the CPU, this flag should not be set.
*/
MIPTREE_CREATE_BUSY = 1 << 0,
/** Create the miptree with auxiliary compression disabled
*
* This does not prevent the caller of brw_miptree_create from coming
* along later and turning auxiliary compression back on but it does mean
* that the miptree will be created with mt->aux_usage == NONE.
*/
MIPTREE_CREATE_NO_AUX = 1 << 1,
};
struct brw_mipmap_tree *brw_miptree_create(struct brw_context *brw,
GLenum target,
mesa_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
GLuint num_samples,
enum brw_miptree_create_flags flags);
struct brw_mipmap_tree *
brw_miptree_create_for_bo(struct brw_context *brw,
struct brw_bo *bo,
mesa_format format,
uint32_t offset,
uint32_t width,
uint32_t height,
uint32_t depth,
int pitch,
enum isl_tiling tiling,
enum brw_miptree_create_flags flags);
struct brw_mipmap_tree *
brw_miptree_create_for_dri_image(struct brw_context *brw,
__DRIimage *image,
GLenum target,
mesa_format format,
bool allow_internal_aux);
bool
brw_update_winsys_renderbuffer_miptree(struct brw_context *intel,
struct brw_renderbuffer *irb,
struct brw_mipmap_tree *singlesample_mt,
uint32_t width, uint32_t height,
uint32_t pitch);
/**
* Create a miptree appropriate as the storage for a non-texture renderbuffer.
* The miptree has the following properties:
* - The target is GL_TEXTURE_2D.
* - There are no levels other than the base level 0.
* - Depth is 1.
*/
struct brw_mipmap_tree*
brw_miptree_create_for_renderbuffer(struct brw_context *brw,
mesa_format format,
uint32_t width,
uint32_t height,
uint32_t num_samples);
mesa_format
brw_depth_format_for_depthstencil_format(mesa_format format);
mesa_format
brw_lower_compressed_format(struct brw_context *brw, mesa_format format);
unsigned
brw_get_num_logical_layers(const struct brw_mipmap_tree *mt, unsigned level);
/** \brief Assert that the level and layer are valid for the miptree. */
void
brw_miptree_check_level_layer(const struct brw_mipmap_tree *mt,
uint32_t level,
uint32_t layer);
void brw_miptree_reference(struct brw_mipmap_tree **dst,
struct brw_mipmap_tree *src);
void brw_miptree_release(struct brw_mipmap_tree **mt);
/* Check if an image fits an existing mipmap tree layout
*/
bool brw_miptree_match_image(struct brw_mipmap_tree *mt,
struct gl_texture_image *image);
void
brw_miptree_get_image_offset(const struct brw_mipmap_tree *mt,
GLuint level, GLuint slice,
GLuint *x, GLuint *y);
enum isl_surf_dim
get_isl_surf_dim(GLenum target);
enum isl_dim_layout
get_isl_dim_layout(const struct intel_device_info *devinfo,
enum isl_tiling tiling, GLenum target);
void
brw_get_image_dims(struct gl_texture_image *image,
int *width, int *height, int *depth);
uint32_t
brw_miptree_get_tile_offsets(const struct brw_mipmap_tree *mt,
GLuint level, GLuint slice,
uint32_t *tile_x,
uint32_t *tile_y);
uint32_t
brw_miptree_get_aligned_offset(const struct brw_mipmap_tree *mt,
uint32_t x, uint32_t y);
void
brw_miptree_copy_slice(struct brw_context *brw,
struct brw_mipmap_tree *src_mt,
unsigned src_level, unsigned src_layer,
struct brw_mipmap_tree *dst_mt,
unsigned dst_level, unsigned dst_layer);
void
brw_miptree_copy_teximage(struct brw_context *brw,
struct brw_texture_image *brw_image,
struct brw_mipmap_tree *dst_mt);
/**
* \name Miptree HiZ functions
* \{
*
* It is safe to call the "slice_set_need_resolve" and "slice_resolve"
* functions on a miptree without HiZ. In that case, each function is a no-op.
*/
bool
brw_miptree_level_has_hiz(const struct brw_mipmap_tree *mt, uint32_t level);
/**\}*/
bool
brw_miptree_has_color_unresolved(const struct brw_mipmap_tree *mt,
unsigned start_level, unsigned num_levels,
unsigned start_layer, unsigned num_layers);
#define INTEL_REMAINING_LAYERS UINT32_MAX
#define INTEL_REMAINING_LEVELS UINT32_MAX
/** Prepare a miptree for access
*
 * This function should be called prior to any access to the miptree in order to
* perform any needed resolves.
*
* \param[in] start_level The first mip level to be accessed
*
* \param[in] num_levels The number of miplevels to be accessed or
* INTEL_REMAINING_LEVELS to indicate every level
* above start_level will be accessed
*
* \param[in] start_layer The first array slice or 3D layer to be accessed
*
 * \param[in] num_layers The number of array slices or 3D layers to be
* accessed or INTEL_REMAINING_LAYERS to indicate
* every layer above start_layer will be accessed
*
 * \param[in] aux_usage           The auxiliary compression usage with which
 *                                the access will be performed; this must be
 *                                ISL_AUX_USAGE_NONE for uncompressed miptrees
*
* \param[in] fast_clear_supported Whether or not the access will support
* fast clears in the miptree's auxiliary
* compression format
*/
void
brw_miptree_prepare_access(struct brw_context *brw,
struct brw_mipmap_tree *mt,
uint32_t start_level, uint32_t num_levels,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_usage aux_usage,
bool fast_clear_supported);
/** Complete a write operation
*
* This function should be called after any operation writes to a miptree.
* This will update the miptree's compression state so that future resolves
* happen correctly. Technically, this function can be called before the
 * write occurs, but the caller must ensure that they don't interleave
* brw_miptree_prepare_access and brw_miptree_finish_write calls to
* overlapping layer/level ranges.
*
* \param[in] level The mip level that was written
*
* \param[in] start_layer The first array slice or 3D layer written
*
* \param[in] num_layers The number of array slices or 3D layers
* written or INTEL_REMAINING_LAYERS to indicate
* every layer above start_layer was written
*
* \param[in] written_with_aux Whether or not the write was done with
* auxiliary compression enabled
*/
void
brw_miptree_finish_write(struct brw_context *brw,
struct brw_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_usage aux_usage);
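/* Illustrative pairing (not part of the original header): a GPU writer that
 * understands the miptree's compression would bracket its commands as
 *
 *    brw_miptree_prepare_access(brw, mt, level, 1, layer, 1,
 *                               mt->aux_usage, true);
 *    ... emit rendering or blit commands ...
 *    brw_miptree_finish_write(brw, mt, level, layer, 1, mt->aux_usage);
 *
 * brw_miptree_access_raw() further below is the uncompressed form of the
 * same pattern.
 */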
/** Get the auxiliary compression state of a miptree slice */
enum isl_aux_state
brw_miptree_get_aux_state(const struct brw_mipmap_tree *mt,
uint32_t level, uint32_t layer);
/** Set the auxiliary compression state of a miptree slice range
*
* This function directly sets the auxiliary compression state of a slice
* range of a miptree. It only modifies data structures and does not do any
* resolves. This should only be called by code which directly performs
* compression operations such as fast clears and resolves. Most code should
* use brw_miptree_prepare_access or brw_miptree_finish_write.
*/
void
brw_miptree_set_aux_state(struct brw_context *brw,
struct brw_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_state aux_state);
/**
* Prepare a miptree for raw access
*
* This helper prepares the miptree for access that knows nothing about any
* sort of compression whatsoever. This is useful when mapping the surface or
* using it with the blitter.
*/
static inline void
brw_miptree_access_raw(struct brw_context *brw,
struct brw_mipmap_tree *mt,
uint32_t level, uint32_t layer,
bool write)
{
brw_miptree_prepare_access(brw, mt, level, 1, layer, 1,
ISL_AUX_USAGE_NONE, false);
if (write)
brw_miptree_finish_write(brw, mt, level, layer, 1, ISL_AUX_USAGE_NONE);
}
enum isl_aux_usage
brw_miptree_texture_aux_usage(struct brw_context *brw,
struct brw_mipmap_tree *mt,
enum isl_format view_format,
enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits);
void
brw_miptree_prepare_texture(struct brw_context *brw,
struct brw_mipmap_tree *mt,
enum isl_format view_format,
uint32_t start_level, uint32_t num_levels,
uint32_t start_layer, uint32_t num_layers,
enum gfx9_astc5x5_wa_tex_type astc5x5_wa_bits);
void
brw_miptree_prepare_image(struct brw_context *brw,
struct brw_mipmap_tree *mt);
enum isl_aux_usage
brw_miptree_render_aux_usage(struct brw_context *brw,
struct brw_mipmap_tree *mt,
enum isl_format render_format,
bool blend_enabled,
bool draw_aux_disabled);
void
brw_miptree_prepare_render(struct brw_context *brw,
struct brw_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
enum isl_aux_usage aux_usage);
void
brw_miptree_finish_render(struct brw_context *brw,
struct brw_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
enum isl_aux_usage aux_usage);
void
brw_miptree_prepare_depth(struct brw_context *brw,
struct brw_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t layer_count);
void
brw_miptree_finish_depth(struct brw_context *brw,
struct brw_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
bool depth_written);
void
brw_miptree_prepare_external(struct brw_context *brw,
struct brw_mipmap_tree *mt);
void
brw_miptree_finish_external(struct brw_context *brw,
struct brw_mipmap_tree *mt);
void
brw_miptree_make_shareable(struct brw_context *brw,
struct brw_mipmap_tree *mt);
void
brw_miptree_updownsample(struct brw_context *brw,
struct brw_mipmap_tree *src,
struct brw_mipmap_tree *dst);
void
brw_update_r8stencil(struct brw_context *brw,
struct brw_mipmap_tree *mt);
void
brw_miptree_map(struct brw_context *brw,
struct brw_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode,
void **out_ptr,
ptrdiff_t *out_stride);
void
brw_miptree_unmap(struct brw_context *brw,
struct brw_mipmap_tree *mt,
unsigned int level,
unsigned int slice);
bool
brw_miptree_sample_with_hiz(struct brw_context *brw,
struct brw_mipmap_tree *mt);
bool
brw_miptree_set_clear_color(struct brw_context *brw,
struct brw_mipmap_tree *mt,
union isl_color_value clear_color);
/* Get a clear color suitable for filling out an ISL surface state. */
union isl_color_value
brw_miptree_get_clear_color(const struct brw_mipmap_tree *mt,
struct brw_bo **clear_color_bo,
uint64_t *clear_color_offset);
static inline int
brw_miptree_blt_pitch(struct brw_mipmap_tree *mt)
{
int pitch = mt->surf.row_pitch_B;
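   /* The blitter expects tiled pitches in dwords rather than bytes, hence
    * the divide by four below.
    */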
if (mt->surf.tiling != ISL_TILING_LINEAR)
pitch /= 4;
return pitch;
}
isl_memcpy_type
brw_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type,
uint32_t *cpp);
static inline bool
brw_miptree_needs_fake_etc(struct brw_context *brw,
struct brw_mipmap_tree *mt)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
bool is_etc = _mesa_is_format_etc2(mt->format) ||
(mt->format == MESA_FORMAT_ETC1_RGB8);
return devinfo->ver < 8 && devinfo->platform != INTEL_PLATFORM_BYT && is_etc;
}
static inline bool
brw_miptree_has_etc_shadow(struct brw_context *brw,
struct brw_mipmap_tree *mt)
{
return brw_miptree_needs_fake_etc(brw, mt) && mt->shadow_mt;
}
void
brw_miptree_update_etc_shadow_levels(struct brw_context *brw,
struct brw_mipmap_tree *mt);
#ifdef __cplusplus
}
#endif
#endif

View file

@@ -1,728 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "brw_batch.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "compiler/brw_eu_defines.h"
#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/format_utils.h"
#include "main/glformats.h"
/**
* Upload pointers to the per-stage state.
*
* The state pointers in this packet are all relative to the general state
* base address set by CMD_STATE_BASE_ADDRESS, which is 0.
*/
static void
upload_pipelined_state_pointers(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (devinfo->ver == 5) {
/* Need to flush before changing clip max threads for errata. */
BEGIN_BATCH(1);
OUT_BATCH(MI_FLUSH);
ADVANCE_BATCH();
}
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
OUT_RELOC(brw->batch.state.bo, 0, brw->vs.base.state_offset);
if (brw->ff_gs.prog_active)
OUT_RELOC(brw->batch.state.bo, 0, brw->ff_gs.state_offset | 1);
else
OUT_BATCH(0);
OUT_RELOC(brw->batch.state.bo, 0, brw->clip.state_offset | 1);
OUT_RELOC(brw->batch.state.bo, 0, brw->sf.state_offset);
OUT_RELOC(brw->batch.state.bo, 0, brw->wm.base.state_offset);
OUT_RELOC(brw->batch.state.bo, 0, brw->cc.state_offset);
ADVANCE_BATCH();
brw->ctx.NewDriverState |= BRW_NEW_PSP;
}
static void
upload_psp_urb_cbs(struct brw_context *brw)
{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
brw_upload_cs_urb_state(brw);
}
const struct brw_tracked_state brw_psp_urb_cbs = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_FF_GS_PROG_DATA |
BRW_NEW_GFX4_UNIT_STATE |
BRW_NEW_STATE_BASE_ADDRESS |
BRW_NEW_URB_FENCE,
},
.emit = upload_psp_urb_cbs,
};
uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct brw_renderbuffer *drb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
struct brw_renderbuffer *srb;
if (!drb &&
(srb = brw_get_renderbuffer(fb, BUFFER_STENCIL)) &&
!srb->mt->stencil_mt &&
(brw_rb_format(srb) == MESA_FORMAT_Z24_UNORM_S8_UINT ||
brw_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_S8X24_UINT)) {
drb = srb;
}
if (!drb)
return BRW_DEPTHFORMAT_D32_FLOAT;
return brw_depth_format(brw, drb->mt->format);
}
static struct brw_mipmap_tree *
get_stencil_miptree(struct brw_renderbuffer *irb)
{
if (!irb)
return NULL;
if (irb->mt->stencil_mt)
return irb->mt->stencil_mt;
return brw_renderbuffer_get_mt(irb);
}
static bool
rebase_depth_stencil(struct brw_context *brw, struct brw_renderbuffer *irb,
bool invalidate)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
uint32_t tile_mask_x = 0, tile_mask_y = 0;
isl_get_tile_masks(irb->mt->surf.tiling, irb->mt->cpp,
&tile_mask_x, &tile_mask_y);
assert(!brw_miptree_level_has_hiz(irb->mt, irb->mt_level));
uint32_t tile_x = irb->draw_x & tile_mask_x;
uint32_t tile_y = irb->draw_y & tile_mask_y;
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
* Coordinate Offset X/Y":
*
* "The 3 LSBs of both offsets must be zero to ensure correct
* alignment"
*/
bool rebase = tile_x & 7 || tile_y & 7;
/* We didn't even have intra-tile offsets before g45. */
rebase |= (!devinfo->has_surface_tile_offset && (tile_x || tile_y));
if (rebase) {
perf_debug("HW workaround: blitting depth level %d to a temporary "
"to fix alignment (depth tile offset %d,%d)\n",
irb->mt_level, tile_x, tile_y);
brw_renderbuffer_move_to_temp(brw, irb, invalidate);
/* There is now only single slice miptree. */
brw->depthstencil.tile_x = 0;
brw->depthstencil.tile_y = 0;
brw->depthstencil.depth_offset = 0;
return true;
}
/* While we just tried to get everything aligned, we may have failed to do
* so in the case of rendering to array or 3D textures, where nonzero faces
* will still have an offset post-rebase. At least give an informative
* warning.
*/
WARN_ONCE((tile_x & 7) || (tile_y & 7),
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
"Truncating offset (%u:%u), bad rendering may occur.\n",
tile_x, tile_y);
tile_x &= ~7;
tile_y &= ~7;
brw->depthstencil.tile_x = tile_x;
brw->depthstencil.tile_y = tile_y;
brw->depthstencil.depth_offset = brw_miptree_get_aligned_offset(
irb->mt,
irb->draw_x & ~tile_mask_x,
irb->draw_y & ~tile_mask_y);
return false;
}
void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
GLbitfield clear_mask)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL);
struct brw_mipmap_tree *depth_mt = NULL;
bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;
if (depth_irb)
depth_mt = depth_irb->mt;
/* Initialize brw->depthstencil to 'nop' workaround state.
*/
brw->depthstencil.tile_x = 0;
brw->depthstencil.tile_y = 0;
brw->depthstencil.depth_offset = 0;
/* Gfx6+ doesn't require the workarounds, since we always program the
* surface state at the start of the whole surface.
*/
if (devinfo->ver >= 6)
return;
/* Check if depth buffer is in depth/stencil format. If so, then it's only
* safe to invalidate it if we're also clearing stencil.
*/
if (depth_irb && invalidate_depth &&
_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL)
invalidate_depth = invalidate_stencil && stencil_irb;
if (depth_irb) {
if (rebase_depth_stencil(brw, depth_irb, invalidate_depth)) {
/* In the case of stencil_irb being the same packed depth/stencil
* texture but not the same rb, make it point at our rebased mt, too.
*/
if (stencil_irb &&
stencil_irb != depth_irb &&
stencil_irb->mt == depth_mt) {
brw_miptree_reference(&stencil_irb->mt, depth_irb->mt);
brw_renderbuffer_set_draw_offset(stencil_irb);
}
}
if (stencil_irb) {
assert(stencil_irb->mt == depth_irb->mt);
assert(stencil_irb->mt_level == depth_irb->mt_level);
assert(stencil_irb->mt_layer == depth_irb->mt_layer);
}
}
/* If there is no depth attachment, consider if stencil needs rebase. */
if (!depth_irb && stencil_irb)
rebase_depth_stencil(brw, stencil_irb, invalidate_stencil);
}
static void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
struct brw_renderbuffer *depth_irb,
struct brw_mipmap_tree *depth_mt,
struct brw_renderbuffer *stencil_irb,
struct brw_mipmap_tree *stencil_mt)
{
uint32_t tile_x = brw->depthstencil.tile_x;
uint32_t tile_y = brw->depthstencil.tile_y;
uint32_t depth_surface_type = BRW_SURFACE_NULL;
uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
uint32_t depth_offset = 0;
uint32_t width = 1, height = 1;
bool tiled_surface = true;
/* If there's a packed depth/stencil bound to stencil only, we need to
* emit the packed depth/stencil buffer packet.
*/
if (!depth_irb && stencil_irb) {
depth_irb = stencil_irb;
depth_mt = stencil_mt;
}
if (depth_irb && depth_mt) {
depthbuffer_format = brw_depthbuffer_format(brw);
depth_surface_type = BRW_SURFACE_2D;
depth_offset = brw->depthstencil.depth_offset;
width = depth_irb->Base.Base.Width;
height = depth_irb->Base.Base.Height;
tiled_surface = depth_mt->surf.tiling != ISL_TILING_LINEAR;
}
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const unsigned len = (devinfo->verx10 == 45 || devinfo->ver == 5) ? 6 : 5;
BEGIN_BATCH(len);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH((depth_mt ? depth_mt->surf.row_pitch_B - 1 : 0) |
(depthbuffer_format << 18) |
(BRW_TILEWALK_YMAJOR << 26) |
(tiled_surface << 27) |
(depth_surface_type << 29));
if (depth_mt) {
OUT_RELOC(depth_mt->bo, RELOC_WRITE, depth_offset);
} else {
OUT_BATCH(0);
}
OUT_BATCH(((width + tile_x - 1) << 6) |
((height + tile_y - 1) << 19));
OUT_BATCH(0);
if (devinfo->verx10 >= 45)
OUT_BATCH(tile_x | (tile_y << 16));
else
assert(tile_x == 0 && tile_y == 0);
if (devinfo->ver >= 6)
OUT_BATCH(0);
ADVANCE_BATCH();
}
void
brw_emit_depthbuffer(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
/* _NEW_BUFFERS */
struct brw_renderbuffer *depth_irb = brw_get_renderbuffer(fb, BUFFER_DEPTH);
struct brw_renderbuffer *stencil_irb = brw_get_renderbuffer(fb, BUFFER_STENCIL);
struct brw_mipmap_tree *depth_mt = brw_renderbuffer_get_mt(depth_irb);
struct brw_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
if (depth_mt)
brw_cache_flush_for_depth(brw, depth_mt->bo);
if (stencil_mt)
brw_cache_flush_for_depth(brw, stencil_mt->bo);
if (devinfo->ver < 6) {
brw_emit_depth_stencil_hiz(brw, depth_irb, depth_mt,
stencil_irb, stencil_mt);
return;
}
/* Skip repeated NULL depth/stencil emits (think 2D rendering). */
if (!depth_mt && !stencil_mt && brw->no_depth_or_stencil) {
assert(brw->hw_ctx);
return;
}
brw_emit_depth_stall_flushes(brw);
const unsigned ds_dwords = brw->isl_dev.ds.size / 4;
brw_batch_begin(brw, ds_dwords);
uint32_t *ds_map = brw->batch.map_next;
const uint32_t ds_offset = (char *)ds_map - (char *)brw->batch.batch.map;
struct isl_view view = {
/* Some nice defaults */
.base_level = 0,
.levels = 1,
.base_array_layer = 0,
.array_len = 1,
.swizzle = ISL_SWIZZLE_IDENTITY,
};
struct isl_depth_stencil_hiz_emit_info info = {
.view = &view,
.mocs = brw_mocs(&brw->isl_dev, NULL),
};
if (depth_mt) {
view.usage |= ISL_SURF_USAGE_DEPTH_BIT;
info.depth_surf = &depth_mt->surf;
info.depth_address =
brw_batch_reloc(&brw->batch,
ds_offset + brw->isl_dev.ds.depth_offset,
depth_mt->bo, depth_mt->offset, RELOC_WRITE);
info.mocs = brw_mocs(&brw->isl_dev, depth_mt->bo);
view.base_level = depth_irb->mt_level - depth_irb->mt->first_level;
view.base_array_layer = depth_irb->mt_layer;
view.array_len = MAX2(depth_irb->layer_count, 1);
view.format = depth_mt->surf.format;
info.hiz_usage = depth_mt->aux_usage;
if (!brw_renderbuffer_has_hiz(depth_irb)) {
/* Just because a miptree has ISL_AUX_USAGE_HIZ does not mean that
* all miplevels of that miptree are guaranteed to support HiZ. See
* brw_miptree_level_enable_hiz for details.
*/
info.hiz_usage = ISL_AUX_USAGE_NONE;
}
if (info.hiz_usage == ISL_AUX_USAGE_HIZ) {
info.hiz_surf = &depth_mt->aux_buf->surf;
uint64_t hiz_offset = 0;
if (devinfo->ver == 6) {
/* HiZ surfaces on Sandy Bridge technically don't support
* mip-mapping. However, we can fake it by offsetting to the
* first slice of LOD0 in the HiZ surface.
*/
isl_surf_get_image_offset_B_tile_sa(&depth_mt->aux_buf->surf,
view.base_level, 0, 0,
&hiz_offset, NULL, NULL);
}
info.hiz_address =
brw_batch_reloc(&brw->batch,
ds_offset + brw->isl_dev.ds.hiz_offset,
depth_mt->aux_buf->bo,
depth_mt->aux_buf->offset + hiz_offset,
RELOC_WRITE);
}
info.depth_clear_value = depth_mt->fast_clear_color.f32[0];
}
if (stencil_mt) {
view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
info.stencil_surf = &stencil_mt->surf;
if (!depth_mt) {
info.mocs = brw_mocs(&brw->isl_dev, stencil_mt->bo);
view.base_level = stencil_irb->mt_level - stencil_irb->mt->first_level;
view.base_array_layer = stencil_irb->mt_layer;
view.array_len = MAX2(stencil_irb->layer_count, 1);
view.format = stencil_mt->surf.format;
}
uint64_t stencil_offset = 0;
if (devinfo->ver == 6) {
/* Stencil surfaces on Sandy Bridge technically don't support
* mip-mapping. However, we can fake it by offsetting to the
* first slice of LOD0 in the stencil surface.
*/
isl_surf_get_image_offset_B_tile_sa(&stencil_mt->surf,
view.base_level, 0, 0,
&stencil_offset, NULL, NULL);
}
info.stencil_address =
brw_batch_reloc(&brw->batch,
ds_offset + brw->isl_dev.ds.stencil_offset,
stencil_mt->bo,
stencil_mt->offset + stencil_offset,
RELOC_WRITE);
}
isl_emit_depth_stencil_hiz_s(&brw->isl_dev, ds_map, &info);
brw->batch.map_next += ds_dwords;
brw_batch_advance(brw);
brw->no_depth_or_stencil = !depth_mt && !stencil_mt;
}
const struct brw_tracked_state brw_depthbuffer = {
.dirty = {
.mesa = _NEW_BUFFERS,
.brw = BRW_NEW_AUX_STATE |
BRW_NEW_BATCH |
BRW_NEW_BLORP,
},
.emit = brw_emit_depthbuffer,
};
void
brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const bool is_965 = devinfo->verx10 == 40;
const uint32_t _3DSTATE_PIPELINE_SELECT =
is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45;
if (devinfo->ver >= 8 && devinfo->ver < 10) {
/* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
*
* Software must clear the COLOR_CALC_STATE Valid field in
* 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
* with Pipeline Select set to GPGPU.
*
* The internal hardware docs recommend the same workaround for Gfx9
* hardware too.
*/
if (pipeline == BRW_COMPUTE_PIPELINE) {
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
brw->ctx.NewDriverState |= BRW_NEW_CC_STATE;
}
}
if (devinfo->ver == 9 && pipeline == BRW_RENDER_PIPELINE) {
/* We seem to have issues with geometry flickering when 3D and compute
* are combined in the same batch and this appears to fix it.
*/
const uint32_t maxNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total - 1;
BEGIN_BATCH(9);
OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(2 << 8 | maxNumberofThreads << 16);
OUT_BATCH(0);
OUT_BATCH(2 << 16);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
if (devinfo->ver >= 6) {
/* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
* PIPELINE_SELECT [DevBWR+]":
*
* Project: DEVSNB+
*
* Software must ensure all the write caches are flushed through a
* stalling PIPE_CONTROL command followed by another PIPE_CONTROL
* command to invalidate read only caches prior to programming
* MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
*/
const unsigned dc_flush =
devinfo->ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
dc_flush |
PIPE_CONTROL_CS_STALL);
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_STATE_CACHE_INVALIDATE |
PIPE_CONTROL_INSTRUCTION_INVALIDATE);
} else {
/* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
* PIPELINE_SELECT [DevBWR+]":
*
* Project: PRE-DEVSNB
*
* Software must ensure the current pipeline is flushed via an
* MI_FLUSH or PIPE_CONTROL prior to the execution of PIPELINE_SELECT.
*/
BEGIN_BATCH(1);
OUT_BATCH(MI_FLUSH);
ADVANCE_BATCH();
}
/* Select the pipeline */
BEGIN_BATCH(1);
OUT_BATCH(_3DSTATE_PIPELINE_SELECT << 16 |
(devinfo->ver >= 9 ? (3 << 8) : 0) |
(pipeline == BRW_COMPUTE_PIPELINE ? 2 : 0));
ADVANCE_BATCH();
if (devinfo->verx10 == 70 &&
pipeline == BRW_RENDER_PIPELINE) {
/* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
* PIPELINE_SELECT [DevBWR+]":
*
* Project: DEVIVB, DEVHSW:GT3:A0
*
* Software must send a pipe_control with a CS stall and a post sync
* operation and then a dummy DRAW after every MI_SET_CONTEXT and
* after any PIPELINE_SELECT that is enabling 3D mode.
*/
gfx7_emit_cs_stall_flush(brw);
BEGIN_BATCH(7);
OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
OUT_BATCH(_3DPRIM_POINTLIST);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
if (devinfo->platform == INTEL_PLATFORM_GLK) {
/* Project: DevGLK
*
* "This chicken bit works around a hardware issue with barrier logic
* encountered when switching between GPGPU and 3D pipelines. To
* workaround the issue, this mode bit should be set after a pipeline
* is selected."
*/
const unsigned barrier_mode =
pipeline == BRW_RENDER_PIPELINE ? GLK_SCEC_BARRIER_MODE_3D_HULL
: GLK_SCEC_BARRIER_MODE_GPGPU;
brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
barrier_mode | GLK_SCEC_BARRIER_MODE_MASK);
}
}
/**
* Update the pixel hashing modes that determine the balancing of PS threads
* across subslices and slices.
*
* \param width Width bound of the rendering area (already scaled down if \p
* scale is greater than 1).
* \param height Height bound of the rendering area (already scaled down if \p
* scale is greater than 1).
* \param scale The number of framebuffer samples that could potentially be
* affected by an individual channel of the PS thread. This is
* typically one for single-sampled rendering, but for operations
* like CCS resolves and fast clears a single PS invocation may
* update a huge number of pixels, in which case a finer
* balancing is desirable in order to maximally utilize the
* bandwidth available. UINT_MAX can be used as shorthand for
* "finest hashing mode available".
*/
void
brw_emit_hashing_mode(struct brw_context *brw, unsigned width,
unsigned height, unsigned scale)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (devinfo->ver == 9) {
const uint32_t slice_hashing[] = {
/* Because all Gfx9 platforms with more than one slice require
* three-way subslice hashing, a single "normal" 16x16 slice hashing
* block is guaranteed to suffer from substantial imbalance, with one
* subslice receiving twice as much work as the other two in the
* slice.
*
* The performance impact of that would be particularly severe when
* three-way hashing is also in use for slice balancing (which is the
* case for all Gfx9 GT4 platforms), because one of the slices
* receives one every three 16x16 blocks in either direction, which
* is roughly the periodicity of the underlying subslice imbalance
* pattern ("roughly" because in reality the hardware's
* implementation of three-way hashing doesn't do exact modulo 3
* arithmetic, which somewhat decreases the magnitude of this effect
* in practice). This leads to a systematic subslice imbalance
* within that slice regardless of the size of the primitive. The
* 32x32 hashing mode guarantees that the subslice imbalance within a
* single slice hashing block is minimal, largely eliminating this
* effect.
*/
GFX9_SLICE_HASHING_32x32,
/* Finest slice hashing mode available. */
GFX9_SLICE_HASHING_NORMAL
};
const uint32_t subslice_hashing[] = {
/* The 16x16 subslice hashing mode is used on non-LLC platforms to
* match the performance of previous Mesa versions. 16x16 has a
* slight cache locality benefit especially visible in the sampler L1
* cache efficiency of low-bandwidth platforms, but it comes at the
* cost of greater subslice imbalance for primitives of dimensions
* approximately intermediate between 16x4 and 16x16.
*/
(devinfo->has_llc ? GFX9_SUBSLICE_HASHING_16x4 :
GFX9_SUBSLICE_HASHING_16x16),
/* Finest subslice hashing mode available. */
GFX9_SUBSLICE_HASHING_8x4
};
/* Dimensions of the smallest hashing block of a given hashing mode. If
* the rendering area is smaller than this there can't possibly be any
* benefit from switching to this mode, so we optimize out the
* transition.
*/
const unsigned min_size[][2] = {
{ 16, 4 },
{ 8, 4 }
};
const unsigned idx = scale > 1;
if (width > min_size[idx][0] || height > min_size[idx][1]) {
const uint32_t gt_mode =
(devinfo->num_slices == 1 ? 0 :
GFX9_SLICE_HASHING_MASK_BITS | slice_hashing[idx]) |
GFX9_SUBSLICE_HASHING_MASK_BITS | subslice_hashing[idx];
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_STALL_AT_SCOREBOARD |
PIPE_CONTROL_CS_STALL);
brw_load_register_imm32(brw, GFX7_GT_MODE, gt_mode);
brw->current_hash_scale = scale;
}
}
}
/**
* Misc invariant state packets
*/
void
brw_upload_invariant_state(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const bool is_965 = devinfo->verx10 == 40;
brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
brw->last_pipeline = BRW_RENDER_PIPELINE;
if (devinfo->ver >= 8) {
BEGIN_BATCH(3);
OUT_BATCH(CMD_STATE_SIP << 16 | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(2);
OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* Original Gfx4 doesn't have 3DSTATE_AA_LINE_PARAMETERS. */
if (!is_965) {
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
/* use legacy aa line coverage computation */
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
}

View file

@@ -1,111 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_MULTISAMPLE_STATE_H
#define BRW_MULTISAMPLE_STATE_H
#include <stdint.h>
/**
* Note: There are no standard multisample positions defined in OpenGL
* specifications. Implementations have the freedom to pick the positions
* which give plausible results. But the Vulkan specification does define
* standard sample positions. So, we decided to pick the same pattern in
* OpenGL as in Vulkan to keep it uniform across drivers and also to avoid
* breaking applications which rely on this standard pattern.
*/
/**
* 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8).
*
* 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25):
 *   4 c
 * 4 1
 * c   0
*/
static const uint32_t
brw_multisample_positions_1x_2x = 0x008844cc;
/**
* Sample positions:
 *   2 6 a e
 * 2   0
 * 6       1
 * a 2
 * e     3
*/
static const uint32_t
brw_multisample_positions_4x = 0xae2ae662;
/**
* Sample positions:
*
* From the Ivy Bridge PRM, Vol2 Part1 p304 (3DSTATE_MULTISAMPLE:
* Programming Notes):
* "When programming the sample offsets (for NUMSAMPLES_4 or _8 and
* MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7
* for 8X) must have monotonically increasing distance from the
* pixel center. This is required to get the correct centroid
* computation in the device."
*
* Sample positions:
 *   1 3 5 7 9 b d f
 * 1               7
 * 3     3
 * 5         0
 * 7 5
 * 9             2
 * b       1
 * d   4
 * f           6
*/
static const uint32_t
brw_multisample_positions_8x[] = { 0x53d97b95, 0xf1bf173d };
/**
* Sample positions:
*
 *   0 1 2 3 4 5 6 7 8 9 a b c d e f
 * 0   15
 * 1                 9
 * 2         10
 * 3                       7
 * 4                               13
 * 5               1
 * 6       4
 * 7                         3
 * 8 12
 * 9                   0
 * a           2
 * b                           6
 * c     11
 * d                     5
 * e             8
 * f                             14
*/
static const uint32_t
brw_multisample_positions_16x[] = {
0xc75a7599, 0xb3dbad36, 0x2c42816e, 0x10eff408
};
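/* Decoding sketch (an added illustration, not part of the original header):
 * each position above is a 4-bit fixed-point offset in 1/16ths of a pixel,
 * packed four samples per dword with X in the high nibble and Y in the low
 * nibble of each byte, lowest-numbered sample in the least significant byte.
 * Sample 1 of the 4x pattern, for instance, decodes to (0xe, 0x6) ->
 * (0.875, 0.375).
 */
static inline void
brw_multisample_position_decode(uint32_t packed, unsigned sample_in_dword,
                                float *x, float *y)
{
   const uint32_t byte = (packed >> (8 * sample_in_dword)) & 0xff;
   *x = (float)((byte >> 4) & 0xf) / 16.0f;
   *y = (float)(byte & 0xf) / 16.0f;
}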
#endif /* BRW_MULTISAMPLE_STATE_H */

View file

@@ -1,450 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "compiler/brw_nir.h"
#include "compiler/glsl/ir_uniform.h"
#include "compiler/nir/nir_builder.h"
#include "brw_program.h"
static void
brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
const struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data,
bool is_scalar)
{
const nir_state_slot *const slots = var->state_slots;
assert(var->state_slots != NULL);
unsigned uniform_index = var->data.driver_location / 4;
for (unsigned int i = 0; i < var->num_state_slots; i++) {
 * This state reference has already been set up by ir_to_mesa, but we'll
* get the same index back here.
*/
int index = _mesa_add_state_reference(prog->Parameters,
slots[i].tokens);
/* Add each of the unique swizzles of the element as a parameter.
* This'll end up matching the expected layout of the
* array/matrix/structure we're trying to fill in.
*/
int last_swiz = -1;
for (unsigned j = 0; j < 4; j++) {
int swiz = GET_SWZ(slots[i].swizzle, j);
/* If we hit a pair of identical swizzles, this means we've hit the
* end of the builtin variable. In scalar mode, we should just quit
* and move on to the next one. In vec4, we need to continue and pad
* it out to 4 components.
*/
if (swiz == last_swiz && is_scalar)
break;
last_swiz = swiz;
stage_prog_data->param[uniform_index++] =
BRW_PARAM_PARAMETER(index, swiz);
}
}
}
static void
setup_vec4_image_param(uint32_t *params, uint32_t idx,
unsigned offset, unsigned n)
{
assert(offset % sizeof(uint32_t) == 0);
for (unsigned i = 0; i < n; ++i)
params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
for (unsigned i = n; i < 4; ++i)
params[i] = BRW_PARAM_BUILTIN_ZERO;
}
static void
brw_setup_image_uniform_values(nir_variable *var,
struct brw_stage_prog_data *prog_data)
{
unsigned param_start_index = var->data.driver_location / 4;
uint32_t *param = &prog_data->param[param_start_index];
unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
for (unsigned i = 0; i < num_images; i++) {
const unsigned image_idx = var->data.binding + i;
/* Upload the brw_image_param structure. The order is expected to match
* the BRW_IMAGE_PARAM_*_OFFSET defines.
*/
setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
image_idx,
offsetof(brw_image_param, offset), 2);
setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
image_idx,
offsetof(brw_image_param, size), 3);
setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
image_idx,
offsetof(brw_image_param, stride), 4);
setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET,
image_idx,
offsetof(brw_image_param, tiling), 3);
setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
image_idx,
offsetof(brw_image_param, swizzling), 2);
param += BRW_IMAGE_PARAM_SIZE;
}
}
static unsigned
count_uniform_storage_slots(const struct glsl_type *type)
{
/* gl_uniform_storage can cope with one level of array, so if the
* type is a composite type or an array where each element occupies
 * more than one slot, then we need to recursively process it.
*/
if (glsl_type_is_struct_or_ifc(type)) {
unsigned location_count = 0;
for (unsigned i = 0; i < glsl_get_length(type); i++) {
const struct glsl_type *field_type = glsl_get_struct_field(type, i);
location_count += count_uniform_storage_slots(field_type);
}
return location_count;
}
if (glsl_type_is_array(type)) {
const struct glsl_type *element_type = glsl_get_array_element(type);
if (glsl_type_is_array(element_type) ||
glsl_type_is_struct_or_ifc(element_type)) {
unsigned element_count = count_uniform_storage_slots(element_type);
return element_count * glsl_get_length(type);
}
}
return 1;
}
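/* Illustrative counts (added examples): "vec4 v[8]" is one slot, since a
 * single level of array fits in one gl_uniform_storage entry;
 * "struct { vec4 a; float b[3]; }" is two slots (one per field); and an
 * array of two such structs is four slots (2 elements * 2 fields).
 */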
static void
brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
const struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data,
bool is_scalar)
{
if (var->type->without_array()->is_sampler() ||
var->type->without_array()->is_image())
return;
/* The data for our (non-builtin) uniforms is stored in a series of
* gl_uniform_storage structs for each subcomponent that
* glGetUniformLocation() could name. We know it's been set up in the same
* order we'd walk the type, so walk the list of storage that matches the
* range of slots covered by this variable.
*/
unsigned uniform_index = var->data.driver_location / 4;
unsigned num_slots = count_uniform_storage_slots(var->type);
for (unsigned u = 0; u < num_slots; u++) {
struct gl_uniform_storage *storage =
&prog->sh.data->UniformStorage[var->data.location + u];
/* We already handled samplers and images via the separate top-level
* variables created by gl_nir_lower_samplers_as_deref(), but they're
* still part of the structure's storage, and so we'll see them while
* walking it to set up the other regular fields. Just skip over them.
*/
if (storage->builtin ||
storage->type->is_sampler() ||
storage->type->is_image())
continue;
gl_constant_value *components = storage->storage;
unsigned vector_count = (MAX2(storage->array_elements, 1) *
storage->type->matrix_columns);
unsigned vector_size = storage->type->vector_elements;
unsigned max_vector_size = 4;
if (storage->type->base_type == GLSL_TYPE_DOUBLE ||
storage->type->base_type == GLSL_TYPE_UINT64 ||
storage->type->base_type == GLSL_TYPE_INT64) {
vector_size *= 2;
if (vector_size > 4)
max_vector_size = 8;
}
for (unsigned s = 0; s < vector_count; s++) {
unsigned i;
for (i = 0; i < vector_size; i++) {
uint32_t idx = components - prog->sh.data->UniformDataSlots;
stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx);
components++;
}
if (!is_scalar) {
/* Pad out with zeros if needed (only needed for vec4) */
for (; i < max_vector_size; i++) {
stage_prog_data->param[uniform_index++] =
BRW_PARAM_BUILTIN_ZERO;
}
}
}
}
}
void
brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
const struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data,
bool is_scalar)
{
unsigned nr_params = shader->num_uniforms / 4;
stage_prog_data->nr_params = nr_params;
stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
nir_foreach_uniform_variable(var, shader) {
      /* UBOs, atomics, and samplers don't take up space in the
         uniform file */
if (var->interface_type != NULL || var->type->contains_atomic())
continue;
if (var->num_state_slots > 0) {
brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
is_scalar);
} else {
brw_nir_setup_glsl_uniform(shader->info.stage, var, prog,
stage_prog_data, is_scalar);
}
}
nir_foreach_image_variable(var, shader)
brw_setup_image_uniform_values(var, stage_prog_data);
}
void
brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data)
{
struct gl_program_parameter_list *plist = prog->Parameters;
unsigned nr_params = plist->NumParameters * 4;
stage_prog_data->nr_params = nr_params;
stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
/* For ARB programs, prog_to_nir generates a single "parameters" variable
* for all uniform data. There may be additional sampler variables, and
* an extra uniform from nir_lower_wpos_ytransform.
*/
for (unsigned p = 0; p < plist->NumParameters; p++) {
/* Parameters should be either vec4 uniforms or single component
* constants; matrices and other larger types should have been broken
* down earlier.
*/
assert(plist->Parameters[p].Size <= 4);
unsigned i;
for (i = 0; i < plist->Parameters[p].Size; i++)
stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i);
for (; i < 4; i++)
stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
}
}
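/* Worked example (added illustration): a vec3 ARB program parameter at index
 * p fills param[4*p + 0..2] with BRW_PARAM_PARAMETER(p, 0..2) and pads
 * param[4*p + 3] with BRW_PARAM_BUILTIN_ZERO.
 */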
static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
nir_deref_instr *deref,
unsigned elem_size)
{
unsigned array_size = elem_size;
nir_ssa_def *offset = nir_imm_int(b, 0);
while (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
/* This level's element size is the previous level's array size */
nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
assert(deref->arr.index.ssa);
offset = nir_iadd(b, offset,
nir_imul(b, index, nir_imm_int(b, array_size)));
deref = nir_deref_instr_parent(deref);
assert(glsl_type_is_array(deref->type));
array_size *= glsl_get_length(deref->type);
}
/* Accessing an invalid surface index with the dataport can result in a
* hang. According to the spec "if the index used to select an individual
* element is negative or greater than or equal to the size of the array,
* the results of the operation are undefined but may not lead to
* termination" -- which is one of the possible outcomes of the hang.
* Clamp the index to prevent access outside of the array bounds.
*/
return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}
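/* Illustrative example (added): for an array-of-arrays image uniform
 * "image2D img[3][2]" accessed as img[i][j] with elem_size == 1, the walk
 * above yields offset = j + i * 2, and the final clamp limits it to 5, the
 * last valid element.
 */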
void
brw_nir_lower_gl_images(nir_shader *shader,
const struct gl_program *prog)
{
/* We put image uniforms at the end */
nir_foreach_image_variable(var, shader) {
const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
var->data.driver_location = shader->num_uniforms;
shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
}
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
nir_builder b;
nir_builder_init(&b, impl);
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic_add:
case nir_intrinsic_image_deref_atomic_imin:
case nir_intrinsic_image_deref_atomic_umin:
case nir_intrinsic_image_deref_atomic_imax:
case nir_intrinsic_image_deref_atomic_umax:
case nir_intrinsic_image_deref_atomic_and:
case nir_intrinsic_image_deref_atomic_or:
case nir_intrinsic_image_deref_atomic_xor:
case nir_intrinsic_image_deref_atomic_exchange:
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples:
case nir_intrinsic_image_deref_load_raw_intel:
case nir_intrinsic_image_deref_store_raw_intel: {
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
nir_variable *var = nir_deref_instr_get_variable(deref);
struct gl_uniform_storage *storage =
&prog->sh.data->UniformStorage[var->data.location];
const unsigned image_var_idx =
storage->opaque[shader->info.stage].index;
b.cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
get_aoa_deref_offset(&b, deref, 1));
nir_rewrite_image_intrinsic(intrin, index, false);
break;
}
case nir_intrinsic_image_deref_load_param_intel: {
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
nir_variable *var = nir_deref_instr_get_variable(deref);
const unsigned num_images =
MAX2(1, var->type->arrays_of_arrays_size());
b.cursor = nir_instr_remove(&intrin->instr);
const unsigned param = nir_intrinsic_base(intrin);
nir_ssa_def *offset =
get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(b.shader,
nir_intrinsic_load_uniform);
nir_intrinsic_set_base(load, var->data.driver_location);
nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
load->src[0] = nir_src_for_ssa(offset);
load->num_components = intrin->dest.ssa.num_components;
nir_ssa_dest_init(&load->instr, &load->dest,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, NULL);
nir_builder_instr_insert(&b, &load->instr);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
&load->dest.ssa);
break;
}
default:
break;
}
}
}
}
void
brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts,
struct brw_stage_prog_data *prog_data)
{
if (nr_userclip_plane_consts == 0)
return;
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false,
NULL);
nir_lower_io_to_temporaries(nir, impl, true, false);
nir_lower_global_vars_to_local(nir);
nir_lower_vars_to_ssa(nir);
const unsigned clip_plane_base = nir->num_uniforms;
assert(nir->num_uniforms == prog_data->nr_params * 4);
const unsigned num_clip_floats = 4 * nr_userclip_plane_consts;
uint32_t *clip_param =
brw_stage_prog_data_add_params(prog_data, num_clip_floats);
nir->num_uniforms += num_clip_floats * sizeof(float);
assert(nir->num_uniforms == prog_data->nr_params * 4);
for (unsigned i = 0; i < num_clip_floats; i++)
clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4);
nir_builder b;
nir_builder_init(&b, impl);
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane)
continue;
b.cursor = nir_before_instr(instr);
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
load->num_components = 4;
load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) *
nir_intrinsic_ucp_id(intrin));
nir_intrinsic_set_range(load, 4 * sizeof(float));
nir_builder_instr_insert(&b, &load->instr);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
&load->dest.ssa);
nir_instr_remove(instr);
}
}
}

View file

@@ -1,187 +0,0 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file brw_object_purgeable.c
*
* The driver implementation of the GL_APPLE_object_purgeable extension.
*/
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/bufferobj.h"
#include "brw_context.h"
#include "brw_buffer_objects.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"
static GLenum
brw_buffer_purgeable(struct brw_bo *buffer)
{
int retained = 0;
if (buffer != NULL)
retained = brw_bo_madvise(buffer, I915_MADV_DONTNEED);
return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE;
}
static GLenum
brw_buffer_object_purgeable(struct gl_context * ctx,
struct gl_buffer_object *obj,
GLenum option)
{
struct brw_buffer_object *intel_obj = brw_buffer_object(obj);
if (intel_obj->buffer != NULL)
return brw_buffer_purgeable(intel_obj->buffer);
if (option == GL_RELEASED_APPLE) {
return GL_RELEASED_APPLE;
} else {
/* XXX Create the buffer and madvise(MADV_DONTNEED)? */
return brw_buffer_purgeable(intel_obj->buffer);
}
}
static GLenum
brw_texture_object_purgeable(struct gl_context * ctx,
struct gl_texture_object *obj,
GLenum option)
{
struct brw_texture_object *intel;
(void) ctx;
(void) option;
intel = brw_texture_object(obj);
if (intel->mt == NULL || intel->mt->bo == NULL)
return GL_RELEASED_APPLE;
return brw_buffer_purgeable(intel->mt->bo);
}
static GLenum
brw_render_object_purgeable(struct gl_context * ctx,
struct gl_renderbuffer *obj,
GLenum option)
{
struct brw_renderbuffer *intel;
(void) ctx;
(void) option;
intel = brw_renderbuffer(obj);
if (intel->mt == NULL)
return GL_RELEASED_APPLE;
return brw_buffer_purgeable(intel->mt->bo);
}
static int
brw_bo_unpurgeable(struct brw_bo *buffer)
{
int retained;
retained = 0;
if (buffer != NULL)
retained = brw_bo_madvise(buffer, I915_MADV_WILLNEED);
return retained;
}
static GLenum
brw_buffer_object_unpurgeable(struct gl_context * ctx,
struct gl_buffer_object *obj,
GLenum option)
{
struct brw_buffer_object *intel = brw_buffer_object(obj);
(void) ctx;
if (!intel->buffer)
return GL_UNDEFINED_APPLE;
if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->buffer)) {
brw_bo_unreference(intel->buffer);
intel->buffer = NULL;
return GL_UNDEFINED_APPLE;
}
return GL_RETAINED_APPLE;
}
static GLenum
brw_texture_object_unpurgeable(struct gl_context * ctx,
struct gl_texture_object *obj,
GLenum option)
{
struct brw_texture_object *intel;
(void) ctx;
intel = brw_texture_object(obj);
if (intel->mt == NULL || intel->mt->bo == NULL)
return GL_UNDEFINED_APPLE;
if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) {
brw_miptree_release(&intel->mt);
return GL_UNDEFINED_APPLE;
}
return GL_RETAINED_APPLE;
}
static GLenum
brw_render_object_unpurgeable(struct gl_context * ctx,
struct gl_renderbuffer *obj,
GLenum option)
{
struct brw_renderbuffer *intel;
(void) ctx;
intel = brw_renderbuffer(obj);
if (intel->mt == NULL)
return GL_UNDEFINED_APPLE;
if (option == GL_UNDEFINED_APPLE || !brw_bo_unpurgeable(intel->mt->bo)) {
brw_miptree_release(&intel->mt);
return GL_UNDEFINED_APPLE;
}
return GL_RETAINED_APPLE;
}
void
brw_init_object_purgeable_functions(struct dd_function_table *functions)
{
functions->BufferObjectPurgeable = brw_buffer_object_purgeable;
functions->TextureObjectPurgeable = brw_texture_object_purgeable;
functions->RenderObjectPurgeable = brw_render_object_purgeable;
functions->BufferObjectUnpurgeable = brw_buffer_object_unpurgeable;
functions->TextureObjectUnpurgeable = brw_texture_object_unpurgeable;
functions->RenderObjectUnpurgeable = brw_render_object_unpurgeable;
}

View file

@@ -1,533 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_performance_query.c
*
* Implementation of the GL_INTEL_performance_query extension.
*
* Currently there are two possible counter sources exposed here:
*
* On Gfx6+ hardware we have numerous 64bit Pipeline Statistics Registers
* that we can snapshot at the beginning and end of a query.
*
* On Gfx7.5+ we have Observability Architecture counters which are
* covered in separate document from the rest of the PRMs. It is available at:
* https://01.org/linuxgraphics/documentation/driver-documentation-prms
* => 2013 Intel Core Processor Family => Observability Performance Counters
* (This one volume covers Sandybridge, Ivybridge, Baytrail, and Haswell,
* though notably we currently only support OA counters for Haswell+)
*/
#include <limits.h>
/* put before sys/types.h to silence glibc warnings */
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <xf86drm.h>
#include "drm-uapi/i915_drm.h"
#include "main/hash.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/performance_query.h"
#include "util/bitset.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/u_math.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_batch.h"
#include "perf/intel_perf.h"
#include "perf/intel_perf_regs.h"
#include "perf/intel_perf_mdapi.h"
#include "perf/intel_perf_query.h"
#define FILE_DEBUG_FLAG DEBUG_PERFMON
#define OAREPORT_REASON_MASK 0x3f
#define OAREPORT_REASON_SHIFT 19
#define OAREPORT_REASON_TIMER (1<<0)
#define OAREPORT_REASON_TRIGGER1 (1<<1)
#define OAREPORT_REASON_TRIGGER2 (1<<2)
#define OAREPORT_REASON_CTX_SWITCH (1<<3)
#define OAREPORT_REASON_GO_TRANSITION (1<<4)
struct brw_perf_query_object {
struct gl_perf_query_object base;
struct intel_perf_query_object *query;
};
/** Downcasting convenience macro. */
static inline struct brw_perf_query_object *
brw_perf_query(struct gl_perf_query_object *o)
{
return (struct brw_perf_query_object *) o;
}
#define MI_RPC_BO_SIZE 4096
#define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2)
#define MI_FREQ_START_OFFSET_BYTES (3072)
#define MI_FREQ_END_OFFSET_BYTES (3076)
/******************************************************************************/
static bool
brw_is_perf_query_ready(struct gl_context *ctx,
struct gl_perf_query_object *o);
static void
dump_perf_query_callback(void *query_void, void *brw_void)
{
struct brw_context *ctx = brw_void;
struct intel_perf_context *perf_ctx = ctx->perf_ctx;
struct gl_perf_query_object *o = query_void;
struct brw_perf_query_object * brw_query = brw_perf_query(o);
struct intel_perf_query_object *obj = brw_query->query;
DBG("%4d: %-6s %-8s ",
o->Id,
o->Used ? "Dirty," : "New,",
o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"));
intel_perf_dump_query(perf_ctx, obj, &ctx->batch);
}
static void
dump_perf_queries(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
intel_perf_dump_query_count(brw->perf_ctx);
_mesa_HashWalk(ctx->PerfQuery.Objects, dump_perf_query_callback, brw);
}
/**
* Driver hook for glGetPerfQueryInfoINTEL().
*/
static void
brw_get_perf_query_info(struct gl_context *ctx,
unsigned query_index,
const char **name,
GLuint *data_size,
GLuint *n_counters,
GLuint *n_active)
{
struct brw_context *brw = brw_context(ctx);
struct intel_perf_context *perf_ctx = brw->perf_ctx;
struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);
const struct intel_perf_query_info *query = &perf_cfg->queries[query_index];
*name = query->name;
*data_size = query->data_size;
*n_counters = query->n_counters;
*n_active = intel_perf_active_queries(perf_ctx, query);
}
static GLuint
intel_counter_type_enum_to_gl_type(enum intel_perf_counter_type type)
{
switch (type) {
case INTEL_PERF_COUNTER_TYPE_EVENT: return GL_PERFQUERY_COUNTER_EVENT_INTEL;
case INTEL_PERF_COUNTER_TYPE_DURATION_NORM: return GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL;
case INTEL_PERF_COUNTER_TYPE_DURATION_RAW: return GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL;
case INTEL_PERF_COUNTER_TYPE_THROUGHPUT: return GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL;
case INTEL_PERF_COUNTER_TYPE_RAW: return GL_PERFQUERY_COUNTER_RAW_INTEL;
case INTEL_PERF_COUNTER_TYPE_TIMESTAMP: return GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL;
default:
unreachable("Unknown counter type");
}
}
static GLuint
intel_counter_data_type_to_gl_type(enum intel_perf_counter_data_type type)
{
switch (type) {
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32: return GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL;
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32: return GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL;
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64: return GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT: return GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: return GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL;
default:
unreachable("Unknown counter data type");
}
}
/**
* Driver hook for glGetPerfCounterInfoINTEL().
*/
static void
brw_get_perf_counter_info(struct gl_context *ctx,
unsigned query_index,
unsigned counter_index,
const char **name,
const char **desc,
GLuint *offset,
GLuint *data_size,
GLuint *type_enum,
GLuint *data_type_enum,
GLuint64 *raw_max)
{
struct brw_context *brw = brw_context(ctx);
struct intel_perf_config *perf_cfg = intel_perf_config(brw->perf_ctx);
const struct intel_perf_query_info *query =
&perf_cfg->queries[query_index];
const struct intel_perf_query_counter *counter =
&query->counters[counter_index];
*name = counter->name;
*desc = counter->desc;
*offset = counter->offset;
*data_size = intel_perf_query_counter_get_size(counter);
*type_enum = intel_counter_type_enum_to_gl_type(counter->type);
*data_type_enum = intel_counter_data_type_to_gl_type(counter->data_type);
*raw_max = counter->raw_max;
}
enum OaReadStatus {
OA_READ_STATUS_ERROR,
OA_READ_STATUS_UNFINISHED,
OA_READ_STATUS_FINISHED,
};
/******************************************************************************/
/**
* Driver hook for glBeginPerfQueryINTEL().
*/
static bool
brw_begin_perf_query(struct gl_context *ctx,
struct gl_perf_query_object *o)
{
struct brw_context *brw = brw_context(ctx);
struct brw_perf_query_object *brw_query = brw_perf_query(o);
struct intel_perf_query_object *obj = brw_query->query;
struct intel_perf_context *perf_ctx = brw->perf_ctx;
/* We can assume the frontend hides mistaken attempts to Begin a
* query object multiple times before its End. Similarly if an
* application reuses a query object before results have arrived
* the frontend will wait for prior results so we don't need
* to support abandoning in-flight results.
*/
assert(!o->Active);
assert(!o->Used || o->Ready); /* no in-flight query to worry about */
DBG("Begin(%d)\n", o->Id);
bool ret = intel_perf_begin_query(perf_ctx, obj);
if (INTEL_DEBUG(DEBUG_PERFMON))
dump_perf_queries(brw);
return ret;
}
/**
* Driver hook for glEndPerfQueryINTEL().
*/
static void
brw_end_perf_query(struct gl_context *ctx,
struct gl_perf_query_object *o)
{
struct brw_context *brw = brw_context(ctx);
struct brw_perf_query_object *brw_query = brw_perf_query(o);
struct intel_perf_query_object *obj = brw_query->query;
struct intel_perf_context *perf_ctx = brw->perf_ctx;
DBG("End(%d)\n", o->Id);
intel_perf_end_query(perf_ctx, obj);
}
static void
brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
{
struct brw_context *brw = brw_context(ctx);
struct brw_perf_query_object *brw_query = brw_perf_query(o);
struct intel_perf_query_object *obj = brw_query->query;
assert(!o->Ready);
intel_perf_wait_query(brw->perf_ctx, obj, &brw->batch);
}
static bool
brw_is_perf_query_ready(struct gl_context *ctx,
struct gl_perf_query_object *o)
{
struct brw_context *brw = brw_context(ctx);
struct brw_perf_query_object *brw_query = brw_perf_query(o);
struct intel_perf_query_object *obj = brw_query->query;
if (o->Ready)
return true;
return intel_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch);
}
/**
* Driver hook for glGetPerfQueryDataINTEL().
*/
static bool
brw_get_perf_query_data(struct gl_context *ctx,
struct gl_perf_query_object *o,
GLsizei data_size,
GLuint *data,
GLuint *bytes_written)
{
struct brw_context *brw = brw_context(ctx);
struct brw_perf_query_object *brw_query = brw_perf_query(o);
struct intel_perf_query_object *obj = brw_query->query;
assert(brw_is_perf_query_ready(ctx, o));
DBG("GetData(%d)\n", o->Id);
if (INTEL_DEBUG(DEBUG_PERFMON))
dump_perf_queries(brw);
/* We expect that the frontend only calls this hook when it knows
* that results are available.
*/
assert(o->Ready);
intel_perf_get_query_data(brw->perf_ctx, obj, &brw->batch,
data_size, data, bytes_written);
return true;
}
static struct gl_perf_query_object *
brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index)
{
struct brw_context *brw = brw_context(ctx);
struct intel_perf_context *perf_ctx = brw->perf_ctx;
struct intel_perf_query_object * obj = intel_perf_new_query(perf_ctx, query_index);
if (unlikely(!obj))
return NULL;
struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object));
if (unlikely(!brw_query)) {
intel_perf_delete_query(perf_ctx, obj);
return NULL;
}
brw_query->query = obj;
return &brw_query->base;
}
/**
* Driver hook for glDeletePerfQueryINTEL().
*/
static void
brw_delete_perf_query(struct gl_context *ctx,
struct gl_perf_query_object *o)
{
struct brw_context *brw = brw_context(ctx);
struct brw_perf_query_object *brw_query = brw_perf_query(o);
struct intel_perf_query_object *obj = brw_query->query;
struct intel_perf_context *perf_ctx = brw->perf_ctx;
/* We can assume that the frontend waits for a query to complete
* before ever calling into here, so we don't have to worry about
* deleting an in-flight query object.
*/
assert(!o->Active);
assert(!o->Used || o->Ready);
DBG("Delete(%d)\n", o->Id);
intel_perf_delete_query(perf_ctx, obj);
free(brw_query);
}
/******************************************************************************/
/* intel_device_info will have incorrect default topology values for unsupported
* kernels. Verify kernel support to ensure OA metrics are accurate.
*/
static bool
oa_metrics_kernel_support(int fd, const struct intel_device_info *devinfo)
{
if (devinfo->ver >= 10) {
/* The topology uAPI (kernel 4.17+) is required for CNL+; make a call to
* the API to verify support.
*/
struct drm_i915_query_item item = {
.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
};
struct drm_i915_query query = {
.num_items = 1,
.items_ptr = (uintptr_t) &item,
};
/* kernel 4.17+ supports the query */
return drmIoctl(fd, DRM_IOCTL_I915_QUERY, &query) == 0;
}
if (devinfo->ver >= 8) {
/* The 4.13+ API is required for gfx8 - gfx9 */
int mask;
struct drm_i915_getparam gp = {
.param = I915_PARAM_SLICE_MASK,
.value = &mask,
};
/* kernel 4.13+ supports this parameter */
return drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0;
}
if (devinfo->ver == 7)
/* default topology values are correct for HSW */
return true;
/* OA is not supported before gen 7 */
return false;
}
static void *
brw_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
return brw_bo_alloc(bufmgr, name, size, BRW_MEMZONE_OTHER);
}
static void
brw_oa_emit_mi_report_perf_count(void *c,
void *bo,
uint32_t offset_in_bytes,
uint32_t report_id)
{
struct brw_context *ctx = c;
ctx->vtbl.emit_mi_report_perf_count(ctx,
bo,
offset_in_bytes,
report_id);
}
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (* emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
static void
brw_oa_batchbuffer_flush(void *c, const char *file, int line)
{
struct brw_context *ctx = c;
_brw_batch_flush_fence(ctx, -1, NULL, file, line);
}
static void
brw_oa_emit_stall_at_pixel_scoreboard(void *c)
{
struct brw_context *brw = c;
brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD);
}
static void
brw_perf_store_register(struct brw_context *brw, struct brw_bo *bo,
uint32_t reg, uint32_t reg_size,
uint32_t offset)
{
if (reg_size == 8) {
brw_store_register_mem64(brw, bo, reg, offset);
} else {
assert(reg_size == 4);
brw_store_register_mem32(brw, bo, reg, offset);
}
}
typedef void (*store_register_mem_t)(void *ctx, void *bo,
uint32_t reg, uint32_t reg_size,
uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
static unsigned
brw_init_perf_query_info(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct intel_perf_context *perf_ctx = brw->perf_ctx;
struct intel_perf_config *perf_cfg = intel_perf_config(perf_ctx);
if (perf_cfg)
return perf_cfg->n_queries;
if (!oa_metrics_kernel_support(brw->screen->fd, devinfo))
return 0;
perf_cfg = intel_perf_new(brw->mem_ctx);
perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc;
perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference;
perf_cfg->vtbl.bo_map = (bo_map_t)brw_bo_map;
perf_cfg->vtbl.bo_unmap = (bo_unmap_t)brw_bo_unmap;
perf_cfg->vtbl.emit_stall_at_pixel_scoreboard =
(emit_mi_flush_t)brw_oa_emit_stall_at_pixel_scoreboard;
perf_cfg->vtbl.emit_mi_report_perf_count =
(emit_mi_report_t)brw_oa_emit_mi_report_perf_count;
perf_cfg->vtbl.batchbuffer_flush = brw_oa_batchbuffer_flush;
perf_cfg->vtbl.store_register_mem =
(store_register_mem_t) brw_perf_store_register;
perf_cfg->vtbl.batch_references = (batch_references_t)brw_batch_references;
perf_cfg->vtbl.bo_wait_rendering = (bo_wait_rendering_t)brw_bo_wait_rendering;
perf_cfg->vtbl.bo_busy = (bo_busy_t)brw_bo_busy;
intel_perf_init_metrics(perf_cfg, devinfo, brw->screen->fd,
true /* pipeline stats */,
true /* register snapshots */);
intel_perf_init_context(perf_ctx, perf_cfg, brw->mem_ctx, brw, brw->bufmgr,
devinfo, brw->hw_ctx, brw->screen->fd);
return perf_cfg->n_queries;
}
void
brw_init_performance_queries(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
ctx->Driver.InitPerfQueryInfo = brw_init_perf_query_info;
ctx->Driver.GetPerfQueryInfo = brw_get_perf_query_info;
ctx->Driver.GetPerfCounterInfo = brw_get_perf_counter_info;
ctx->Driver.NewPerfQueryObject = brw_new_perf_query_object;
ctx->Driver.DeletePerfQuery = brw_delete_perf_query;
ctx->Driver.BeginPerfQuery = brw_begin_perf_query;
ctx->Driver.EndPerfQuery = brw_end_perf_query;
ctx->Driver.WaitPerfQuery = brw_wait_perf_query;
ctx->Driver.IsPerfQueryReady = brw_is_perf_query_ready;
ctx->Driver.GetPerfQueryData = brw_get_perf_query_data;
}

View file

@ -1,454 +0,0 @@
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_batch.h"
#include "brw_fbo.h"
/**
* Emit a PIPE_CONTROL with various flushing flags.
*
* The caller is responsible for deciding what flags are appropriate for the
* given generation.
*/
void
brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (devinfo->ver >= 6 &&
(flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
(flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
/* A pipe control command with flush and invalidate bits set
* simultaneously is an inherently racy operation on Gfx6+ if the
* contents of the flushed caches were intended to become visible from
* any of the invalidated caches. Split it in two PIPE_CONTROLs, the
* first one should stall the pipeline to make sure that the flushed R/W
* caches are coherent with memory once the specified R/O caches are
* invalidated. On pre-Gfx6 hardware the (implicit) R/O cache
* invalidation seems to happen at the bottom of the pipeline together
* with any write cache flush, so this shouldn't be a concern. In order
* to ensure a full stall, we do an end-of-pipe sync.
*/
brw_emit_end_of_pipe_sync(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS));
flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
}
brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0);
}
/**
* Emit a PIPE_CONTROL that writes to a buffer object.
*
* \p flags should contain one of the following items:
* - PIPE_CONTROL_WRITE_IMMEDIATE
* - PIPE_CONTROL_WRITE_TIMESTAMP
* - PIPE_CONTROL_WRITE_DEPTH_COUNT
*/
void
brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm)
{
brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm);
}
/**
* Restriction [DevSNB, DevIVB]:
*
* Prior to changing Depth/Stencil Buffer state (i.e. any combination of
* 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
* 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
* (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
* cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
* another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
* unless SW can otherwise guarantee that the pipeline from WM onwards is
* already flushed (e.g., via a preceding MI_FLUSH).
*/
void
brw_emit_depth_stall_flushes(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(devinfo->ver >= 6);
/* Starting on BDW, these pipe controls are unnecessary.
*
* WM HW will internally manage the draining pipe and flushing of the caches
* when this command is issued. The PIPE_CONTROL restrictions are removed.
*/
if (devinfo->ver >= 8)
return;
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
}
/**
* From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
* stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs
* to be sent before any combination of VS associated 3DSTATE."
*/
void
gfx7_emit_vs_workaround_flush(struct brw_context *brw)
{
ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(devinfo->ver == 7);
brw_emit_pipe_control_write(brw,
PIPE_CONTROL_WRITE_IMMEDIATE
| PIPE_CONTROL_DEPTH_STALL,
brw->workaround_bo,
brw->workaround_bo_offset, 0);
}
/**
* From the PRM, Volume 2a:
*
* "Indirect State Pointers Disable
*
* At the completion of the post-sync operation associated with this pipe
* control packet, the indirect state pointers in the hardware are
* considered invalid; the indirect pointers are not saved in the context.
* If any new indirect state commands are executed in the command stream
* while the pipe control is pending, the new indirect state commands are
* preserved.
*
* [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context
* restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant
* commands are only considered as Indirect State Pointers. Once ISP is
* issued in a context, SW must initialize by programming push constant
* commands for all the shaders (at least to zero length) before attempting
* any rendering operation for the same context."
*
* 3DSTATE_CONSTANT_* packets are restored during a context restore,
* even though they point to a BO that has already been unreferenced at
* the end of the previous batch buffer. This has been fine so far since
* we are protected by the scratch page (every address not covered by
* a BO should be pointing to the scratch page). But on CNL, it is
* causing a GPU hang during context restore at the 3DSTATE_CONSTANT_*
* instruction.
*
* The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the
* hardware to ignore previous 3DSTATE_CONSTANT_* packets during a
* context restore, so the mentioned hang doesn't happen. However,
* software must program push constant commands for all stages prior to
* rendering anything, so we flag them as dirty.
*
* Finally, we also make sure to stall at the pixel scoreboard to make sure the
* constants have been loaded into the EUs prior to disabling the push
* constants, so that it doesn't hang a previous 3DPRIMITIVE.
*/
void
gfx7_emit_isp_disable(struct brw_context *brw)
{
brw->vtbl.emit_raw_pipe_control(brw,
PIPE_CONTROL_STALL_AT_SCOREBOARD |
PIPE_CONTROL_CS_STALL,
NULL, 0, 0);
brw->vtbl.emit_raw_pipe_control(brw,
PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
PIPE_CONTROL_CS_STALL,
NULL, 0, 0);
brw->vs.base.push_constants_dirty = true;
brw->tcs.base.push_constants_dirty = true;
brw->tes.base.push_constants_dirty = true;
brw->gs.base.push_constants_dirty = true;
brw->wm.base.push_constants_dirty = true;
}
/**
* Emit a PIPE_CONTROL command for gfx7 with the CS Stall bit set.
*/
void
gfx7_emit_cs_stall_flush(struct brw_context *brw)
{
brw_emit_pipe_control_write(brw,
PIPE_CONTROL_CS_STALL
| PIPE_CONTROL_WRITE_IMMEDIATE,
brw->workaround_bo,
brw->workaround_bo_offset, 0);
}
/**
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for
* implementing two workarounds on gfx6. From section 1.4.7.1
* "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
*
* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
* send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
* 0.
*
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
* =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
*
* And the workaround for these two requires this workaround first:
*
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*
* And this last workaround is tricky because of the requirements on
* that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
* volume 2 part 1:
*
* "1 of the following must also be set:
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
* - Notify Enable ([8] of DW1)"
*
* The cache flushes require the workaround flush that triggered this
* one, so we can't use it. Depth stall would trigger the same.
* Post-sync nonzero is what triggered this second workaround, so we
* can't use that one either. Notify enable is IRQs, which aren't
* really our business. That leaves only stall at scoreboard.
*/
void
brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
{
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
brw->workaround_bo,
brw->workaround_bo_offset, 0);
}
/*
* From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
*
* Write synchronization is a special case of end-of-pipe
* synchronization that requires that the render cache and/or depth
* related caches are flushed to memory, where the data will become
* globally visible. This type of synchronization is required prior to
* SW (CPU) actually reading the result data from memory, or initiating
* an operation that will use as a read surface (such as a texture
* surface) a previous render target and/or depth/stencil buffer
*
*
* From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
*
* Exercising the write cache flush bits (Render Target Cache Flush
* Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
* ensures the write caches are flushed and doesn't guarantee the data
* is globally visible.
*
* SW can track the completion of the end-of-pipe-synchronization by
* using "Notify Enable" and "PostSync Operation - Write Immediate
* Data" in the PIPE_CONTROL command.
*/
void
brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (devinfo->ver >= 6) {
/* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
*
* "The most common action to perform upon reaching a synchronization
* point is to write a value out to memory. An immediate value
* (included with the synchronization command) may be written."
*
*
* From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
*
* "In case the data flushed out by the render engine is to be read
* back in to the render engine in coherent manner, then the render
* engine has to wait for the fence completion before accessing the
* flushed data. This can be achieved by following means on various
* products: PIPE_CONTROL command with CS Stall and the required
* write caches flushed with Post-Sync-Operation as Write Immediate
* Data.
*
* Example:
* - Workload-1 (3D/GPGPU/MEDIA)
* - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
* Data, Required Write Cache Flush bits set)
* - Workload-2 (Can use the data produce or output by Workload-1)
*/
brw_emit_pipe_control_write(brw,
flags | PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_WRITE_IMMEDIATE,
brw->workaround_bo,
brw->workaround_bo_offset, 0);
if (devinfo->platform == INTEL_PLATFORM_HSW) {
/* Haswell needs additional workarounds:
*
* From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
*
* Option 1:
* PIPE_CONTROL command with the CS Stall and the required write
* caches flushed with Post-SyncOperation as Write Immediate Data
* followed by eight dummy MI_STORE_DATA_IMM (write to scratch
* space) commands.
*
* Example:
* - Workload-1
* - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
* Immediate Data, Required Write Cache Flush bits set)
* - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
* - Workload-2 (Can use the data produce or output by
* Workload-1)
*
* Unfortunately, both the PRMs and the internal docs are a bit
* out-of-date in this regard. What the Windows driver does (and
* this appears to actually work) is to emit a register read from the
* memory address written by the pipe control above.
*
* What register we load into doesn't matter. We choose an indirect
* rendering register because we know it always exists and it's one
* of the first registers the command parser allows us to write. If
* you don't have command parser support in your kernel (pre-4.2),
* this will get turned into MI_NOOP and you won't get the
* workaround. Unfortunately, there's just not much we can do in
* that case. This register is perfectly safe to write since we
* always re-load all of the indirect draw registers right before
* 3DPRIMITIVE when needed anyway.
*/
brw_load_register_mem(brw, GFX7_3DPRIM_START_INSTANCE,
brw->workaround_bo, brw->workaround_bo_offset);
}
} else {
/* On gfx4-5, a regular pipe control seems to suffice. */
brw_emit_pipe_control_flush(brw, flags);
}
}
/* Emit a pipelined flush to either flush render and texture cache for
* reading from a FBO-drawn texture, or flush so that frontbuffer
* render appears on the screen in DRI1.
*
* This is also used for the always_flush_cache driconf debug option.
*/
void
brw_emit_mi_flush(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
if (devinfo->ver >= 6) {
flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CS_STALL;
}
brw_emit_pipe_control_flush(brw, flags);
}
static bool
init_identifier_bo(struct brw_context *brw)
{
void *bo_map;
if (!can_do_exec_capture(brw->screen))
return true;
bo_map = brw_bo_map(NULL, brw->workaround_bo, MAP_READ | MAP_WRITE);
if (!bo_map)
return false;
brw->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
brw->workaround_bo_offset =
ALIGN(intel_debug_write_identifiers(bo_map, 4096, "i965") + 8, 8);
brw_bo_unmap(brw->workaround_bo);
return true;
}
int
brw_init_pipe_control(struct brw_context *brw,
const struct intel_device_info *devinfo)
{
switch (devinfo->ver) {
case 11:
brw->vtbl.emit_raw_pipe_control = gfx11_emit_raw_pipe_control;
break;
case 9:
brw->vtbl.emit_raw_pipe_control = gfx9_emit_raw_pipe_control;
break;
case 8:
brw->vtbl.emit_raw_pipe_control = gfx8_emit_raw_pipe_control;
break;
case 7:
brw->vtbl.emit_raw_pipe_control =
devinfo->verx10 == 75 ?
gfx75_emit_raw_pipe_control : gfx7_emit_raw_pipe_control;
break;
case 6:
brw->vtbl.emit_raw_pipe_control = gfx6_emit_raw_pipe_control;
break;
case 5:
brw->vtbl.emit_raw_pipe_control = gfx5_emit_raw_pipe_control;
break;
case 4:
brw->vtbl.emit_raw_pipe_control =
devinfo->verx10 == 45 ?
gfx45_emit_raw_pipe_control : gfx4_emit_raw_pipe_control;
break;
default:
unreachable("Unhandled Gen.");
}
if (devinfo->ver < 6)
return 0;
/* We can't just use brw_state_batch to get a chunk of space for
* the gfx6 workaround because it involves actually writing to
* the buffer, and the kernel doesn't let us write to the batch.
*/
brw->workaround_bo = brw_bo_alloc(brw->bufmgr, "workaround", 4096,
BRW_MEMZONE_OTHER);
if (brw->workaround_bo == NULL)
return -ENOMEM;
if (!init_identifier_bo(brw))
return -ENOMEM; /* Couldn't map workaround_bo?? */
brw->workaround_bo_offset = 0;
brw->pipe_controls_since_last_cs_stall = 0;
return 0;
}
void
brw_fini_pipe_control(struct brw_context *brw)
{
brw_bo_unreference(brw->workaround_bo);
}

View file

@ -1,95 +0,0 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_PIPE_CONTROL_DOT_H
#define BRW_PIPE_CONTROL_DOT_H
struct brw_context;
struct intel_device_info;
struct brw_bo;
/** @{
*
* PIPE_CONTROL operation, a combination MI_FLUSH and register write with
* additional flushing control.
*
* The bits here are not the actual hardware values. The actual values
* shift around a bit per-generation, so we just have flags for each
* potential operation, and use genxml to encode the actual packet.
*/
enum pipe_control_flags
{
PIPE_CONTROL_FLUSH_LLC = (1 << 1),
PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2),
PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3),
PIPE_CONTROL_CS_STALL = (1 << 4),
PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5),
PIPE_CONTROL_SYNC_GFDT = (1 << 6),
PIPE_CONTROL_TLB_INVALIDATE = (1 << 7),
PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8),
PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9),
PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10),
PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11),
PIPE_CONTROL_DEPTH_STALL = (1 << 12),
PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13),
PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14),
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15),
PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17),
PIPE_CONTROL_FLUSH_ENABLE = (1 << 18),
PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19),
PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20),
PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21),
PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22),
PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23),
PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24),
};
#define PIPE_CONTROL_CACHE_FLUSH_BITS \
(PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
PIPE_CONTROL_RENDER_TARGET_FLUSH)
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
(PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
PIPE_CONTROL_INSTRUCTION_INVALIDATE)
/** @} */
int brw_init_pipe_control(struct brw_context *brw,
const struct intel_device_info *info);
void brw_fini_pipe_control(struct brw_context *brw);
void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags);
void brw_emit_mi_flush(struct brw_context *brw);
void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
void brw_emit_depth_stall_flushes(struct brw_context *brw);
void gfx7_emit_vs_workaround_flush(struct brw_context *brw);
void gfx7_emit_cs_stall_flush(struct brw_context *brw);
void gfx7_emit_isp_disable(struct brw_context *brw);
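/* Minimal usage sketch (illustrative only, assuming a valid brw_context):
 * callers combine flags by OR-ing them together, e.g.
 *
 *    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
 *                                     PIPE_CONTROL_CS_STALL);
 *
 * brw_emit_pipe_control_flush() itself splits the request in two when both
 * flush and invalidate bits are set on Gfx6+.
 */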
#endif

View file

@ -1,133 +0,0 @@
/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/accum.h"
#include "main/enums.h"
#include "main/state.h"
#include "main/stencil.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "swrast/swrast.h"
#include "brw_context.h"
#include "brw_pixel.h"
#define FILE_DEBUG_FLAG DEBUG_PIXEL
static GLenum
effective_func(GLenum func, bool src_alpha_is_one)
{
if (src_alpha_is_one) {
if (func == GL_SRC_ALPHA)
return GL_ONE;
if (func == GL_ONE_MINUS_SRC_ALPHA)
return GL_ZERO;
}
return func;
}
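/* Illustrative example (not in the original source): when the incoming source
 * alpha is known to be 1.0, GL_SRC_ALPHA/GL_ONE_MINUS_SRC_ALPHA blending
 * degenerates to GL_ONE/GL_ZERO, i.e. a plain copy, so the blend check below
 * can still accept the blit path:
 *
 *    effective_func(GL_SRC_ALPHA, true)           == GL_ONE
 *    effective_func(GL_ONE_MINUS_SRC_ALPHA, true) == GL_ZERO
 */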
/**
* Check if any fragment operations are in effect which might affect
* glDraw/CopyPixels.
*/
bool
brw_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one)
{
if (ctx->NewState)
_mesa_update_state(ctx);
if (_mesa_arb_fragment_program_enabled(ctx)) {
DBG("fallback due to fragment program\n");
return false;
}
if (ctx->Color.BlendEnabled &&
(effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE ||
effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO ||
ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD ||
effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE ||
effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO ||
ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) {
DBG("fallback due to blend\n");
return false;
}
if (ctx->Texture._MaxEnabledTexImageUnit != -1) {
DBG("fallback due to texturing\n");
return false;
}
if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) {
DBG("fallback due to color masking\n");
return false;
}
if (ctx->Color.AlphaEnabled) {
DBG("fallback due to alpha\n");
return false;
}
if (ctx->Depth.Test) {
DBG("fallback due to depth test\n");
return false;
}
if (ctx->Fog.Enabled) {
DBG("fallback due to fog\n");
return false;
}
if (ctx->_ImageTransferState) {
DBG("fallback due to image transfer\n");
return false;
}
if (_mesa_stencil_is_enabled(ctx)) {
DBG("fallback due to image stencil\n");
return false;
}
if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) {
DBG("fallback due to pixel zoom\n");
return false;
}
if (ctx->RenderMode != GL_RENDER) {
DBG("fallback due to render mode\n");
return false;
}
return true;
}
void
brw_init_pixel_functions(struct dd_function_table *functions)
{
functions->Bitmap = brw_bitmap;
functions->CopyPixels = brw_copypixels;
functions->DrawPixels = brw_drawpixels;
functions->ReadPixels = brw_readpixels;
}

View file

@ -1,61 +0,0 @@
/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BRW_PIXEL_H
#define BRW_PIXEL_H
#include "main/mtypes.h"
void brw_init_pixel_functions(struct dd_function_table *functions);
bool brw_check_blit_fragment_ops(struct gl_context *ctx,
bool src_alpha_is_one);
void brw_readpixels(struct gl_context *ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack,
GLvoid *pixels);
void brw_drawpixels(struct gl_context *ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format,
GLenum type,
const struct gl_pixelstore_attrib *unpack,
const GLvoid *pixels);
void brw_copypixels(struct gl_context *ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint destx, GLint desty, GLenum type);
void brw_bitmap(struct gl_context *ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *pixels);
#endif

View file

@ -1,363 +0,0 @@
/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/blend.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/colormac.h"
#include "main/condrender.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/pbo.h"
#include "main/bufferobj.h"
#include "main/state.h"
#include "main/texobj.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
#include "brw_context.h"
#include "brw_screen.h"
#include "brw_batch.h"
#include "brw_blit.h"
#include "brw_fbo.h"
#include "brw_image.h"
#include "brw_buffers.h"
#include "brw_pixel.h"
#define FILE_DEBUG_FLAG DEBUG_PIXEL
/* Unlike the other intel_pixel_* functions, the expectation here is
* that the incoming data is not in a PBO. With the XY_TEXT blit
* method, there's no benefit having it in a PBO, but we could
* implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
* PBO bitmaps. I think they are probably pretty rare though - I
* wonder if Xgl uses them?
*/
static const GLubyte *
map_pbo(struct gl_context *ctx,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap)
{
GLubyte *buf;
if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
GL_COLOR_INDEX, GL_BITMAP,
INT_MAX, (const GLvoid *) bitmap)) {
_mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
return NULL;
}
buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
GL_MAP_READ_BIT,
unpack->BufferObj,
MAP_INTERNAL);
if (!buf) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
return NULL;
}
return ADD_POINTERS(buf, bitmap);
}
static bool test_bit( const GLubyte *src, GLuint bit )
{
return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
}
static void set_bit( GLubyte *dest, GLuint bit )
{
dest[bit/8] |= 1 << (bit % 8);
}
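/* Descriptive note (not in the original source): test_bit()/set_bit() address
 * bits LSB-first within each byte. In get_bitmap_rect() below, the source bit
 * index is XOR-ed with a mask of 7 when unpack->LsbFirst is false so that
 * MSB-first input is read correctly, and the destination index is XOR-ed with
 * 7 so the output ends up MSB-first within each byte, which appears to be the
 * ordering the immediate color-expand blit expects.
 */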
/* Extract a rectangle's worth of data from the bitmap. Called
* per chunk of HW-sized bitmap.
*/
static GLuint
get_bitmap_rect(GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap,
GLuint x, GLuint y,
GLuint w, GLuint h,
GLubyte *dest,
GLuint row_align,
bool invert)
{
GLuint src_offset = (x + unpack->SkipPixels) & 0x7;
GLuint mask = unpack->LsbFirst ? 0 : 7;
GLuint bit = 0;
GLint row, col;
GLint first, last;
GLint incr;
GLuint count = 0;
DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
__func__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
if (invert) {
first = h-1;
last = 0;
incr = -1;
}
else {
first = 0;
last = h-1;
incr = 1;
}
/* Require that dest be pre-zero'd.
*/
for (row = first; row != (last+incr); row += incr) {
const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap,
width, height,
GL_COLOR_INDEX, GL_BITMAP,
y + row, x);
for (col = 0; col < w; col++, bit++) {
if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
set_bit(dest, bit ^ 7);
count++;
}
}
if (row_align)
bit = ALIGN(bit, row_align);
}
return count;
}
/**
* Returns the low Y value of the given vertical range, flipped according to
* whether or not the framebuffer is flipped.
*/
static inline int
y_flip(struct gl_framebuffer *fb, int y, int height)
{
if (fb->FlipY)
return fb->Height - y - height;
else
return y;
}
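/* Worked example (illustrative only): with fb->FlipY set, fb->Height == 100,
 * y == 10 and height == 20, y_flip() returns 100 - 10 - 20 == 70.
 */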
/*
* Render a bitmap.
*/
static bool
do_blit_bitmap(struct gl_context *ctx,
GLint dstx, GLint dsty,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap)
{
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct brw_renderbuffer *irb;
GLfloat tmpColor[4];
GLubyte ubcolor[4];
GLuint color;
GLsizei bitmap_width = width;
GLsizei bitmap_height = height;
GLint px, py;
GLuint stipple[32];
GLint orig_dstx = dstx;
GLint orig_dsty = dsty;
/* Update draw buffer bounds */
_mesa_update_state(ctx);
if (ctx->Depth.Test) {
/* The blit path produces incorrect results when depth testing is on.
* It seems the blit Z coord is always 1.0 (the far plane) so fragments
* will likely be obscured by other, closer geometry.
*/
return false;
}
brw_prepare_render(brw);
if (fb->_NumColorDrawBuffers != 1) {
perf_debug("accelerated glBitmap() only supports rendering to a "
"single color buffer\n");
return false;
}
irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]);
if (unpack->BufferObj) {
bitmap = map_pbo(ctx, width, height, unpack, bitmap);
if (bitmap == NULL)
return true; /* even though this is an error, we're done */
}
COPY_4V(tmpColor, ctx->Current.RasterColor);
if (_mesa_need_secondary_color(ctx)) {
ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
}
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
switch (_mesa_get_render_format(ctx, brw_rb_format(irb))) {
case MESA_FORMAT_B8G8R8A8_UNORM:
case MESA_FORMAT_B8G8R8X8_UNORM:
color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]);
break;
case MESA_FORMAT_B5G6R5_UNORM:
color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
break;
default:
perf_debug("Unsupported format %s in accelerated glBitmap()\n",
_mesa_get_format_name(irb->mt->format));
return false;
}
if (!brw_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
return false;
/* Clip to buffer bounds and scissor. */
if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
fb->_Xmax, fb->_Ymax,
&dstx, &dsty, &width, &height))
goto out;
dsty = y_flip(fb, dsty, height);
#define DY 32
#define DX 32
/* The blitter has no idea about fast color clears, so we need to resolve
* the miptree before we do anything.
*/
brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, true);
/* Chop it all into chunks that can be digested by hardware: */
for (py = 0; py < height; py += DY) {
for (px = 0; px < width; px += DX) {
int h = MIN2(DY, height - py);
int w = MIN2(DX, width - px);
GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
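/* Worked example (not in the original source): in the worst case
 * w == DX == 32 and h == DY == 32, so ALIGN(32, 8) * 32 == 1024 bits and
 * ALIGN(1024, 64) / 8 == 128 bytes, which exactly fits the 32-element
 * GLuint stipple[] (128 bytes) asserted against below.
 */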
const enum gl_logicop_mode logic_op = ctx->Color.ColorLogicOpEnabled ?
ctx->Color._LogicOp : COLOR_LOGICOP_COPY;
assert(sz <= sizeof(stipple));
memset(stipple, 0, sz);
/* May need to adjust this when padding has been introduced in
* sz above:
*
* Have to translate destination coordinates back into source
* coordinates.
*/
int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack,
bitmap,
-orig_dstx + (dstx + px),
-orig_dsty + y_flip(fb, dsty + py, h),
w, h,
(GLubyte *)stipple,
8,
fb->FlipY);
if (count == 0)
continue;
if (!brw_emit_immediate_color_expand_blit(brw,
irb->mt->cpp,
(GLubyte *)stipple,
sz,
color,
irb->mt->surf.row_pitch_B,
irb->mt->bo,
irb->mt->offset,
irb->mt->surf.tiling,
dstx + px,
dsty + py,
w, h,
logic_op)) {
return false;
}
if (ctx->Query.CurrentOcclusionObject)
ctx->Query.CurrentOcclusionObject->Result += count;
}
}
out:
if (INTEL_DEBUG(DEBUG_SYNC))
brw_batch_flush(brw);
if (unpack->BufferObj) {
/* done with PBO so unmap it now */
ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj, MAP_INTERNAL);
}
return true;
}
/* There are a large number of possible ways to implement bitmap on
* this hardware, most of them have some sort of drawback. Here are a
* few that spring to mind:
*
* Blit:
* - XY_MONO_SRC_BLT_CMD
* - use XY_SETUP_CLIP_BLT for cliprect clipping.
* - XY_TEXT_BLT
* - XY_TEXT_IMMEDIATE_BLT
* - blit per cliprect, subject to maximum immediate data size.
* - XY_COLOR_BLT
* - per pixel or run of pixels
* - XY_PIXEL_BLT
* - good for sparse bitmaps
*
* 3D engine:
* - Point per pixel
* - Translate bitmap to an alpha texture and render as a quad
* - Chop bitmap up into 32x32 squares and render w/polygon stipple.
*/
void
brw_bitmap(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte * pixels)
{
struct brw_context *brw = brw_context(ctx);
if (!_mesa_check_conditional_render(ctx))
return;
if (brw->screen->devinfo.ver < 6 &&
do_blit_bitmap(ctx, x, y, width, height, unpack, pixels))
return;
_mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels);
}

View file

@ -1,212 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/image.h"
#include "main/state.h"
#include "main/stencil.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "main/fbobject.h"
#include "drivers/common/meta.h"
#include "brw_context.h"
#include "brw_buffers.h"
#include "brw_mipmap_tree.h"
#include "brw_pixel.h"
#include "brw_fbo.h"
#include "brw_blit.h"
#include "brw_batch.h"
#define FILE_DEBUG_FLAG DEBUG_PIXEL
/**
* CopyPixels with the blitter. Don't support zooming, pixel transfer, etc.
*/
static bool
do_blit_copypixels(struct gl_context * ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint dstx, GLint dsty, GLenum type)
{
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct gl_framebuffer *read_fb = ctx->ReadBuffer;
GLint orig_dstx;
GLint orig_dsty;
GLint orig_srcx;
GLint orig_srcy;
struct brw_renderbuffer *draw_irb = NULL;
struct brw_renderbuffer *read_irb = NULL;
/* Update draw buffer bounds */
_mesa_update_state(ctx);
brw_prepare_render(brw);
switch (type) {
case GL_COLOR:
if (fb->_NumColorDrawBuffers != 1) {
perf_debug("glCopyPixels() fallback: MRT\n");
return false;
}
draw_irb = brw_renderbuffer(fb->_ColorDrawBuffers[0]);
read_irb = brw_renderbuffer(read_fb->_ColorReadBuffer);
break;
case GL_DEPTH_STENCIL_EXT:
draw_irb = brw_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer);
read_irb =
brw_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
break;
case GL_DEPTH:
perf_debug("glCopyPixels() fallback: GL_DEPTH\n");
return false;
case GL_STENCIL:
perf_debug("glCopyPixels() fallback: GL_STENCIL\n");
return false;
default:
perf_debug("glCopyPixels(): Unknown type\n");
return false;
}
if (!draw_irb) {
perf_debug("glCopyPixels() fallback: missing draw buffer\n");
return false;
}
if (!read_irb) {
perf_debug("glCopyPixels() fallback: missing read buffer\n");
return false;
}
if (draw_irb->mt->surf.samples > 1 || read_irb->mt->surf.samples > 1) {
perf_debug("glCopyPixels() fallback: multisampled buffers\n");
return false;
}
if (ctx->_ImageTransferState) {
perf_debug("glCopyPixels(): Unsupported image transfer state\n");
return false;
}
if (ctx->Depth.Test) {
perf_debug("glCopyPixels(): Unsupported depth test state\n");
return false;
}
if (brw->stencil_enabled) {
perf_debug("glCopyPixels(): Unsupported stencil test state\n");
return false;
}
if (ctx->Fog.Enabled ||
ctx->Texture._MaxEnabledTexImageUnit != -1 ||
_mesa_arb_fragment_program_enabled(ctx)) {
perf_debug("glCopyPixels(): Unsupported fragment shader state\n");
return false;
}
if (ctx->Color.AlphaEnabled ||
ctx->Color.BlendEnabled) {
perf_debug("glCopyPixels(): Unsupported blend state\n");
return false;
}
if (GET_COLORMASK(ctx->Color.ColorMask, 0) != 0xf) {
perf_debug("glCopyPixels(): Unsupported color mask state\n");
return false;
}
if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) {
perf_debug("glCopyPixels(): Unsupported pixel zoom\n");
return false;
}
brw_batch_flush(brw);
/* Clip to destination buffer. */
orig_dstx = dstx;
orig_dsty = dsty;
if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
fb->_Xmax, fb->_Ymax,
&dstx, &dsty, &width, &height))
goto out;
/* Adjust src coords for our post-clipped destination origin */
srcx += dstx - orig_dstx;
srcy += dsty - orig_dsty;
/* Clip to source buffer. */
orig_srcx = srcx;
orig_srcy = srcy;
if (!_mesa_clip_to_region(0, 0,
read_fb->Width, read_fb->Height,
&srcx, &srcy, &width, &height))
goto out;
/* Adjust dst coords for our post-clipped source origin */
dstx += srcx - orig_srcx;
dsty += srcy - orig_srcy;
if (!brw_miptree_blit(brw,
read_irb->mt, read_irb->mt_level, read_irb->mt_layer,
srcx, srcy, read_fb->FlipY,
draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer,
dstx, dsty, fb->FlipY,
width, height,
(ctx->Color.ColorLogicOpEnabled ?
ctx->Color._LogicOp : COLOR_LOGICOP_COPY))) {
DBG("%s: blit failure\n", __func__);
return false;
}
if (ctx->Query.CurrentOcclusionObject)
ctx->Query.CurrentOcclusionObject->Result += width * height;
out:
DBG("%s: success\n", __func__);
return true;
}
void
brw_copypixels(struct gl_context *ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint destx, GLint desty, GLenum type)
{
struct brw_context *brw = brw_context(ctx);
DBG("%s\n", __func__);
if (!_mesa_check_conditional_render(ctx))
return;
if (brw->screen->devinfo.ver < 6 &&
do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
return;
/* this will use swrast if needed */
_mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
}

View file

@ -1,178 +0,0 @@
/*
* Copyright 2006 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/enums.h"
#include "main/image.h"
#include "main/glformats.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "main/fbobject.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/texstate.h"
#include "main/bufferobj.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
#include "brw_context.h"
#include "brw_screen.h"
#include "brw_blit.h"
#include "brw_buffers.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"
#include "brw_pixel.h"
#include "brw_buffer_objects.h"
#define FILE_DEBUG_FLAG DEBUG_PIXEL
static bool
do_blit_drawpixels(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *unpack,
const GLvoid * pixels)
{
struct brw_context *brw = brw_context(ctx);
struct brw_buffer_object *src = brw_buffer_object(unpack->BufferObj);
GLuint src_offset;
struct brw_bo *src_buffer;
DBG("%s\n", __func__);
if (!brw_check_blit_fragment_ops(ctx, false))
return false;
if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
DBG("%s: fallback due to MRT\n", __func__);
return false;
}
brw_prepare_render(brw);
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
struct brw_renderbuffer *irb = brw_renderbuffer(rb);
mesa_format src_format = _mesa_format_from_format_and_type(format, type);
if (_mesa_format_is_mesa_array_format(src_format))
src_format = _mesa_format_from_array_format(src_format);
mesa_format dst_format = irb->mt->format;
/* We can safely discard sRGB encode/decode for the DrawPixels interface */
src_format = _mesa_get_srgb_format_linear(src_format);
dst_format = _mesa_get_srgb_format_linear(dst_format);
if (!brw_miptree_blit_compatible_formats(src_format, dst_format)) {
DBG("%s: bad format for blit\n", __func__);
return false;
}
if (unpack->SwapBytes || unpack->LsbFirst ||
unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: bad packing params\n", __func__);
return false;
}
int src_stride = _mesa_image_row_stride(unpack, width, format, type);
bool src_flip = false;
/* Mesa flips the src_stride for unpack->Invert, but we want our mt to have
* a normal src_stride.
*/
if (unpack->Invert) {
src_stride = -src_stride;
src_flip = true;
}
src_offset = (GLintptr)pixels;
src_offset += _mesa_image_offset(2, unpack, width, height,
format, type, 0, 0, 0);
src_buffer = brw_bufferobj_buffer(brw, src, src_offset,
height * src_stride, false);
struct brw_mipmap_tree *pbo_mt =
brw_miptree_create_for_bo(brw,
src_buffer,
irb->mt->format,
src_offset,
width, height, 1,
src_stride,
ISL_TILING_LINEAR,
MIPTREE_CREATE_DEFAULT);
if (!pbo_mt)
return false;
if (!brw_miptree_blit(brw,
pbo_mt, 0, 0,
0, 0, src_flip,
irb->mt, irb->mt_level, irb->mt_layer,
x, y, ctx->DrawBuffer->FlipY,
width, height, COLOR_LOGICOP_COPY)) {
DBG("%s: blit failed\n", __func__);
brw_miptree_release(&pbo_mt);
return false;
}
brw_miptree_release(&pbo_mt);
if (ctx->Query.CurrentOcclusionObject)
ctx->Query.CurrentOcclusionObject->Result += width * height;
DBG("%s: success\n", __func__);
return true;
}
void
brw_drawpixels(struct gl_context *ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format,
GLenum type,
const struct gl_pixelstore_attrib *unpack,
const GLvoid *pixels)
{
struct brw_context *brw = brw_context(ctx);
if (!_mesa_check_conditional_render(ctx))
return;
if (format == GL_STENCIL_INDEX) {
_swrast_DrawPixels(ctx, x, y, width, height, format, type,
unpack, pixels);
return;
}
if (brw->screen->devinfo.ver < 6 &&
unpack->BufferObj) {
if (do_blit_drawpixels(ctx, x, y, width, height, format, type, unpack,
pixels)) {
return;
}
perf_debug("%s: fallback to generic code in PBO case\n", __func__);
}
_mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
unpack, pixels);
}

View file

@ -1,300 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/enums.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/bufferobj.h"
#include "main/readpix.h"
#include "main/state.h"
#include "main/glformats.h"
#include "program/prog_instruction.h"
#include "drivers/common/meta.h"
#include "brw_context.h"
#include "brw_blorp.h"
#include "brw_screen.h"
#include "brw_batch.h"
#include "brw_buffers.h"
#include "brw_fbo.h"
#include "brw_mipmap_tree.h"
#include "brw_pixel.h"
#include "brw_buffer_objects.h"
#define FILE_DEBUG_FLAG DEBUG_PIXEL
/**
* \brief A fast path for glReadPixels
*
* This fast path is taken when the source format is BGRA, RGBA,
* A or L and when the texture memory is X- or Y-tiled. It downloads
* the source data by directly mapping the memory without a GTT fence.
* This then needs to be de-tiled on the CPU before presenting the data to
* the user in the linear fashion.
*
* This is a performance win over the conventional texture download path.
* In the conventional texture download path, the texture is either mapped
* through the GTT or copied to a linear buffer with the blitter before
* handing off to a software path. This allows us to avoid round-tripping
* through the GPU (in the case where we would be blitting) and do only a
* single copy operation.
*/
static bool
brw_readpixels_tiled_memcpy(struct gl_context *ctx,
GLint xoffset, GLint yoffset,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
GLvoid * pixels,
const struct gl_pixelstore_attrib *pack)
{
struct brw_context *brw = brw_context(ctx);
struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
/* This path supports reading from color buffers only */
if (rb == NULL)
return false;
struct brw_renderbuffer *irb = brw_renderbuffer(rb);
int dst_pitch;
/* The miptree's buffer. */
struct brw_bo *bo;
uint32_t cpp;
isl_memcpy_type copy_type;
/* This fastpath is restricted to specific renderbuffer types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
* more types.
*/
if (!devinfo->has_llc ||
!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
pixels == NULL ||
pack->BufferObj ||
pack->Alignment > 4 ||
pack->SkipPixels > 0 ||
pack->SkipRows > 0 ||
(pack->RowLength != 0 && pack->RowLength != width) ||
pack->SwapBytes ||
pack->LsbFirst ||
pack->Invert)
return false;
/* Only a simple blit, no scale, bias or other mapping. */
if (ctx->_ImageTransferState)
return false;
/* It is possible that the renderbuffer (or underlying texture) is
* multisampled. Since ReadPixels from a multisampled buffer requires a
* multisample resolve, we can't handle this here.
*/
if (rb->NumSamples > 1)
return false;
/* We can't handle copying from RGBX or BGRX because the tiled_memcpy
* function doesn't set the last channel to 1. Note this checks BaseFormat
* rather than TexFormat in case the RGBX format is being simulated with an
* RGBA format.
*/
if (rb->_BaseFormat == GL_RGB)
return false;
copy_type = brw_miptree_get_memcpy_type(rb->Format, format, type, &cpp);
if (copy_type == ISL_MEMCPY_INVALID)
return false;
if (!irb->mt ||
(irb->mt->surf.tiling != ISL_TILING_X &&
irb->mt->surf.tiling != ISL_TILING_Y0)) {
/* The algorithm is written only for X- or Y-tiled memory. */
return false;
}
/* tiled_to_linear() assumes that if the object is swizzled, it is using
* I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only
* true on gfx5 and above.
*
* The killer on top is that some gfx4 have an L-shaped swizzle mode, where
* parts of the memory aren't swizzled at all. Userspace just can't handle
* that.
*/
if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
return false;
/* Since we are going to read raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
brw_miptree_access_raw(brw, irb->mt, irb->mt_level, irb->mt_layer, false);
bo = irb->mt->bo;
if (brw_batch_references(&brw->batch, bo)) {
perf_debug("Flushing before mapping a referenced bo.\n");
brw_batch_flush(brw);
}
void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW);
if (map == NULL) {
DBG("%s: failed to map bo\n", __func__);
return false;
}
unsigned slice_offset_x, slice_offset_y;
brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer,
&slice_offset_x, &slice_offset_y);
xoffset += slice_offset_x;
yoffset += slice_offset_y;
dst_pitch = _mesa_image_row_stride(pack, width, format, type);
/* For a window-system renderbuffer, the buffer is actually flipped
* vertically, so we need to handle that. Since the detiling function
* can only really work in the forwards direction, we have to be a
* little creative. First, we compute the Y-offset of the first row of
* the renderbuffer (in renderbuffer coordinates). We then match that
* with the last row of the client's data. Finally, we give
* tiled_to_linear a negative pitch so that it walks through the
* client's data backwards as it walks through the renderbuffer forwards.
*/
if (ctx->ReadBuffer->FlipY) {
yoffset = rb->Height - yoffset - height;
pixels += (ptrdiff_t) (height - 1) * dst_pitch;
dst_pitch = -dst_pitch;
}
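/* A worked example of the flip above, with made-up numbers: for a 4-row
 * read with a row stride of 1024 bytes, pixels is advanced by 3 * 1024 so
 * it points at the client's last row, and dst_pitch becomes -1024. The
 * detiler then walks the renderbuffer rows forwards while filling the
 * client's buffer from the bottom up.
 */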
/* We postponed printing this message until having committed to executing
* the function.
*/
DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
"mesa_format=0x%x tiling=%d "
"pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
__func__, xoffset, yoffset, width, height,
format, type, rb->Format, irb->mt->surf.tiling,
pack->Alignment, pack->RowLength, pack->SkipPixels,
pack->SkipRows);
isl_memcpy_tiled_to_linear(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
pixels,
map + irb->mt->offset,
dst_pitch, irb->mt->surf.row_pitch_B,
devinfo->has_bit6_swizzle,
irb->mt->surf.tiling,
copy_type
);
brw_bo_unmap(bo);
return true;
}
static bool
brw_readpixels_blorp(struct gl_context *ctx,
unsigned x, unsigned y,
unsigned w, unsigned h,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing)
{
struct brw_context *brw = brw_context(ctx);
struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
if (!rb)
return false;
struct brw_renderbuffer *irb = brw_renderbuffer(rb);
/* _mesa_get_readpixels_transfer_ops() includes the cases of read
* color clamping along with the ctx->_ImageTransferState.
*/
if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
type, GL_FALSE))
return false;
GLenum dst_base_format = _mesa_unpack_format_to_base_format(format);
if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat,
dst_base_format))
return false;
unsigned swizzle;
if (irb->Base.Base._BaseFormat == GL_RGB) {
swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
} else {
swizzle = SWIZZLE_XYZW;
}
return brw_blorp_download_miptree(brw, irb->mt, rb->Format, swizzle,
irb->mt_level, x, y, irb->mt_layer,
w, h, 1, GL_TEXTURE_2D, format, type,
ctx->ReadBuffer->FlipY, pixels, packing);
}
void
brw_readpixels(struct gl_context *ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid *pixels)
{
bool ok;
struct brw_context *brw = brw_context(ctx);
bool dirty;
DBG("%s\n", __func__);
/* Reading pixels won't dirty the front buffer, so reset the dirty
* flag after calling brw_prepare_render().
*/
dirty = brw->front_buffer_dirty;
brw_prepare_render(brw);
brw->front_buffer_dirty = dirty;
if (pack->BufferObj) {
if (brw_readpixels_blorp(ctx, x, y, width, height,
format, type, pixels, pack))
return;
perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
}
ok = brw_readpixels_tiled_memcpy(ctx, x, y, width, height,
format, type, pixels, pack);
if (ok)
return;
/* Update Mesa state before calling _mesa_readpixels().
* XXX this may not be needed since ReadPixels no longer uses the
* span code.
*/
if (ctx->NewState)
_mesa_update_state(ctx);
_mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
/* There's a brw_prepare_render() call in intelSpanRenderStart(). */
brw->front_buffer_dirty = dirty;
}

View file

@@ -1,462 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jordan Justen <jordan.l.justen@intel.com>
*
*/
#include "main/bufferobj.h"
#include "main/varray.h"
#include "vbo/vbo.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
#include "brw_batch.h"
#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b))
#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b))
/*
* Notes on primitive restart:
* The code below is used when the driver does not fully support primitive
* restart (for example, if it only does restart index of ~0).
*
* We map the index buffer, find the restart indexes, unmap
* the index buffer then draw the sub-primitives delineated by the restarts.
*
* A couple possible optimizations:
* 1. Save the list of sub-primitive (start, count) values in a list attached
* to the index buffer for re-use in subsequent draws. The list would be
* invalidated when the contents of the buffer changed.
* 2. If drawing triangle strips or quad strips, create a new index buffer
* that uses duplicated vertices to render the disjoint strips as one
* long strip. We'd have to be careful to avoid using too much memory
* for this.
*
* Finally, some apps might perform better if they don't use primitive restart
* at all rather than this fallback path. Set MESA_EXTENSION_OVERRIDE to
* "-GL_NV_primitive_restart" to test that.
*/
struct sub_primitive
{
GLuint start;
GLuint count;
GLuint min_index;
GLuint max_index;
};
/**
* Scan the elements array to find restart indexes. Return an array
* of struct sub_primitive indicating how to draw the sub-primitives
* delineated by the restart index.
*/
static struct sub_primitive *
find_sub_primitives(const void *elements, unsigned element_size,
unsigned start, unsigned end, unsigned restart_index,
unsigned *num_sub_prims)
{
const unsigned max_prims = end - start;
struct sub_primitive *sub_prims;
unsigned i, cur_start, cur_count;
GLuint scan_index;
unsigned scan_num;
sub_prims =
malloc(max_prims * sizeof(struct sub_primitive));
if (!sub_prims) {
*num_sub_prims = 0;
return NULL;
}
cur_start = start;
cur_count = 0;
scan_num = 0;
#define IB_INDEX_READ(TYPE, INDEX) (((const GL##TYPE *) elements)[INDEX])
#define SCAN_ELEMENTS(TYPE) \
sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \
sub_prims[scan_num].max_index = 0; \
for (i = start; i < end; i++) { \
scan_index = IB_INDEX_READ(TYPE, i); \
if (scan_index == restart_index) { \
if (cur_count > 0) { \
assert(scan_num < max_prims); \
sub_prims[scan_num].start = cur_start; \
sub_prims[scan_num].count = cur_count; \
scan_num++; \
sub_prims[scan_num].min_index = (GL##TYPE) 0xffffffff; \
sub_prims[scan_num].max_index = 0; \
} \
cur_start = i + 1; \
cur_count = 0; \
} \
else { \
UPDATE_MIN2(sub_prims[scan_num].min_index, scan_index); \
UPDATE_MAX2(sub_prims[scan_num].max_index, scan_index); \
cur_count++; \
} \
} \
if (cur_count > 0) { \
assert(scan_num < max_prims); \
sub_prims[scan_num].start = cur_start; \
sub_prims[scan_num].count = cur_count; \
scan_num++; \
}
switch (element_size) {
case 1:
SCAN_ELEMENTS(ubyte);
break;
case 2:
SCAN_ELEMENTS(ushort);
break;
case 4:
SCAN_ELEMENTS(uint);
break;
default:
assert(0 && "bad index_size in find_sub_primitives()");
}
#undef SCAN_ELEMENTS
*num_sub_prims = scan_num;
return sub_prims;
}
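/* Illustrative example (not part of the original code): scanning the ubyte
 * index list { 0, 1, 2, 0xff, 3, 4, 5 } with restart_index == 0xff yields two
 * sub-primitives, { start = 0, count = 3, min = 0, max = 2 } and
 * { start = 4, count = 3, min = 3, max = 5 }, which the caller below then
 * draws as two separate ranges.
 */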
/**
* Handle primitive restart in software.
*
* This function breaks up calls into the driver so primitive restart
* support is not required in the driver.
*/
static void
vbo_sw_primitive_restart_common_start(struct gl_context *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint num_instances,
GLuint base_instance,
struct gl_buffer_object *indirect,
GLsizeiptr indirect_offset,
bool primitive_restart,
unsigned restart_index)
{
GLuint prim_num;
struct _mesa_prim new_prim;
struct _mesa_index_buffer new_ib;
struct sub_primitive *sub_prims;
struct sub_primitive *sub_prim;
GLuint num_sub_prims;
GLuint sub_prim_num;
GLuint end_index;
GLuint sub_end_index;
struct _mesa_prim temp_prim;
GLboolean map_ib = ib->obj && !ib->obj->Mappings[MAP_INTERNAL].Pointer;
const void *ptr;
/* If there is an indirect buffer, map it and extract the draw params */
if (indirect) {
const uint32_t *indirect_params;
if (!ctx->Driver.MapBufferRange(ctx, 0, indirect->Size, GL_MAP_READ_BIT,
indirect, MAP_INTERNAL)) {
/* something went wrong with mapping, give up */
_mesa_error(ctx, GL_OUT_OF_MEMORY,
"failed to map indirect buffer for sw primitive restart");
return;
}
assert(nr_prims == 1);
new_prim = prims[0];
indirect_params = (const uint32_t *)
ADD_POINTERS(indirect->Mappings[MAP_INTERNAL].Pointer,
indirect_offset);
new_prim.count = indirect_params[0];
new_prim.start = indirect_params[2];
new_prim.basevertex = indirect_params[3];
num_instances = indirect_params[1];
base_instance = indirect_params[4];
new_ib = *ib;
new_ib.count = new_prim.count;
prims = &new_prim;
ib = &new_ib;
ctx->Driver.UnmapBuffer(ctx, indirect, MAP_INTERNAL);
}
/* Find the sub-primitives. These are regions in the index buffer which
* are split based on the primitive restart index value.
*/
if (map_ib) {
ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
ib->obj, MAP_INTERNAL);
}
if (ib->obj)
ptr = ADD_POINTERS(ib->obj->Mappings[MAP_INTERNAL].Pointer, ib->ptr);
else
ptr = ib->ptr;
sub_prims = find_sub_primitives(ptr, 1 << ib->index_size_shift,
prims[0].start, prims[0].start + ib->count,
restart_index, &num_sub_prims);
if (map_ib) {
ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
}
/* Loop over the primitives, and use the located sub-primitives to draw
* each primitive with a break to implement each primitive restart.
*/
for (prim_num = 0; prim_num < nr_prims; prim_num++) {
end_index = prims[prim_num].start + prims[prim_num].count;
memcpy(&temp_prim, &prims[prim_num], sizeof (temp_prim));
/* Loop over the sub-primitives drawing sub-ranges of the primitive. */
for (sub_prim_num = 0; sub_prim_num < num_sub_prims; sub_prim_num++) {
sub_prim = &sub_prims[sub_prim_num];
sub_end_index = sub_prim->start + sub_prim->count;
if (prims[prim_num].start <= sub_prim->start) {
temp_prim.start = MAX2(prims[prim_num].start, sub_prim->start);
temp_prim.count = MIN2(sub_end_index, end_index) - temp_prim.start;
if ((temp_prim.start == sub_prim->start) &&
(temp_prim.count == sub_prim->count)) {
ctx->Driver.Draw(ctx, &temp_prim, 1, ib, true, false, 0,
sub_prim->min_index, sub_prim->max_index,
num_instances, base_instance);
} else {
ctx->Driver.Draw(ctx, &temp_prim, 1, ib,
false, false, 0, -1, -1,
num_instances, base_instance);
}
}
if (sub_end_index >= end_index) {
break;
}
}
}
free(sub_prims);
}
static void
vbo_sw_primitive_restart(struct gl_context *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint num_instances,
GLuint base_instance,
struct gl_buffer_object *indirect,
GLsizeiptr indirect_offset,
bool primitive_restart,
unsigned restart_index)
{
unsigned i;
for (i = 1; i < nr_prims; i++) {
if (prims[i].start != prims[0].start)
break;
}
vbo_sw_primitive_restart_common_start(ctx, &prims[0], i, ib,
num_instances, base_instance,
indirect, indirect_offset,
primitive_restart,
restart_index);
if (i != nr_prims) {
vbo_sw_primitive_restart(ctx, &prims[i], nr_prims - i, ib,
num_instances, base_instance,
indirect, indirect_offset,
primitive_restart,
restart_index);
}
}
/**
* Check if the hardware's cut index support can handle the primitive
* restart index value (pre-Haswell only).
*/
static bool
can_cut_index_handle_restart_index(struct gl_context *ctx,
const struct _mesa_index_buffer *ib,
unsigned restart_index)
{
/* The FixedIndex variant means 0xFF, 0xFFFF, or 0xFFFFFFFF based on
* the index buffer type, which corresponds exactly to the hardware.
*/
if (ctx->Array.PrimitiveRestartFixedIndex)
return true;
bool cut_index_will_work;
switch (ib->index_size_shift) {
case 0:
cut_index_will_work = restart_index == 0xff;
break;
case 1:
cut_index_will_work = restart_index == 0xffff;
break;
case 2:
cut_index_will_work = restart_index == 0xffffffff;
break;
default:
unreachable("not reached");
}
return cut_index_will_work;
}
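/* For illustration (assumed values, not from the original code): with a
 * GL_UNSIGNED_SHORT index buffer (index_size_shift == 1), a restart index of
 * 0xffff can be consumed by the fixed cut index, while an application-chosen
 * value such as 0x1234 falls back to the software path implemented above.
 */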
/**
* Check if the hardware's cut index support can handle the primitive
* restart case.
*/
static bool
can_cut_index_handle_prims(struct gl_context *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
unsigned restart_index)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
/* Haswell and later can handle it all in hardware. */
if (devinfo->verx10 >= 75)
return true;
if (!can_cut_index_handle_restart_index(ctx, ib, restart_index)) {
/* The primitive restart index can't be handled, so take
* the software path
*/
return false;
}
for (unsigned i = 0; i < nr_prims; i++) {
switch (prim[i].mode) {
case GL_POINTS:
case GL_LINES:
case GL_LINE_STRIP:
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
/* Cut index supports these primitive types */
break;
default:
/* Cut index does not support these primitive types */
//case GL_LINE_LOOP:
//case GL_TRIANGLE_FAN:
//case GL_QUADS:
//case GL_QUAD_STRIP:
//case GL_POLYGON:
return false;
}
}
return true;
}
/**
* Check if primitive restart is enabled, and if so, handle it properly.
*
* When the hardware's cut index can handle it, primitive restart is done in
* hardware; otherwise it falls back to a software path.
*/
GLboolean
brw_handle_primitive_restart(struct gl_context *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint num_instances, GLuint base_instance,
bool primitive_restart,
unsigned restart_index)
{
struct brw_context *brw = brw_context(ctx);
/* We only need to handle cases where there is an index buffer. */
if (ib == NULL) {
return GL_FALSE;
}
/* If we have set the in_progress flag, then we are in the middle
* of handling the primitive restart draw.
*/
if (brw->prim_restart.in_progress) {
return GL_FALSE;
}
/* If PrimitiveRestart is not enabled, then we aren't concerned about
* handling this draw.
*/
if (!primitive_restart) {
return GL_FALSE;
}
/* Signal that we are in the process of handling the
* primitive restart draw
*/
brw->prim_restart.in_progress = true;
if (can_cut_index_handle_prims(ctx, prims, nr_prims, ib, restart_index)) {
/* Cut index should work for primitive restart, so use it
*/
brw->prim_restart.enable_cut_index = true;
brw->prim_restart.restart_index = restart_index;
brw_draw_prims(ctx, prims, nr_prims, ib, false, primitive_restart,
restart_index, -1, -1,
num_instances, base_instance);
brw->prim_restart.enable_cut_index = false;
} else {
/* Not all the primitive draw modes are supported by the cut index,
* so take the software path
*/
struct gl_buffer_object *indirect_data = brw->draw.draw_indirect_data;
/* Clear this to make the draw direct. */
brw->draw.draw_indirect_data = NULL;
vbo_sw_primitive_restart(ctx, prims, nr_prims, ib, num_instances,
base_instance, indirect_data,
brw->draw.draw_indirect_offset,
primitive_restart, restart_index);
}
brw->prim_restart.in_progress = false;
/* The primitive restart draw was completed, so return true. */
return GL_TRUE;
}

View file

@@ -1,888 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include <pthread.h>
#include "main/glspirv.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
#include "util/ralloc.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/program.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "brw_program.h"
#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_defines.h"
#include "brw_batch.h"
#include "brw_cs.h"
#include "brw_gs.h"
#include "brw_vs.h"
#include "brw_wm.h"
#include "brw_state.h"
#include "main/shaderapi.h"
#include "main/shaderobj.h"
static bool
brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
{
if (is_scalar) {
nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
type_size_scalar_bytes);
return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
} else {
nir_assign_var_locations(nir, nir_var_uniform, &nir->num_uniforms,
type_size_vec4_bytes);
return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
}
}
static struct gl_program *brw_new_program(struct gl_context *ctx,
gl_shader_stage stage,
GLuint id, bool is_arb_asm);
nir_shader *
brw_create_nir(struct brw_context *brw,
const struct gl_shader_program *shader_prog,
struct gl_program *prog,
gl_shader_stage stage,
bool is_scalar)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
const nir_shader_compiler_options *options =
ctx->Const.ShaderCompilerOptions[stage].NirOptions;
nir_shader *nir;
/* First, lower the GLSL/Mesa IR or SPIR-V to NIR */
if (shader_prog) {
if (shader_prog->data->spirv) {
nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
} else {
nir = glsl_to_nir(ctx, shader_prog, stage, options);
/* Remap the locations to slots so those requiring two slots will
* occupy two locations. For instance, if we have in the IR code a
* dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR attr0
* will use locations/slots 0 and 1, and attr1 will use location/slot 2
*/
if (nir->info.stage == MESA_SHADER_VERTEX)
nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
}
assert(nir);
nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
NULL);
nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir), true, false);
} else {
nir = prog_to_nir(prog, options);
NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
}
nir_validate_shader(nir, "before brw_preprocess_nir");
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
if (!ctx->SoftFP64 && ((nir->info.bit_sizes_int | nir->info.bit_sizes_float) & 64) &&
(options->lower_doubles_options & nir_lower_fp64_full_software)) {
ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
}
brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);
if (stage == MESA_SHADER_TESS_CTRL) {
/* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gfx8+. */
static const gl_state_index16 tokens[STATE_LENGTH] =
{ STATE_TCS_PATCH_VERTICES_IN };
nir_lower_patch_vertices(nir, 0, devinfo->ver >= 8 ? tokens : NULL);
}
if (stage == MESA_SHADER_TESS_EVAL) {
/* Lower gl_PatchVerticesIn to a constant if we have a TCS, or
* a uniform if we don't.
*/
struct gl_linked_shader *tcs =
shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
uint32_t static_patch_vertices =
tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0;
static const gl_state_index16 tokens[STATE_LENGTH] =
{ STATE_TES_PATCH_VERTICES_IN };
nir_lower_patch_vertices(nir, static_patch_vertices, tokens);
}
if (stage == MESA_SHADER_FRAGMENT) {
static const struct nir_lower_wpos_ytransform_options wpos_options = {
.state_tokens = {STATE_FB_WPOS_Y_TRANSFORM, 0, 0},
.fs_coord_pixel_center_integer = 1,
.fs_coord_origin_upper_left = 1,
};
bool progress = false;
NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
if (progress) {
_mesa_add_state_reference(prog->Parameters,
wpos_options.state_tokens);
}
}
return nir;
}
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
assert(glsl_type_is_vector_or_scalar(type));
uint32_t comp_size = glsl_type_is_boolean(type)
? 4 : glsl_get_bit_size(type) / 8;
unsigned length = glsl_get_vector_elements(type);
*size = comp_size * length,
*align = comp_size * (length == 3 ? 4 : length);
}
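/* Example values (illustrative): a vec3 gives comp_size == 4 and length == 3,
 * so *size == 12 and *align == 16 (vec3 is padded to vec4 alignment), while a
 * dvec2 gives *size == 16 and *align == 16.
 */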
void
brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
struct gl_program *prog,
const struct intel_device_info *devinfo)
{
NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
BITSET_COPY(prog->info.textures_used, prog->nir->info.textures_used);
BITSET_COPY(prog->info.textures_used_by_txf, prog->nir->info.textures_used_by_txf);
NIR_PASS_V(prog->nir, brw_nir_lower_storage_image, devinfo);
if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
shader_prog->data->spirv) {
NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared, shared_type_info);
NIR_PASS_V(prog->nir, nir_lower_explicit_io,
nir_var_mem_shared, nir_address_format_32bit_offset);
}
NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
/* Do a round of constant folding to clean up address calculations */
NIR_PASS_V(prog->nir, nir_opt_constant_folding);
}
void
brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
{
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
/* Copy the info we just generated back into the gl_program */
const char *prog_name = prog->info.name;
const char *prog_label = prog->info.label;
prog->info = nir->info;
prog->info.name = prog_name;
prog->info.label = prog_label;
}
static unsigned
get_new_program_id(struct brw_screen *screen)
{
return p_atomic_inc_return(&screen->program_id);
}
static struct gl_program *
brw_new_program(struct gl_context *ctx,
gl_shader_stage stage,
GLuint id, bool is_arb_asm)
{
struct brw_context *brw = brw_context(ctx);
struct brw_program *prog = rzalloc(NULL, struct brw_program);
if (prog) {
prog->id = get_new_program_id(brw->screen);
return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
}
return NULL;
}
static void
brw_delete_program(struct gl_context *ctx, struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
/* Beware! prog's refcount has reached zero, and it's about to be freed.
*
* In brw_upload_pipeline_state(), we compare brw->programs[i] to
* ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
* pointer has changed.
*
* We cannot leave brw->programs[i] as a dangling pointer to the dead
* program. malloc() may allocate the same memory for a new gl_program,
* causing us to see matching pointers...but totally different programs.
*
* We cannot set brw->programs[i] to NULL, either. If we've deleted the
* active program, Mesa may set ctx->FooProgram._Current to NULL. That
* would cause us to see matching pointers (NULL == NULL), and fail to
* detect that a program has changed since our last draw.
*
* So, set it to a bogus gl_program pointer that will never match,
* causing us to properly reevaluate the state on our next draw.
*
* Getting this wrong causes heisenbugs which are very hard to catch,
* as you need a very specific allocation pattern to hit the problem.
*/
static const struct gl_program deleted_program;
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
if (brw->programs[i] == prog)
brw->programs[i] = (struct gl_program *) &deleted_program;
}
_mesa_delete_program(ctx, prog);
}
static GLboolean
brw_program_string_notify(struct gl_context *ctx,
GLenum target,
struct gl_program *prog)
{
assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
struct brw_context *brw = brw_context(ctx);
const struct brw_compiler *compiler = brw->screen->compiler;
switch (target) {
case GL_FRAGMENT_PROGRAM_ARB: {
struct brw_program *newFP = brw_program(prog);
const struct brw_program *curFP =
brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]);
if (newFP == curFP)
brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
_mesa_program_fragment_position_to_sysval(&newFP->program);
newFP->id = get_new_program_id(brw->screen);
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
brw_shader_gather_info(prog->nir, prog);
brw_fs_precompile(ctx, prog);
break;
}
case GL_VERTEX_PROGRAM_ARB: {
struct brw_program *newVP = brw_program(prog);
const struct brw_program *curVP =
brw_program_const(brw->programs[MESA_SHADER_VERTEX]);
if (newVP == curVP)
brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
if (newVP->program.arb.IsPositionInvariant) {
_mesa_insert_mvp_code(ctx, &newVP->program);
}
newVP->id = get_new_program_id(brw->screen);
/* Also tell tnl about it:
*/
_tnl_program_string(ctx, target, prog);
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
compiler->scalar_stage[MESA_SHADER_VERTEX]);
brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
brw_shader_gather_info(prog->nir, prog);
brw_vs_precompile(ctx, prog);
break;
}
default:
/*
* driver->ProgramStringNotify is only called for ARB programs, fixed
* function vertex programs, and ir_to_mesa (which isn't used by the
* i965 back-end). Therefore, even after geometry shaders are added,
* this function should only ever be called with a target of
* GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
*/
unreachable("Unexpected target in brwProgramStringNotify");
}
return true;
}
static void
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
assert(devinfo->ver >= 7 && devinfo->ver <= 11);
if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
GL_ELEMENT_ARRAY_BARRIER_BIT |
GL_COMMAND_BARRIER_BIT))
bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
if (barriers & GL_UNIFORM_BARRIER_BIT)
bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE);
if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT |
GL_PIXEL_BUFFER_BARRIER_BIT))
bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH);
if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH);
/* Typed surface messages are handled by the render cache on IVB, so we
* need to flush it too.
*/
if (devinfo->verx10 == 70)
bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
brw_emit_pipe_control_flush(brw, bits);
}
static void
brw_framebuffer_fetch_barrier(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
if (devinfo->ver >= 6) {
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
} else {
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH);
}
}
}
void
brw_get_scratch_bo(struct brw_context *brw,
struct brw_bo **scratch_bo, int size)
{
struct brw_bo *old_bo = *scratch_bo;
if (old_bo && old_bo->size < size) {
brw_bo_unreference(old_bo);
old_bo = NULL;
}
if (!old_bo) {
*scratch_bo =
brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH);
}
}
/**
* Reserve enough scratch space for the given stage to hold \p per_thread_size
* bytes times the given \p thread_count.
*/
void
brw_alloc_stage_scratch(struct brw_context *brw,
struct brw_stage_state *stage_state,
unsigned per_thread_size)
{
if (stage_state->per_thread_scratch >= per_thread_size)
return;
stage_state->per_thread_scratch = per_thread_size;
if (stage_state->scratch_bo)
brw_bo_unreference(stage_state->scratch_bo);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(stage_state->stage < ARRAY_SIZE(devinfo->max_scratch_ids));
unsigned max_ids = devinfo->max_scratch_ids[stage_state->stage];
stage_state->scratch_bo =
brw_bo_alloc(brw->bufmgr, "shader scratch space",
per_thread_size * max_ids, BRW_MEMZONE_SCRATCH);
}
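/* Sizing example with made-up numbers: if the stage's max_scratch_ids is 64
 * and per_thread_size is 2048 bytes, a single 128 KiB scratch BO (64 * 2048)
 * is allocated and kept until a larger per-thread size is requested.
 */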
void
brw_init_frag_prog_functions(struct dd_function_table *functions)
{
assert(functions->ProgramStringNotify == _tnl_program_string);
functions->NewProgram = brw_new_program;
functions->DeleteProgram = brw_delete_program;
functions->ProgramStringNotify = brw_program_string_notify;
functions->LinkShader = brw_link_shader;
functions->MemoryBarrier = brw_memory_barrier;
functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
}
struct shader_times {
uint64_t time;
uint64_t written;
uint64_t reset;
};
void
brw_init_shader_time(struct brw_context *brw)
{
const int max_entries = 2048;
brw->shader_time.bo =
brw_bo_alloc(brw->bufmgr, "shader time",
max_entries * BRW_SHADER_TIME_STRIDE * 3,
BRW_MEMZONE_OTHER);
brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
max_entries);
brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
max_entries);
brw->shader_time.max_entries = max_entries;
}
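/* Layout note, derived from the collection code below: each entry occupies
 * three consecutive BRW_SHADER_TIME_STRIDE-sized slots in the BO -- the
 * accumulated cycles, a "written" count and a "reset" count -- so entry i's
 * cycles start at byte offset i * 3 * BRW_SHADER_TIME_STRIDE.
 */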
static int
compare_time(const void *a, const void *b)
{
uint64_t * const *a_val = a;
uint64_t * const *b_val = b;
/* We don't just subtract because we're turning the value to an int. */
if (**a_val < **b_val)
return -1;
else if (**a_val == **b_val)
return 0;
else
return 1;
}
static void
print_shader_time_line(const char *stage, const char *name,
int shader_num, uint64_t time, uint64_t total)
{
fprintf(stderr, "%-6s%-18s", stage, name);
if (shader_num != 0)
fprintf(stderr, "%4d: ", shader_num);
else
fprintf(stderr, " : ");
fprintf(stderr, "%16lld (%7.2f Gcycles) %4.1f%%\n",
(long long)time,
(double)time / 1000000000.0,
(double)time / total * 100.0);
}
static void
brw_report_shader_time(struct brw_context *brw)
{
if (!brw->shader_time.bo || !brw->shader_time.num_entries)
return;
uint64_t scaled[brw->shader_time.num_entries];
uint64_t *sorted[brw->shader_time.num_entries];
uint64_t total_by_type[ST_CS + 1];
memset(total_by_type, 0, sizeof(total_by_type));
double total = 0;
for (int i = 0; i < brw->shader_time.num_entries; i++) {
uint64_t written = 0, reset = 0;
enum shader_time_shader_type type = brw->shader_time.types[i];
sorted[i] = &scaled[i];
switch (type) {
case ST_VS:
case ST_TCS:
case ST_TES:
case ST_GS:
case ST_FS8:
case ST_FS16:
case ST_FS32:
case ST_CS:
written = brw->shader_time.cumulative[i].written;
reset = brw->shader_time.cumulative[i].reset;
break;
default:
/* I sometimes want to print things that aren't the 3 shader times.
* Just print the sum in that case.
*/
written = 1;
reset = 0;
break;
}
uint64_t time = brw->shader_time.cumulative[i].time;
if (written) {
scaled[i] = time / written * (written + reset);
} else {
scaled[i] = time;
}
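/* Worked example with made-up numbers: time == 900, written == 3 and
 * reset == 1 scale to 900 / 3 * (3 + 1) == 1200, i.e. the measured time is
 * extrapolated to cover the reset slots as well.
 */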
switch (type) {
case ST_VS:
case ST_TCS:
case ST_TES:
case ST_GS:
case ST_FS8:
case ST_FS16:
case ST_FS32:
case ST_CS:
total_by_type[type] += scaled[i];
break;
default:
break;
}
total += scaled[i];
}
if (total == 0) {
fprintf(stderr, "No shader time collected yet\n");
return;
}
qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
fprintf(stderr, "\n");
fprintf(stderr, "type ID cycles spent %% of total\n");
for (int s = 0; s < brw->shader_time.num_entries; s++) {
const char *stage;
/* Work back from the sorted pointers times to a time to print. */
int i = sorted[s] - scaled;
if (scaled[i] == 0)
continue;
int shader_num = brw->shader_time.ids[i];
const char *shader_name = brw->shader_time.names[i];
switch (brw->shader_time.types[i]) {
case ST_VS:
stage = "vs";
break;
case ST_TCS:
stage = "tcs";
break;
case ST_TES:
stage = "tes";
break;
case ST_GS:
stage = "gs";
break;
case ST_FS8:
stage = "fs8";
break;
case ST_FS16:
stage = "fs16";
break;
case ST_FS32:
stage = "fs32";
break;
case ST_CS:
stage = "cs";
break;
default:
stage = "other";
break;
}
print_shader_time_line(stage, shader_name, shader_num,
scaled[i], total);
}
fprintf(stderr, "\n");
print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total);
print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
}
static void
brw_collect_shader_time(struct brw_context *brw)
{
if (!brw->shader_time.bo)
return;
/* This probably stalls on the last rendering. We could fix that by
* delaying reading the reports, but it doesn't look like it's a big
* overhead compared to the cost of tracking the time in the first place.
*/
void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
for (int i = 0; i < brw->shader_time.num_entries; i++) {
uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
}
/* Zero the BO out to clear it out for our next collection.
*/
memset(bo_map, 0, brw->shader_time.bo->size);
brw_bo_unmap(brw->shader_time.bo);
}
void
brw_collect_and_report_shader_time(struct brw_context *brw)
{
brw_collect_shader_time(brw);
if (brw->shader_time.report_time == 0 ||
get_time() - brw->shader_time.report_time >= 1.0) {
brw_report_shader_time(brw);
brw->shader_time.report_time = get_time();
}
}
/**
* Chooses an index in the shader_time buffer and sets up tracking information
* for our printouts.
*
* Note that this holds on to references to the underlying programs, which may
* change their lifetimes compared to normal operation.
*/
int
brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
enum shader_time_shader_type type, bool is_glsl_sh)
{
int shader_time_index = brw->shader_time.num_entries++;
assert(shader_time_index < brw->shader_time.max_entries);
brw->shader_time.types[shader_time_index] = type;
const char *name;
if (prog->Id == 0) {
name = "ff";
} else if (is_glsl_sh) {
name = prog->info.label ?
ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
} else {
name = "prog";
}
brw->shader_time.names[shader_time_index] = name;
brw->shader_time.ids[shader_time_index] = prog->Id;
return shader_time_index;
}
void
brw_destroy_shader_time(struct brw_context *brw)
{
brw_bo_unreference(brw->shader_time.bo);
brw->shader_time.bo = NULL;
}
void
brw_stage_prog_data_free(const void *p)
{
struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
ralloc_free(prog_data->param);
ralloc_free(prog_data->pull_param);
}
void
brw_dump_arb_asm(const char *stage, struct gl_program *prog)
{
fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
stage, prog->Id, stage);
_mesa_print_program(prog);
}
void
brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
struct brw_sampler_prog_key_data *tex,
const struct gl_program *prog)
{
const bool has_shader_channel_select = devinfo->verx10 >= 75;
unsigned sampler_count = util_last_bit(prog->SamplersUsed);
for (unsigned i = 0; i < sampler_count; i++) {
if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
/* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
tex->swizzles[i] =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
} else {
/* Color sampler: assume no swizzling. */
tex->swizzles[i] = SWIZZLE_XYZW;
}
}
}
/**
* Sets up the starting offsets for the groups of binding table entries
* common to all pipeline stages.
*
* Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
* unused, and also to make sure that adding small offsets to them will
* trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
*/
uint32_t
brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
const struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data,
uint32_t next_binding_table_offset)
{
int num_textures = util_last_bit(prog->SamplersUsed);
stage_prog_data->binding_table.texture_start = next_binding_table_offset;
next_binding_table_offset += num_textures;
if (prog->info.num_ubos) {
assert(prog->info.num_ubos <= BRW_MAX_UBO);
stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
next_binding_table_offset += prog->info.num_ubos;
} else {
stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
}
if (prog->info.num_ssbos || prog->info.num_abos) {
assert(prog->info.num_abos <= BRW_MAX_ABO);
assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
} else {
stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
}
if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
next_binding_table_offset++;
} else {
stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
}
if (prog->info.uses_texture_gather) {
if (devinfo->ver >= 8) {
stage_prog_data->binding_table.gather_texture_start =
stage_prog_data->binding_table.texture_start;
} else {
stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
next_binding_table_offset += num_textures;
}
} else {
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
}
if (prog->info.num_images) {
stage_prog_data->binding_table.image_start = next_binding_table_offset;
next_binding_table_offset += prog->info.num_images;
} else {
stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
}
/* This may or may not be used depending on how the compile goes. */
stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
next_binding_table_offset++;
/* Plane 0 is just the regular texture section */
stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
next_binding_table_offset += num_textures;
stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
next_binding_table_offset += num_textures;
/* Set the binding table size. Some callers may append new entries
* and increase this accordingly.
*/
stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
assert(next_binding_table_offset <= BRW_MAX_SURFACES);
return next_binding_table_offset;
}
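/* Illustrative layout, assuming next_binding_table_offset starts at 0, two
 * textures, one UBO, no SSBOs/ABOs/images, no texture gather and
 * DEBUG_SHADER_TIME off: texture_start = 0, ubo_start = 2,
 * pull_constants_start = 3, plane_start[] = { 0, 4, 6 }, and the function
 * returns 8 (size_bytes == 32); unused groups stay at 0xd0d0d0d0.
 */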
void
brw_populate_default_key(const struct brw_compiler *compiler,
union brw_any_prog_key *prog_key,
struct gl_shader_program *sh_prog,
struct gl_program *prog)
{
switch (prog->info.stage) {
case MESA_SHADER_VERTEX:
brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
break;
case MESA_SHADER_TESS_CTRL:
brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
break;
case MESA_SHADER_TESS_EVAL:
brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
break;
case MESA_SHADER_GEOMETRY:
brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
break;
case MESA_SHADER_FRAGMENT:
brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
break;
case MESA_SHADER_COMPUTE:
brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
break;
default:
unreachable("Unsupported stage!");
}
}
void
brw_debug_recompile(struct brw_context *brw,
gl_shader_stage stage,
unsigned api_id,
struct brw_base_prog_key *key)
{
const struct brw_compiler *compiler = brw->screen->compiler;
enum brw_cache_id cache_id = brw_stage_cache_id(stage);
brw_shader_perf_log(compiler, brw, "Recompiling %s shader for program %d\n",
_mesa_shader_stage_to_string(stage), api_id);
const void *old_key =
brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id);
brw_debug_key_recompile(compiler, brw, stage, old_key, key);
}

View file

@@ -1,145 +0,0 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef BRW_PROGRAM_H
#define BRW_PROGRAM_H
#include "compiler/brw_compiler.h"
#include "nir.h"
#ifdef __cplusplus
extern "C" {
#endif
struct brw_context;
struct blob;
struct blob_reader;
enum brw_param_domain {
BRW_PARAM_DOMAIN_BUILTIN = 0,
BRW_PARAM_DOMAIN_PARAMETER,
BRW_PARAM_DOMAIN_UNIFORM,
BRW_PARAM_DOMAIN_IMAGE,
};
#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff)
#define BRW_PARAM_PARAMETER(idx, comp) \
BRW_PARAM(PARAMETER, ((idx) << 2) | (comp))
#define BRW_PARAM_PARAMETER_IDX(param) (BRW_PARAM_VALUE(param) >> 2)
#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3)
#define BRW_PARAM_UNIFORM(idx) BRW_PARAM(UNIFORM, (idx))
#define BRW_PARAM_UNIFORM_IDX(param) BRW_PARAM_VALUE(param)
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8)
#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf)
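/* Quick worked example (illustrative only): BRW_PARAM_PARAMETER(5, 2) encodes
 * to 0x01000016 -- BRW_PARAM_DOMAIN_PARAMETER in the top byte, index 5 and
 * component 2 in the low bits -- and BRW_PARAM_PARAMETER_IDX() /
 * BRW_PARAM_PARAMETER_COMP() recover 5 and 2 from that value.
 */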
struct nir_shader *brw_create_nir(struct brw_context *brw,
const struct gl_shader_program *shader_prog,
struct gl_program *prog,
gl_shader_stage stage,
bool is_scalar);
void brw_nir_lower_resources(nir_shader *nir,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
const struct intel_device_info *devinfo);
void brw_shader_gather_info(nir_shader *nir, struct gl_program *prog);
void brw_setup_tex_for_precompile(const struct intel_device_info *devinfo,
struct brw_sampler_prog_key_data *tex,
const struct gl_program *prog);
void brw_populate_base_prog_key(struct gl_context *ctx,
const struct brw_program *prog,
struct brw_base_prog_key *key);
void brw_populate_default_base_prog_key(const struct intel_device_info *devinfo,
const struct brw_program *prog,
struct brw_base_prog_key *key);
void brw_debug_recompile(struct brw_context *brw, gl_shader_stage stage,
unsigned api_id, struct brw_base_prog_key *key);
uint32_t
brw_assign_common_binding_table_offsets(const struct intel_device_info *devinfo,
const struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data,
uint32_t next_binding_table_offset);
void
brw_populate_default_key(const struct brw_compiler *compiler,
union brw_any_prog_key *prog_key,
struct gl_shader_program *sh_prog,
struct gl_program *prog);
void
brw_stage_prog_data_free(const void *prog_data);
void
brw_dump_arb_asm(const char *stage, struct gl_program *prog);
bool brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog);
bool brw_tcs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
bool brw_tes_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
bool brw_gs_precompile(struct gl_context *ctx, struct gl_program *prog);
bool brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog);
bool brw_cs_precompile(struct gl_context *ctx, struct gl_program *prog);
GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
void brw_upload_tcs_prog(struct brw_context *brw);
void brw_tcs_populate_key(struct brw_context *brw,
struct brw_tcs_prog_key *key);
void brw_tcs_populate_default_key(const struct brw_compiler *compiler,
struct brw_tcs_prog_key *key,
struct gl_shader_program *sh_prog,
struct gl_program *prog);
void brw_upload_tes_prog(struct brw_context *brw);
void brw_tes_populate_key(struct brw_context *brw,
struct brw_tes_prog_key *key);
void brw_tes_populate_default_key(const struct brw_compiler *compiler,
struct brw_tes_prog_key *key,
struct gl_shader_program *sh_prog,
struct gl_program *prog);
void brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage,
const void *program,
struct brw_stage_prog_data *prog_data);
bool brw_read_blob_program_data(struct blob_reader *binary,
struct gl_program *prog, gl_shader_stage stage,
const uint8_t **program,
struct brw_stage_prog_data *prog_data);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

View file

@@ -1,353 +0,0 @@
/*
* Copyright (c) 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdint.h>
#include "compiler/nir/nir_serialize.h"
#include "util/build_id.h"
#include "util/mesa-sha1.h"
#include "brw_context.h"
#include "brw_program.h"
#include "brw_state.h"
static uint8_t driver_sha1[20];
void
brw_program_binary_init(unsigned device_id)
{
const struct build_id_note *note =
build_id_find_nhdr_for_addr(brw_program_binary_init);
assert(note);
/**
* With Mesa's megadrivers, the sha1 of i965_dri.so alone may not be
* unique. Therefore, we make a sha1 of the "i965" string and the sha1
* build id from i965_dri.so.
*/
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
char renderer[10];
assert(device_id < 0x10000);
int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id);
assert(len == sizeof(renderer) - 1);
_mesa_sha1_update(&ctx, renderer, len);
_mesa_sha1_update(&ctx, build_id_data(note), build_id_length(note));
_mesa_sha1_final(&ctx, driver_sha1);
}
void
brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1)
{
memcpy(sha1, driver_sha1, sizeof(uint8_t) * 20);
}
enum driver_cache_blob_part {
END_PART,
INTEL_PART,
NIR_PART,
};
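/* The driver cache blob is a sequence of (uint32 part type, uint32 part size,
 * payload) records terminated by a bare END_PART tag. A hypothetical blob
 * holding only NIR would look like: NIR_PART, <payload size>, <serialized
 * NIR bytes>, END_PART.
 */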
static bool
blob_parts_valid(void *blob, uint32_t size)
{
struct blob_reader reader;
blob_reader_init(&reader, blob, size);
do {
uint32_t part_type = blob_read_uint32(&reader);
if (reader.overrun)
return false;
if (part_type == END_PART)
return reader.current == reader.end;
switch ((enum driver_cache_blob_part)part_type) {
case INTEL_PART:
case NIR_PART:
/* Read the uint32_t part-size and skip over it */
blob_skip_bytes(&reader, blob_read_uint32(&reader));
if (reader.overrun)
return false;
break;
default:
return false;
}
} while (true);
}
static bool
blob_has_part(void *blob, uint32_t size, enum driver_cache_blob_part part)
{
struct blob_reader reader;
blob_reader_init(&reader, blob, size);
assert(blob_parts_valid(blob, size));
do {
uint32_t part_type = blob_read_uint32(&reader);
if (part_type == END_PART)
return false;
if (part_type == part)
return true;
blob_skip_bytes(&reader, blob_read_uint32(&reader));
} while (true);
}
static bool
driver_blob_is_ready(void *blob, uint32_t size, bool with_intel_program)
{
if (!blob) {
return false;
} else if (!blob_parts_valid(blob, size)) {
unreachable("Driver blob format is bad!");
return false;
} else if (blob_has_part(blob, size, INTEL_PART) == with_intel_program) {
return true;
} else {
return false;
}
}
static void
serialize_nir_part(struct blob *writer, struct gl_program *prog)
{
blob_write_uint32(writer, NIR_PART);
intptr_t size_offset = blob_reserve_uint32(writer);
size_t nir_start = writer->size;
nir_serialize(writer, prog->nir, false);
blob_overwrite_uint32(writer, size_offset, writer->size - nir_start);
}
void
brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog)
{
if (driver_blob_is_ready(prog->driver_cache_blob,
prog->driver_cache_blob_size, false))
return;
if (prog->driver_cache_blob)
ralloc_free(prog->driver_cache_blob);
struct blob writer;
blob_init(&writer);
serialize_nir_part(&writer, prog);
blob_write_uint32(&writer, END_PART);
prog->driver_cache_blob = ralloc_size(NULL, writer.size);
memcpy(prog->driver_cache_blob, writer.data, writer.size);
prog->driver_cache_blob_size = writer.size;
blob_finish(&writer);
}
static bool
deserialize_intel_program(struct blob_reader *reader, struct gl_context *ctx,
struct gl_program *prog, gl_shader_stage stage)
{
struct brw_context *brw = brw_context(ctx);
union brw_any_prog_key prog_key;
blob_copy_bytes(reader, &prog_key, brw_prog_key_size(stage));
prog_key.base.program_string_id = brw_program(prog)->id;
enum brw_cache_id cache_id = brw_stage_cache_id(stage);
const uint8_t *program;
struct brw_stage_prog_data *prog_data =
ralloc_size(NULL, sizeof(union brw_any_prog_data));
if (!brw_read_blob_program_data(reader, prog, stage, &program, prog_data)) {
ralloc_free(prog_data);
return false;
}
uint32_t offset;
void *out_prog_data;
brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
program, prog_data->program_size, prog_data,
brw_prog_data_size(stage), &offset, &out_prog_data);
ralloc_free(prog_data);
return true;
}
void
brw_program_deserialize_driver_blob(struct gl_context *ctx,
struct gl_program *prog,
gl_shader_stage stage)
{
if (!prog->driver_cache_blob)
return;
struct blob_reader reader;
blob_reader_init(&reader, prog->driver_cache_blob,
prog->driver_cache_blob_size);
do {
uint32_t part_type = blob_read_uint32(&reader);
if ((enum driver_cache_blob_part)part_type == END_PART)
break;
switch ((enum driver_cache_blob_part)part_type) {
case INTEL_PART: {
ASSERTED uint32_t gen_size = blob_read_uint32(&reader);
assert(!reader.overrun &&
(uintptr_t)(reader.end - reader.current) > gen_size);
deserialize_intel_program(&reader, ctx, prog, stage);
break;
}
case NIR_PART: {
ASSERTED uint32_t nir_size = blob_read_uint32(&reader);
assert(!reader.overrun &&
(uintptr_t)(reader.end - reader.current) > nir_size);
const struct nir_shader_compiler_options *options =
ctx->Const.ShaderCompilerOptions[stage].NirOptions;
prog->nir = nir_deserialize(NULL, options, &reader);
break;
}
default:
unreachable("Unsupported blob part type!");
break;
}
} while (true);
ralloc_free(prog->driver_cache_blob);
prog->driver_cache_blob = NULL;
prog->driver_cache_blob_size = 0;
}
/* This is just a wrapper around brw_program_deserialize_driver_blob() as
* i965 doesn't need gl_shader_program like other drivers do.
*/
void
brw_deserialize_program_binary(struct gl_context *ctx,
struct gl_shader_program *shProg,
struct gl_program *prog)
{
brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
}
static void
serialize_intel_part(struct blob *writer, struct gl_context *ctx,
struct gl_shader_program *sh_prog,
struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
union brw_any_prog_key key;
brw_populate_default_key(brw->screen->compiler, &key, sh_prog, prog);
const gl_shader_stage stage = prog->info.stage;
uint32_t offset = 0;
void *prog_data = NULL;
if (brw_search_cache(&brw->cache, brw_stage_cache_id(stage), &key,
brw_prog_key_size(stage), &offset, &prog_data,
false)) {
const void *program_map = brw->cache.map + offset;
/* TODO: Improve perf for non-LLC. It would be best to save the program at
* generation time, while it still lives in normal CPU-cached memory. An
* easier change would be to use _mesa_streaming_load_memcpy to read from
* the mapped program memory.
*/
blob_write_uint32(writer, INTEL_PART);
intptr_t size_offset = blob_reserve_uint32(writer);
size_t gen_start = writer->size;
blob_write_bytes(writer, &key, brw_prog_key_size(stage));
brw_write_blob_program_data(writer, stage, program_map, prog_data);
blob_overwrite_uint32(writer, size_offset, writer->size - gen_start);
}
}
void
brw_serialize_program_binary(struct gl_context *ctx,
struct gl_shader_program *sh_prog,
struct gl_program *prog)
{
if (driver_blob_is_ready(prog->driver_cache_blob,
prog->driver_cache_blob_size, true))
return;
if (prog->driver_cache_blob) {
if (!prog->nir) {
/* If we loaded from the disk shader cache, then the nir might not
* have been deserialized yet.
*/
brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
}
ralloc_free(prog->driver_cache_blob);
}
struct blob writer;
blob_init(&writer);
serialize_nir_part(&writer, prog);
serialize_intel_part(&writer, ctx, sh_prog, prog);
blob_write_uint32(&writer, END_PART);
prog->driver_cache_blob = ralloc_size(NULL, writer.size);
memcpy(prog->driver_cache_blob, writer.data, writer.size);
prog->driver_cache_blob_size = writer.size;
blob_finish(&writer);
}
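/* The per-stage program payload written below (and read back by
* brw_read_blob_program_data()) is, in order: the brw_stage_prog_data
* struct, the compiled program, then the push and pull parameter arrays
* sized by nr_params and nr_pull_params.
*/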
void
brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage,
const void *program,
struct brw_stage_prog_data *prog_data)
{
/* Write prog_data to blob. */
blob_write_bytes(binary, prog_data, brw_prog_data_size(stage));
/* Write program to blob. */
blob_write_bytes(binary, program, prog_data->program_size);
/* Write push params */
blob_write_bytes(binary, prog_data->param,
sizeof(uint32_t) * prog_data->nr_params);
/* Write pull params */
blob_write_bytes(binary, prog_data->pull_param,
sizeof(uint32_t) * prog_data->nr_pull_params);
}
bool
brw_read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
gl_shader_stage stage, const uint8_t **program,
struct brw_stage_prog_data *prog_data)
{
/* Read shader prog_data from blob. */
blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage));
if (binary->overrun)
return false;
/* Read shader program from blob. */
*program = blob_read_bytes(binary, prog_data->program_size);
/* Read push params */
prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params);
blob_copy_bytes(binary, prog_data->param,
sizeof(uint32_t) * prog_data->nr_params);
/* Read pull params */
prog_data->pull_param = rzalloc_array(NULL, uint32_t,
prog_data->nr_pull_params);
blob_copy_bytes(binary, prog_data->pull_param,
sizeof(uint32_t) * prog_data->nr_pull_params);
return !binary->overrun;
}

View file

@ -1,523 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
/** @file brw_program_cache.c
*
* This file implements a simple program cache for 965. The consumers can
* query the hash table of programs using a cache_id and program key, and
* receive the corresponding program buffer object (plus associated auxiliary
* data) in return. Objects in the cache may not have relocations
* (pointers to other BOs) in them.
*
* The inner workings are a simple hash table keyed by an XXH32 hash of the
* cache_id and key data.
*
* Replacement is not implemented. Instead, when the cache gets too
* big we throw out all of the cache data and let it get regenerated.
*/
#include "main/streaming-load-memcpy.h"
#include "x86/common_x86_asm.h"
#include "brw_batch.h"
#include "brw_state.h"
#include "brw_wm.h"
#include "brw_gs.h"
#include "brw_cs.h"
#include "brw_program.h"
#include "compiler/brw_eu.h"
#include "util/u_memory.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
#define FILE_DEBUG_FLAG DEBUG_STATE
struct brw_cache_item {
/**
* Effectively part of the key, cache_id identifies what kind of state
* buffer is involved, and also which dirty flag should be set.
*/
enum brw_cache_id cache_id;
/** 32-bit hash of the key data */
GLuint hash;
/** for variable-sized keys */
GLuint key_size;
GLuint prog_data_size;
const struct brw_base_prog_key *key;
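/* The prog_data blob lives immediately after the key in the same
* allocation; see brw_upload_cache() and brw_search_cache(). */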
uint32_t offset;
uint32_t size;
struct brw_cache_item *next;
};
enum brw_cache_id
brw_stage_cache_id(gl_shader_stage stage)
{
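/* Indexed by gl_shader_stage order: VERTEX, TESS_CTRL, TESS_EVAL,
* GEOMETRY, FRAGMENT, COMPUTE. */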
static const enum brw_cache_id stage_ids[] = {
BRW_CACHE_VS_PROG,
BRW_CACHE_TCS_PROG,
BRW_CACHE_TES_PROG,
BRW_CACHE_GS_PROG,
BRW_CACHE_FS_PROG,
BRW_CACHE_CS_PROG,
};
assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_ids));
return stage_ids[stage];
}
static GLuint
hash_key(struct brw_cache_item *item)
{
uint32_t hash = 0;
hash = XXH32(&item->cache_id, sizeof(item->cache_id), hash);
hash = XXH32(item->key, item->key_size, hash);
return hash;
}
static int
brw_cache_item_equals(const struct brw_cache_item *a,
const struct brw_cache_item *b)
{
return a->cache_id == b->cache_id &&
a->hash == b->hash &&
a->key_size == b->key_size &&
(memcmp(a->key, b->key, a->key_size) == 0);
}
static struct brw_cache_item *
search_cache(struct brw_cache *cache, GLuint hash,
struct brw_cache_item *lookup)
{
struct brw_cache_item *c;
#if 0
int bucketcount = 0;
for (c = cache->items[hash % cache->size]; c; c = c->next)
bucketcount++;
fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
cache->size, bucketcount, cache->n_items);
#endif
for (c = cache->items[hash % cache->size]; c; c = c->next) {
if (brw_cache_item_equals(lookup, c))
return c;
}
return NULL;
}
static void
rehash(struct brw_cache *cache)
{
struct brw_cache_item **items;
struct brw_cache_item *c, *next;
GLuint size, i;
size = cache->size * 3;
items = calloc(size, sizeof(*items));
for (i = 0; i < cache->size; i++)
for (c = cache->items[i]; c; c = next) {
next = c->next;
c->next = items[c->hash % size];
items[c->hash % size] = c;
}
free(cache->items);
cache->items = items;
cache->size = size;
}
/**
* Looks up the cache entry matching cache_id and key. On a hit, updates
* *inout_offset and *inout_prog_data and returns true; returns false on a miss.
*/
bool
brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
const void *key, GLuint key_size, uint32_t *inout_offset,
void *inout_prog_data, bool flag_state)
{
struct brw_cache_item *item;
struct brw_cache_item lookup;
GLuint hash;
lookup.cache_id = cache_id;
lookup.key = key;
lookup.key_size = key_size;
hash = hash_key(&lookup);
lookup.hash = hash;
item = search_cache(cache, hash, &lookup);
if (item == NULL)
return false;
void *prog_data = ((char *) item->key) + item->key_size;
if (item->offset != *inout_offset ||
prog_data != *((void **) inout_prog_data)) {
if (likely(flag_state))
cache->brw->ctx.NewDriverState |= (1 << cache_id);
*inout_offset = item->offset;
*((void **) inout_prog_data) = prog_data;
}
return true;
}
static void
brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
{
struct brw_context *brw = cache->brw;
struct brw_bo *new_bo;
perf_debug("Copying to larger program cache: %u kB -> %u kB\n",
(unsigned) cache->bo->size / 1024, new_size / 1024);
new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size,
BRW_MEMZONE_SHADER);
if (can_do_exec_capture(brw->screen))
new_bo->kflags |= EXEC_OBJECT_CAPTURE;
void *map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE |
MAP_ASYNC | MAP_PERSISTENT);
/* Copy any existing data that needs to be saved. */
if (cache->next_offset != 0) {
#ifdef USE_SSE41
if (!cache->bo->cache_coherent && cpu_has_sse4_1)
_mesa_streaming_load_memcpy(map, cache->map, cache->next_offset);
else
#endif
memcpy(map, cache->map, cache->next_offset);
}
brw_bo_unmap(cache->bo);
brw_bo_unreference(cache->bo);
cache->bo = new_bo;
cache->map = map;
/* Since we have a new BO in place, we need to signal the units
* that depend on it (state base address on gfx5+, or unit state before).
*/
brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE;
brw->batch.state_base_address_emitted = false;
}
/**
* Attempts to find an item in the cache with identical data.
*/
static const struct brw_cache_item *
brw_lookup_prog(const struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data, unsigned data_size)
{
unsigned i;
const struct brw_cache_item *item;
for (i = 0; i < cache->size; i++) {
for (item = cache->items[i]; item; item = item->next) {
if (item->cache_id != cache_id || item->size != data_size ||
memcmp(cache->map + item->offset, data, item->size) != 0)
continue;
return item;
}
}
return NULL;
}
static uint32_t
brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
{
uint32_t offset;
/* Allocate space in the cache BO for our new program. */
if (cache->next_offset + size > cache->bo->size) {
uint32_t new_size = cache->bo->size * 2;
while (cache->next_offset + size > new_size)
new_size *= 2;
brw_cache_new_bo(cache, new_size);
}
offset = cache->next_offset;
/* Programs are always 64-byte aligned, so set up the next one now */
cache->next_offset = ALIGN(offset + size, 64);
return offset;
}
const void *
brw_find_previous_compile(struct brw_cache *cache,
enum brw_cache_id cache_id,
unsigned program_string_id)
{
for (unsigned i = 0; i < cache->size; i++) {
for (struct brw_cache_item *c = cache->items[i]; c; c = c->next) {
if (c->cache_id == cache_id &&
c->key->program_string_id == program_string_id) {
return c->key;
}
}
}
return NULL;
}
void
brw_upload_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
const void *data,
GLuint data_size,
const void *prog_data,
GLuint prog_data_size,
uint32_t *out_offset,
void *out_prog_data)
{
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
const struct brw_cache_item *matching_data =
brw_lookup_prog(cache, cache_id, data, data_size);
GLuint hash;
void *tmp;
item->cache_id = cache_id;
item->size = data_size;
item->key = key;
item->key_size = key_size;
item->prog_data_size = prog_data_size;
hash = hash_key(item);
item->hash = hash;
/* If we can find a matching prog in the cache already, then reuse the
* existing stuff without creating new copy into the underlying buffer
* object. This is notably useful for programs generating shaders at
* runtime, where multiple shaders may compile to the same thing in our
* backend.
*/
if (matching_data) {
item->offset = matching_data->offset;
} else {
item->offset = brw_alloc_item_data(cache, data_size);
/* Copy data to the buffer */
memcpy(cache->map + item->offset, data, data_size);
}
/* Set up the memory containing the key and prog_data */
tmp = malloc(key_size + prog_data_size);
memcpy(tmp, key, key_size);
memcpy(tmp + key_size, prog_data, prog_data_size);
item->key = tmp;
if (cache->n_items > cache->size * 1.5f)
rehash(cache);
hash %= cache->size;
item->next = cache->items[hash];
cache->items[hash] = item;
cache->n_items++;
*out_offset = item->offset;
*(void **)out_prog_data = (void *)((char *)item->key + item->key_size);
cache->brw->ctx.NewDriverState |= 1 << cache_id;
}
void
brw_init_caches(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
cache->brw = brw;
cache->size = 7;
cache->n_items = 0;
cache->items =
calloc(cache->size, sizeof(struct brw_cache_item *));
cache->bo = brw_bo_alloc(brw->bufmgr, "program cache", 16384,
BRW_MEMZONE_SHADER);
if (can_do_exec_capture(brw->screen))
cache->bo->kflags |= EXEC_OBJECT_CAPTURE;
cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE |
MAP_ASYNC | MAP_PERSISTENT);
}
static void
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
struct brw_cache_item *c, *next;
GLuint i;
DBG("%s\n", __func__);
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
next = c->next;
if (c->cache_id == BRW_CACHE_VS_PROG ||
c->cache_id == BRW_CACHE_TCS_PROG ||
c->cache_id == BRW_CACHE_TES_PROG ||
c->cache_id == BRW_CACHE_GS_PROG ||
c->cache_id == BRW_CACHE_FS_PROG ||
c->cache_id == BRW_CACHE_CS_PROG) {
const void *item_prog_data = ((char *)c->key) + c->key_size;
brw_stage_prog_data_free(item_prog_data);
}
free((void *)c->key);
free(c);
}
cache->items[i] = NULL;
}
cache->n_items = 0;
/* Start putting programs into the start of the BO again, since
* we'll never find the old results.
*/
cache->next_offset = 0;
/* We need to make sure that the programs get regenerated, since
* any offsets leftover in brw_context will no longer be valid.
*/
brw->NewGLState = ~0;
brw->ctx.NewDriverState = ~0ull;
brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0;
brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull;
brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0;
brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull;
/* Also, NULL out any stale program pointers. */
brw->vs.base.prog_data = NULL;
brw->tcs.base.prog_data = NULL;
brw->tes.base.prog_data = NULL;
brw->gs.base.prog_data = NULL;
brw->wm.base.prog_data = NULL;
brw->cs.base.prog_data = NULL;
brw_batch_flush(brw);
}
void
brw_program_cache_check_size(struct brw_context *brw)
{
/* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of
* state cache.
*/
if (brw->cache.n_items > 2000) {
perf_debug("Exceeded state cache size limit. Clearing the set "
"of compiled programs, which will trigger recompiles\n");
brw_clear_cache(brw, &brw->cache);
brw_cache_new_bo(&brw->cache, brw->cache.bo->size);
}
}
static void
brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
DBG("%s\n", __func__);
/* This can be NULL if context creation failed early on */
if (cache->bo) {
brw_bo_unmap(cache->bo);
brw_bo_unreference(cache->bo);
cache->bo = NULL;
cache->map = NULL;
}
brw_clear_cache(brw, cache);
free(cache->items);
cache->items = NULL;
cache->size = 0;
}
void
brw_destroy_caches(struct brw_context *brw)
{
brw_destroy_cache(brw, &brw->cache);
}
static const char *
cache_name(enum brw_cache_id cache_id)
{
switch (cache_id) {
case BRW_CACHE_VS_PROG:
return "VS kernel";
case BRW_CACHE_TCS_PROG:
return "TCS kernel";
case BRW_CACHE_TES_PROG:
return "TES kernel";
case BRW_CACHE_FF_GS_PROG:
return "Fixed-function GS kernel";
case BRW_CACHE_GS_PROG:
return "GS kernel";
case BRW_CACHE_CLIP_PROG:
return "CLIP kernel";
case BRW_CACHE_SF_PROG:
return "SF kernel";
case BRW_CACHE_FS_PROG:
return "FS kernel";
case BRW_CACHE_CS_PROG:
return "CS kernel";
default:
return "unknown";
}
}
void
brw_print_program_cache(struct brw_context *brw)
{
const struct brw_cache *cache = &brw->cache;
struct brw_cache_item *item;
for (unsigned i = 0; i < cache->size; i++) {
for (item = cache->items[i]; item; item = item->next) {
fprintf(stderr, "%s:\n", cache_name(i));
brw_disassemble_with_labels(&brw->screen->devinfo, cache->map,
item->offset, item->size, stderr);
}
}
}

View file

@ -1,621 +0,0 @@
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
/** @file brw_queryobj.c
*
* Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
* GL_EXT_transform_feedback, and friends).
*
* The hardware provides a PIPE_CONTROL command that can report the number of
* fragments that passed the depth test, or the hardware timer. They are
* appropriately synced with the stage of the pipeline for our extensions'
* needs.
*/
#include "main/queryobj.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_batch.h"
/* As best we know currently, the Gen HW timestamps are 36bits across
* all platforms, which we need to account for when calculating a
* delta to measure elapsed time.
*
* The timestamps read via glGetTimestamp() / brw_get_timestamp() sometimes
* only have 32bits due to a kernel bug and so in that case we make sure to
* treat all raw timestamps as 32bits so they overflow consistently and remain
* comparable. (Note: the timestamps being passed here are not from the kernel
* so we don't need to take the upper 32bits in this buggy-kernel case; we
* are just clipping to 32bits here for consistency.)
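*
* Illustrative example: with full 36-bit timestamps, if time0 is near the
* top of the range (say (1 << 36) - 100) and time1 has wrapped around to
* 50, the elapsed time is (1 << 36) + 50 - ((1 << 36) - 100) = 150 ticks,
* which is what the wraparound branch below computes.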
*/
uint64_t
brw_raw_timestamp_delta(struct brw_context *brw, uint64_t time0, uint64_t time1)
{
if (brw->screen->hw_has_timestamp == 2) {
/* Kernel clips timestamps to 32bits in this case, so we also clip
* PIPE_CONTROL timestamps for consistency.
*/
return (uint32_t)time1 - (uint32_t)time0;
} else {
if (time0 > time1) {
return (1ULL << 36) + time1 - time0;
} else {
return time1 - time0;
}
}
}
/**
* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
*/
void
brw_write_timestamp(struct brw_context *brw, struct brw_bo *query_bo, int idx)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (devinfo->ver == 6) {
/* Emit Sandybridge workaround flush: */
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
}
uint32_t flags = PIPE_CONTROL_WRITE_TIMESTAMP;
if (devinfo->ver == 9 && devinfo->gt == 4)
flags |= PIPE_CONTROL_CS_STALL;
brw_emit_pipe_control_write(brw, flags,
query_bo, idx * sizeof(uint64_t), 0);
}
/**
* Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
*/
void
brw_write_depth_count(struct brw_context *brw, struct brw_bo *query_bo, int idx)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
uint32_t flags = PIPE_CONTROL_WRITE_DEPTH_COUNT | PIPE_CONTROL_DEPTH_STALL;
if (devinfo->ver == 9 && devinfo->gt == 4)
flags |= PIPE_CONTROL_CS_STALL;
if (devinfo->ver >= 10) {
/* "Driver must program PIPE_CONTROL with only Depth Stall Enable bit set
* prior to programming a PIPE_CONTROL with Write PS Depth Count Post sync
* operation."
*/
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
}
brw_emit_pipe_control_write(brw, flags,
query_bo, idx * sizeof(uint64_t), 0);
}
/**
* Wait on the query object's BO and calculate the final result.
*/
static void
brw_queryobj_get_results(struct gl_context *ctx,
struct brw_query_object *query)
{
struct brw_context *brw = brw_context(ctx);
UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
int i;
uint64_t *results;
assert(devinfo->ver < 6);
if (query->bo == NULL)
return;
/* If the application has requested the query result, but this batch is
* still contributing to it, flush it now so the results will be present
* when mapped.
*/
if (brw_batch_references(&brw->batch, query->bo))
brw_batch_flush(brw);
if (unlikely(brw->perf_debug)) {
if (brw_bo_busy(query->bo)) {
perf_debug("Stalling on the GPU waiting for a query object.\n");
}
}
results = brw_bo_map(brw, query->bo, MAP_READ);
switch (query->Base.Target) {
case GL_TIME_ELAPSED_EXT:
/* The query BO contains the starting and ending timestamps.
* Subtract the two and convert to nanoseconds.
*/
query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]);
query->Base.Result = intel_device_info_timebase_scale(devinfo, query->Base.Result);
break;
case GL_TIMESTAMP:
/* The query BO contains a single timestamp value in results[0]. */
query->Base.Result = intel_device_info_timebase_scale(devinfo, results[0]);
/* Ensure the scaled timestamp overflows according to
* GL_QUERY_COUNTER_BITS
*/
query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
break;
case GL_SAMPLES_PASSED_ARB:
/* Loop over pairs of values from the BO, which are the PS_DEPTH_COUNT
* value at the start and end of the batchbuffer. Subtract them to
* get the number of fragments which passed the depth test in each
* individual batch, and add those differences up to get the number
* of fragments for the entire query.
*
* Note that query->Base.Result may already be non-zero. We may have
* run out of space in the query's BO and allocated a new one. If so,
* this function was already called to accumulate the results so far.
*/
for (i = 0; i < query->last_index; i++) {
query->Base.Result += results[i * 2 + 1] - results[i * 2];
}
break;
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
/* If the starting and ending PS_DEPTH_COUNT from any of the batches
* differ, then some fragments passed the depth test.
*/
for (i = 0; i < query->last_index; i++) {
if (results[i * 2 + 1] != results[i * 2]) {
query->Base.Result = GL_TRUE;
break;
}
}
break;
default:
unreachable("Unrecognized query target in brw_queryobj_get_results()");
}
brw_bo_unmap(query->bo);
/* Now that we've processed the data stored in the query's buffer object,
* we can release it.
*/
brw_bo_unreference(query->bo);
query->bo = NULL;
}
/**
* The NewQueryObject() driver hook.
*
* Allocates and initializes a new query object.
*/
static struct gl_query_object *
brw_new_query_object(struct gl_context *ctx, GLuint id)
{
struct brw_query_object *query;
query = calloc(1, sizeof(struct brw_query_object));
query->Base.Id = id;
query->Base.Result = 0;
query->Base.Active = false;
query->Base.Ready = true;
return &query->Base;
}
/**
* The DeleteQuery() driver hook.
*/
static void
brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
brw_bo_unreference(query->bo);
_mesa_delete_query(ctx, q);
}
/**
* Gfx4-5 driver hook for glBeginQuery().
*
* Initializes driver structures and emits any GPU commands required to begin
* recording data for the query.
*/
static void
brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(devinfo->ver < 6);
switch (query->Base.Target) {
case GL_TIME_ELAPSED_EXT:
/* For timestamp queries, we record the starting time right away so that
* we measure the full time between BeginQuery and EndQuery. There's
* some debate about whether this is the right thing to do. Our decision
* is based on the following text from the ARB_timer_query extension:
*
* "(5) Should the extension measure total time elapsed between the full
* completion of the BeginQuery and EndQuery commands, or just time
* spent in the graphics library?
*
* RESOLVED: This extension will measure the total time elapsed
* between the full completion of these commands. Future extensions
* may implement a query to determine time elapsed at different stages
* of the graphics pipeline."
*
* We write a starting timestamp now (at index 0). At EndQuery() time,
* we'll write a second timestamp (at index 1), and subtract the two to
* obtain the time elapsed. Notably, this includes time elapsed while
* the system was doing other work, such as running other applications.
*/
brw_bo_unreference(query->bo);
query->bo =
brw_bo_alloc(brw->bufmgr, "timer query", 4096, BRW_MEMZONE_OTHER);
brw_write_timestamp(brw, query->bo, 0);
break;
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
/* For occlusion queries, we delay taking an initial sample until the
* first drawing occurs in this batch. See the reasoning in the comments
* for brw_emit_query_begin() below.
*
* Since we're starting a new query, we need to be sure to throw away
* any previous occlusion query results.
*/
brw_bo_unreference(query->bo);
query->bo = NULL;
query->last_index = -1;
brw->query.obj = query;
/* Depth statistics on Gfx4 require strange workarounds, so we try to
* avoid them when necessary. They're required for occlusion queries,
* so turn them on now.
*/
brw->stats_wm++;
brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
break;
default:
unreachable("Unrecognized query target in brw_begin_query()");
}
}
/**
* Gfx4-5 driver hook for glEndQuery().
*
* Emits GPU commands to record a final query value, ending any data capturing.
* However, the final result isn't necessarily available until the GPU processes
* those commands. brw_queryobj_get_results() processes the captured data to
* produce the final result.
*/
static void
brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(devinfo->ver < 6);
switch (query->Base.Target) {
case GL_TIME_ELAPSED_EXT:
/* Write the final timestamp. */
brw_write_timestamp(brw, query->bo, 1);
break;
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
/* No query->bo means that EndQuery was called after BeginQuery with no
* intervening drawing. Rather than doing nothing at all here in this
* case, we emit the query_begin and query_end state to the
* hardware. This guarantees that waiting on the result of this
* empty query will cause all previous queries to complete, as
* required by the OpenGL 4.3 (Core Profile) spec, section 4.2.1:
*
* "It must always be true that if any query object returns
* a result available of TRUE, all queries of the same type
* issued prior to that query must also return TRUE."
*/
if (!query->bo) {
brw_emit_query_begin(brw);
}
assert(query->bo);
brw_emit_query_end(brw);
brw->query.obj = NULL;
brw->stats_wm--;
brw->ctx.NewDriverState |= BRW_NEW_STATS_WM;
break;
default:
unreachable("Unrecognized query target in brw_end_query()");
}
}
/**
* The Gfx4-5 WaitQuery() driver hook.
*
* Wait for a query result to become available and return it. This is the
* backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
*/
static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
UNUSED const struct intel_device_info *devinfo =
&brw_context(ctx)->screen->devinfo;
assert(devinfo->ver < 6);
brw_queryobj_get_results(ctx, query);
query->Base.Ready = true;
}
/**
* The Gfx4-5 CheckQuery() driver hook.
*
* Checks whether a query result is ready yet. If not, flushes.
* This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
*/
static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(devinfo->ver < 6);
/* From the GL_ARB_occlusion_query spec:
*
* "Instead of allowing for an infinite loop, performing a
* QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
* not ready yet on the first time it is queried. This ensures that
* the async query will return true in finite time."
*/
if (query->bo && brw_batch_references(&brw->batch, query->bo))
brw_batch_flush(brw);
if (query->bo == NULL || !brw_bo_busy(query->bo)) {
brw_queryobj_get_results(ctx, query);
query->Base.Ready = true;
}
}
/**
* Ensure the query's BO has enough space to store a new pair of values.
*
* If not, gather the existing BO's results and create a new buffer of the
* same size.
*/
static void
ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query)
{
struct brw_context *brw = brw_context(ctx);
UNUSED const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(devinfo->ver < 6);
if (!query->bo || query->last_index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
if (query->bo != NULL) {
/* The old query BO did not have enough space, so we allocated a new
* one. Gather the results so far (adding up the differences) and
* release the old BO.
*/
brw_queryobj_get_results(ctx, query);
}
query->bo = brw_bo_alloc(brw->bufmgr, "query", 4096, BRW_MEMZONE_OTHER);
query->last_index = 0;
}
}
/**
* Record the PS_DEPTH_COUNT value (for occlusion queries) just before
* primitive drawing.
*
* In a pre-hardware context world, the single PS_DEPTH_COUNT register is
* shared among all applications using the GPU. However, our query value
* needs to only include fragments generated by our application/GL context.
*
* To accommodate this, we record PS_DEPTH_COUNT at the start and end of
* each batchbuffer (technically, the first primitive drawn and flush time).
* Subtracting each pair of values calculates the change in PS_DEPTH_COUNT
* caused by a batchbuffer. Since there is no preemption inside batches,
* this is guaranteed to only measure the effects of our current application.
*
* Adding each of these differences (in case drawing is done over many batches)
* produces the final expected value.
*
* In a world with hardware contexts, PS_DEPTH_COUNT is saved and restored
* as part of the context state, so this is unnecessary, and skipped.
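*
* Worked example (illustrative numbers): if batch 1 records the pair
* (100, 130) and batch 2 records (500, 520), the accumulated result is
* (130 - 100) + (520 - 500) = 50 samples passing the depth test.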
*/
void
brw_emit_query_begin(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_query_object *query = brw->query.obj;
/* Skip if we're not doing any queries, or we've already recorded the
* initial query value for this batchbuffer.
*/
if (!query || brw->query.begin_emitted)
return;
ensure_bo_has_space(ctx, query);
brw_write_depth_count(brw, query->bo, query->last_index * 2);
brw->query.begin_emitted = true;
}
/**
* Called at batchbuffer flush to get an ending PS_DEPTH_COUNT
* (for non-hardware context platforms).
*
* See the explanation in brw_emit_query_begin().
*/
void
brw_emit_query_end(struct brw_context *brw)
{
struct brw_query_object *query = brw->query.obj;
if (!brw->query.begin_emitted)
return;
brw_write_depth_count(brw, query->bo, query->last_index * 2 + 1);
brw->query.begin_emitted = false;
query->last_index++;
}
/**
* Driver hook for glQueryCounter().
*
* This handles GL_TIMESTAMP queries, which perform a pipelined read of the
* current GPU time. This is unlike GL_TIME_ELAPSED, which measures the
* time while the query is active.
*/
void
brw_query_counter(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *) q;
assert(q->Target == GL_TIMESTAMP);
brw_bo_unreference(query->bo);
query->bo =
brw_bo_alloc(brw->bufmgr, "timestamp query", 4096, BRW_MEMZONE_OTHER);
brw_write_timestamp(brw, query->bo, 0);
query->flushed = false;
}
/**
* Read the TIMESTAMP register immediately (in a non-pipelined fashion).
*
* This is used to implement the GetTimestamp() driver hook.
*/
static uint64_t
brw_get_timestamp(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
uint64_t result = 0;
switch (brw->screen->hw_has_timestamp) {
case 3: /* New kernel, always full 36bit accuracy */
brw_reg_read(brw->bufmgr, TIMESTAMP | 1, &result);
break;
case 2: /* 64bit kernel, result is left-shifted by 32bits, losing 4bits */
brw_reg_read(brw->bufmgr, TIMESTAMP, &result);
result = result >> 32;
break;
case 1: /* 32bit kernel, result is 36bit wide but may be inaccurate! */
brw_reg_read(brw->bufmgr, TIMESTAMP, &result);
break;
}
/* Scale to nanosecond units */
result = intel_device_info_timebase_scale(devinfo, result);
/* Ensure the scaled timestamp overflows according to
* GL_QUERY_COUNTER_BITS. Technically this isn't required if
* querying GL_TIMESTAMP via glGetInteger but it seems best to keep
* QueryObject and GetInteger timestamps consistent.
*/
result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
return result;
}
/**
* Is this type of query written by PIPE_CONTROL?
*/
bool
brw_is_query_pipelined(struct brw_query_object *query)
{
switch (query->Base.Target) {
case GL_TIMESTAMP:
case GL_TIME_ELAPSED:
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
return true;
case GL_PRIMITIVES_GENERATED:
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
case GL_VERTICES_SUBMITTED_ARB:
case GL_PRIMITIVES_SUBMITTED_ARB:
case GL_VERTEX_SHADER_INVOCATIONS_ARB:
case GL_GEOMETRY_SHADER_INVOCATIONS:
case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
return false;
default:
unreachable("Unrecognized query target in is_query_pipelined()");
}
}
/* Initialize query object functions used on all generations. */
void brw_init_common_queryobj_functions(struct dd_function_table *functions)
{
functions->NewQueryObject = brw_new_query_object;
functions->DeleteQuery = brw_delete_query;
functions->GetTimestamp = brw_get_timestamp;
}
/* Initialize Gfx4/5-specific query object functions. */
void gfx4_init_queryobj_functions(struct dd_function_table *functions)
{
functions->BeginQuery = brw_begin_query;
functions->EndQuery = brw_end_query;
functions->CheckQuery = brw_check_query;
functions->WaitQuery = brw_wait_query;
functions->QueryCounter = brw_query_counter;
}

View file

@ -1,86 +0,0 @@
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "main/context.h"
#include <xf86drm.h>
#include "brw_context.h"
/**
* Query information about GPU resets observed by this context
*
* Called via \c dd_function_table::GetGraphicsResetStatus.
*/
GLenum
brw_get_graphics_reset_status(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx };
/* If hardware contexts are not being used (or
* DRM_IOCTL_I915_GET_RESET_STATS is not supported), this function should
* not be accessible.
*/
assert(brw->hw_ctx != 0);
/* A reset status other than NO_ERROR was returned last time. I915 returns
* nonzero active/pending only if reset has been encountered and completed.
* Return NO_ERROR from now on.
*/
if (brw->reset_count != 0)
return GL_NO_ERROR;
if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0)
return GL_NO_ERROR;
/* A reset was observed while a batch from this context was executing.
* Assume that this context was at fault.
*/
if (stats.batch_active != 0) {
brw->reset_count = stats.reset_count;
return GL_GUILTY_CONTEXT_RESET_ARB;
}
/* A reset was observed while a batch from this context was in progress,
* but the batch was not executing. In this case, assume that the context
* was not at fault.
*/
if (stats.batch_pending != 0) {
brw->reset_count = stats.reset_count;
return GL_INNOCENT_CONTEXT_RESET_ARB;
}
return GL_NO_ERROR;
}
void
brw_check_for_reset(struct brw_context *brw)
{
struct drm_i915_reset_stats stats = { .ctx_id = brw->hw_ctx };
if (drmIoctl(brw->screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats) != 0)
return;
if (stats.batch_active > 0 || stats.batch_pending > 0)
_mesa_set_context_lost_dispatch(&brw->ctx);
}

File diff suppressed because it is too large

View file

@ -1,173 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _INTEL_INIT_H_
#define _INTEL_INIT_H_
#include <stdbool.h>
#include <sys/time.h>
#include <GL/internal/dri_interface.h>
#include "isl/isl.h"
#include "dri_util.h"
#include "brw_bufmgr.h"
#include "dev/intel_device_info.h"
#include "drm-uapi/i915_drm.h"
#include "util/xmlconfig.h"
#include "isl/isl.h"
#ifdef __cplusplus
extern "C" {
#endif
struct brw_screen
{
int deviceID;
struct intel_device_info devinfo;
__DRIscreen *driScrnPriv;
uint64_t max_gtt_map_object_size;
/** Bytes of aperture usage beyond which execbuf is likely to fail. */
uint64_t aperture_threshold;
/** DRM fd associated with this screen. Not owned by this object. Do not close. */
int fd;
bool has_exec_fence; /**< I915_PARAM_HAS_EXEC_FENCE */
int hw_has_timestamp;
struct isl_device isl_dev;
/**
* Does the kernel support context reset notifications?
*/
bool has_context_reset_notification;
/**
* Does the kernel support features such as pipelined register access to
* specific registers?
*/
unsigned kernel_features;
#define KERNEL_ALLOWS_SOL_OFFSET_WRITES (1<<0)
#define KERNEL_ALLOWS_PREDICATE_WRITES (1<<1)
#define KERNEL_ALLOWS_MI_MATH_AND_LRR (1<<2)
#define KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3 (1<<3)
#define KERNEL_ALLOWS_COMPUTE_DISPATCH (1<<4)
#define KERNEL_ALLOWS_EXEC_CAPTURE (1<<5)
#define KERNEL_ALLOWS_EXEC_BATCH_FIRST (1<<6)
#define KERNEL_ALLOWS_CONTEXT_ISOLATION (1<<7)
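/* Most of these bits are tested via the can_do_*() helpers below. */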
struct brw_bufmgr *bufmgr;
/**
* A unique ID for shader programs.
*/
unsigned program_id;
int winsys_msaa_samples_override;
struct brw_compiler *compiler;
/**
* Configuration cache with default values for all contexts
*/
driOptionCache optionCache;
/**
* Version of the command parser reported by the
* I915_PARAM_CMD_PARSER_VERSION parameter
*/
int cmd_parser_version;
bool mesa_format_supports_texture[MESA_FORMAT_COUNT];
bool mesa_format_supports_render[MESA_FORMAT_COUNT];
enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT];
struct disk_cache *disk_cache;
};
extern void brw_destroy_context(__DRIcontext *driContextPriv);
extern GLboolean brw_unbind_context(__DRIcontext *driContextPriv);
PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void);
extern const __DRI2fenceExtension brwFenceExtension;
extern GLboolean
brw_make_current(__DRIcontext *driContextPriv,
__DRIdrawable *driDrawPriv,
__DRIdrawable *driReadPriv);
double get_time(void);
const int*
brw_supported_msaa_modes(const struct brw_screen *screen);
static inline bool
can_do_pipelined_register_writes(const struct brw_screen *screen)
{
return screen->kernel_features & KERNEL_ALLOWS_SOL_OFFSET_WRITES;
}
static inline bool
can_do_hsw_l3_atomics(const struct brw_screen *screen)
{
return screen->kernel_features & KERNEL_ALLOWS_HSW_SCRATCH1_AND_ROW_CHICKEN3;
}
static inline bool
can_do_mi_math_and_lrr(const struct brw_screen *screen)
{
return screen->kernel_features & KERNEL_ALLOWS_MI_MATH_AND_LRR;
}
static inline bool
can_do_compute_dispatch(const struct brw_screen *screen)
{
return screen->kernel_features & KERNEL_ALLOWS_COMPUTE_DISPATCH;
}
static inline bool
can_do_predicate_writes(const struct brw_screen *screen)
{
return screen->kernel_features & KERNEL_ALLOWS_PREDICATE_WRITES;
}
static inline bool
can_do_exec_capture(const struct brw_screen *screen)
{
return screen->kernel_features & KERNEL_ALLOWS_EXEC_CAPTURE;
}
#ifdef __cplusplus
}
#endif
#endif

View file

@ -1,171 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "compiler/nir/nir.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/state.h"
#include "brw_batch.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_util.h"
#include "brw_state.h"
#include "compiler/brw_eu.h"
#include "util/ralloc.h"
static void
compile_sf_prog(struct brw_context *brw, struct brw_sf_prog_key *key)
{
const unsigned *program;
void *mem_ctx;
unsigned program_size;
mem_ctx = ralloc_context(NULL);
struct brw_sf_prog_data prog_data;
program = brw_compile_sf(brw->screen->compiler, mem_ctx, key, &prog_data,
&brw->vue_map_geom_out, &program_size);
brw_upload_cache(&brw->cache, BRW_CACHE_SF_PROG,
key, sizeof(*key),
program, program_size,
&prog_data, sizeof(prog_data),
&brw->sf.prog_offset, &brw->sf.prog_data);
ralloc_free(mem_ctx);
}
/* Calculate interpolants for triangle and line rasterization.
*/
void
brw_upload_sf_prog(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_sf_prog_key key;
if (!brw_state_dirty(brw,
_NEW_BUFFERS |
_NEW_HINT |
_NEW_LIGHT |
_NEW_POINT |
_NEW_POLYGON |
_NEW_PROGRAM |
_NEW_TRANSFORM,
BRW_NEW_BLORP |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_REDUCED_PRIMITIVE |
BRW_NEW_VUE_MAP_GEOM_OUT))
return;
/* _NEW_BUFFERS */
bool flip_y = ctx->DrawBuffer->FlipY;
memset(&key, 0, sizeof(key));
/* Populate the key, noting state dependencies:
*/
/* BRW_NEW_VUE_MAP_GEOM_OUT */
key.attrs = brw->vue_map_geom_out.slots_valid;
/* BRW_NEW_REDUCED_PRIMITIVE */
switch (brw->reduced_primitive) {
case GL_TRIANGLES:
/* NOTE: We just use the edgeflag attribute as an indicator that
* unfilled triangles are active. We don't actually do the
* edgeflag testing here; it is already done in the clip
* program.
*/
if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE))
key.primitive = BRW_SF_PRIM_UNFILLED_TRIS;
else
key.primitive = BRW_SF_PRIM_TRIANGLES;
break;
case GL_LINES:
key.primitive = BRW_SF_PRIM_LINES;
break;
case GL_POINTS:
key.primitive = BRW_SF_PRIM_POINTS;
break;
}
/* _NEW_TRANSFORM */
key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
/* _NEW_POINT */
key.do_point_sprite = ctx->Point.PointSprite;
if (key.do_point_sprite) {
key.point_sprite_coord_replace = ctx->Point.CoordReplace & 0xff;
}
if (brw->programs[MESA_SHADER_FRAGMENT]->info.inputs_read &
BITFIELD64_BIT(VARYING_SLOT_PNTC)) {
key.do_point_coord = 1;
}
/*
* Window coordinates in a FBO are inverted, which means point
* sprite origin must be inverted, too.
*/
if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y)
key.sprite_origin_lower_left = true;
/* BRW_NEW_FS_PROG_DATA */
const struct brw_wm_prog_data *wm_prog_data =
brw_wm_prog_data(brw->wm.base.prog_data);
if (wm_prog_data) {
key.contains_flat_varying = wm_prog_data->contains_flat_varying;
STATIC_ASSERT(sizeof(key.interp_mode) ==
sizeof(wm_prog_data->interp_mode));
memcpy(key.interp_mode, wm_prog_data->interp_mode,
sizeof(key.interp_mode));
}
/* _NEW_LIGHT | _NEW_PROGRAM */
key.do_twoside_color = _mesa_vertex_program_two_side_enabled(ctx);
/* _NEW_POLYGON */
if (key.do_twoside_color) {
/* If we're rendering to a FBO, we have to invert the polygon
* face orientation, just as we invert the viewport in
* sf_unit_create_from_key().
*/
key.frontface_ccw = brw->polygon_front_bit != flip_y;
}
if (!brw_search_cache(&brw->cache, BRW_CACHE_SF_PROG, &key, sizeof(key),
&brw->sf.prog_offset, &brw->sf.prog_data, true)) {
compile_sf_prog( brw, &key );
}
}

View file

@ -1,119 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/dd.h"
#include "brw_screen.h"
#include "brw_context.h"
#include "brw_defines.h"
int
brw_translate_shadow_compare_func(GLenum func)
{
/* GL specifies the result of shadow comparisons as:
* 1 if ref <op> texel,
* 0 otherwise.
*
* The hardware does:
* 0 if texel <op> ref,
* 1 otherwise.
*
* So, these look a bit strange because there's both a negation
* and swapping of the arguments involved.
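*
* For example, GL_LESS should pass when ref < texel. The hardware instead
* evaluates "texel <op> ref" and returns 1 only when that comparison is
* false, so we need an <op> that is false exactly when ref < texel, i.e.
* true when texel <= ref; hence GL_LESS maps to BRW_COMPAREFUNCTION_LEQUAL
* below.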
*/
switch (func) {
case GL_NEVER:
return BRW_COMPAREFUNCTION_ALWAYS;
case GL_LESS:
return BRW_COMPAREFUNCTION_LEQUAL;
case GL_LEQUAL:
return BRW_COMPAREFUNCTION_LESS;
case GL_GREATER:
return BRW_COMPAREFUNCTION_GEQUAL;
case GL_GEQUAL:
return BRW_COMPAREFUNCTION_GREATER;
case GL_NOTEQUAL:
return BRW_COMPAREFUNCTION_EQUAL;
case GL_EQUAL:
return BRW_COMPAREFUNCTION_NOTEQUAL;
case GL_ALWAYS:
return BRW_COMPAREFUNCTION_NEVER;
}
unreachable("Invalid shadow comparison function.");
}
int
brw_translate_compare_func(GLenum func)
{
switch (func) {
case GL_NEVER:
return BRW_COMPAREFUNCTION_NEVER;
case GL_LESS:
return BRW_COMPAREFUNCTION_LESS;
case GL_LEQUAL:
return BRW_COMPAREFUNCTION_LEQUAL;
case GL_GREATER:
return BRW_COMPAREFUNCTION_GREATER;
case GL_GEQUAL:
return BRW_COMPAREFUNCTION_GEQUAL;
case GL_NOTEQUAL:
return BRW_COMPAREFUNCTION_NOTEQUAL;
case GL_EQUAL:
return BRW_COMPAREFUNCTION_EQUAL;
case GL_ALWAYS:
return BRW_COMPAREFUNCTION_ALWAYS;
}
unreachable("Invalid comparison function.");
}
int
brw_translate_stencil_op(GLenum op)
{
switch (op) {
case GL_KEEP:
return BRW_STENCILOP_KEEP;
case GL_ZERO:
return BRW_STENCILOP_ZERO;
case GL_REPLACE:
return BRW_STENCILOP_REPLACE;
case GL_INCR:
return BRW_STENCILOP_INCRSAT;
case GL_DECR:
return BRW_STENCILOP_DECRSAT;
case GL_INCR_WRAP:
return BRW_STENCILOP_INCR;
case GL_DECR_WRAP:
return BRW_STENCILOP_DECR;
case GL_INVERT:
return BRW_STENCILOP_INVERT;
default:
return BRW_STENCILOP_ZERO;
}
}

View file

@ -1,370 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#ifndef BRW_STATE_H
#define BRW_STATE_H
#include "brw_context.h"
#ifdef __cplusplus
extern "C" {
#endif
enum intel_msaa_layout;
extern const struct brw_tracked_state brw_blend_constant_color;
extern const struct brw_tracked_state brw_clip_unit;
extern const struct brw_tracked_state brw_vs_pull_constants;
extern const struct brw_tracked_state brw_tcs_pull_constants;
extern const struct brw_tracked_state brw_tes_pull_constants;
extern const struct brw_tracked_state brw_gs_pull_constants;
extern const struct brw_tracked_state brw_wm_pull_constants;
extern const struct brw_tracked_state brw_cs_pull_constants;
extern const struct brw_tracked_state brw_constant_buffer;
extern const struct brw_tracked_state brw_curbe_offsets;
extern const struct brw_tracked_state brw_binding_table_pointers;
extern const struct brw_tracked_state brw_depthbuffer;
extern const struct brw_tracked_state brw_recalculate_urb_fence;
extern const struct brw_tracked_state brw_sf_vp;
extern const struct brw_tracked_state brw_cs_texture_surfaces;
extern const struct brw_tracked_state brw_vs_ubo_surfaces;
extern const struct brw_tracked_state brw_vs_image_surfaces;
extern const struct brw_tracked_state brw_tcs_ubo_surfaces;
extern const struct brw_tracked_state brw_tcs_image_surfaces;
extern const struct brw_tracked_state brw_tes_ubo_surfaces;
extern const struct brw_tracked_state brw_tes_image_surfaces;
extern const struct brw_tracked_state brw_gs_ubo_surfaces;
extern const struct brw_tracked_state brw_gs_image_surfaces;
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
extern const struct brw_tracked_state brw_renderbuffer_read_surfaces;
extern const struct brw_tracked_state brw_texture_surfaces;
extern const struct brw_tracked_state brw_wm_binding_table;
extern const struct brw_tracked_state brw_gs_binding_table;
extern const struct brw_tracked_state brw_tes_binding_table;
extern const struct brw_tracked_state brw_tcs_binding_table;
extern const struct brw_tracked_state brw_vs_binding_table;
extern const struct brw_tracked_state brw_wm_ubo_surfaces;
extern const struct brw_tracked_state brw_wm_image_surfaces;
extern const struct brw_tracked_state brw_cs_ubo_surfaces;
extern const struct brw_tracked_state brw_cs_image_surfaces;
extern const struct brw_tracked_state brw_psp_urb_cbs;
extern const struct brw_tracked_state brw_indices;
extern const struct brw_tracked_state brw_index_buffer;
extern const struct brw_tracked_state gfx7_cs_push_constants;
extern const struct brw_tracked_state gfx6_binding_table_pointers;
extern const struct brw_tracked_state gfx6_gs_binding_table;
extern const struct brw_tracked_state gfx6_renderbuffer_surfaces;
extern const struct brw_tracked_state gfx6_sampler_state;
extern const struct brw_tracked_state gfx6_sol_surface;
extern const struct brw_tracked_state gfx6_sf_vp;
extern const struct brw_tracked_state gfx6_urb;
extern const struct brw_tracked_state gfx7_l3_state;
extern const struct brw_tracked_state gfx7_push_constant_space;
extern const struct brw_tracked_state gfx7_urb;
extern const struct brw_tracked_state gfx8_pma_fix;
extern const struct brw_tracked_state brw_cs_work_groups_surface;
void gfx4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void gfx45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void gfx5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void gfx6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void gfx7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void gfx75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void gfx8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void gfx9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
void gfx11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint64_t imm);
static inline bool
brw_state_dirty(const struct brw_context *brw,
GLuint mesa_flags, uint64_t brw_flags)
{
return ((brw->NewGLState & mesa_flags) |
(brw->ctx.NewDriverState & brw_flags)) != 0;
}
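/* Illustrative usage sketch (added; not part of the original header): an
 * atom's emit() callback can use brw_state_dirty() to skip work when none
 * of the bits it depends on changed, e.g.
 *
 *    if (!brw_state_dirty(brw, _NEW_BUFFERS, BRW_NEW_BATCH))
 *       return;
 *
 * mesa_flags take _NEW_* bits, brw_flags take BRW_NEW_* bits.
 */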
/* brw_binding_tables.c */
void brw_upload_binding_table(struct brw_context *brw,
uint32_t packet_name,
const struct brw_stage_prog_data *prog_data,
struct brw_stage_state *stage_state);
/* brw_misc_state.c */
void brw_upload_invariant_state(struct brw_context *brw);
uint32_t
brw_depthbuffer_format(struct brw_context *brw);
/* gfx8_depth_state.c */
void gfx8_write_pma_stall_bits(struct brw_context *brw,
uint32_t pma_stall_bits);
/* brw_disk_cache.c */
void brw_disk_cache_init(struct brw_screen *screen);
bool brw_disk_cache_upload_program(struct brw_context *brw,
gl_shader_stage stage);
void brw_disk_cache_write_compute_program(struct brw_context *brw);
void brw_disk_cache_write_render_programs(struct brw_context *brw);
/***********************************************************************
* brw_state_upload.c
*/
void brw_upload_render_state(struct brw_context *brw);
void brw_render_state_finished(struct brw_context *brw);
void brw_upload_compute_state(struct brw_context *brw);
void brw_compute_state_finished(struct brw_context *brw);
void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);
void brw_emit_select_pipeline(struct brw_context *brw,
enum brw_pipeline pipeline);
void brw_enable_obj_preemption(struct brw_context *brw, bool enable);
static inline void
brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
if (unlikely(brw->last_pipeline != pipeline)) {
assert(pipeline < BRW_NUM_PIPELINES);
brw_emit_select_pipeline(brw, pipeline);
brw->last_pipeline = pipeline;
}
}
/***********************************************************************
* brw_program_cache.c
*/
void brw_upload_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_sz,
const void *data,
GLuint data_sz,
const void *aux,
GLuint aux_sz,
uint32_t *out_offset, void *out_aux);
bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
const void *key, GLuint key_size, uint32_t *inout_offset,
void *inout_aux, bool flag_state);
const void *brw_find_previous_compile(struct brw_cache *cache,
enum brw_cache_id cache_id,
unsigned program_string_id);
void brw_program_cache_check_size(struct brw_context *brw);
void brw_init_caches( struct brw_context *brw );
void brw_destroy_caches( struct brw_context *brw );
void brw_print_program_cache(struct brw_context *brw);
enum brw_cache_id brw_stage_cache_id(gl_shader_stage stage);
/* brw_batch.c */
void brw_require_statebuffer_space(struct brw_context *brw, int size);
void *brw_state_batch(struct brw_context *brw,
int size, int alignment, uint32_t *out_offset);
/* brw_wm_surface_state.c */
uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
enum isl_format brw_isl_format_for_mesa_format(mesa_format mesa_format);
GLuint translate_tex_target(GLenum target);
enum isl_format translate_tex_format(struct brw_context *brw,
mesa_format mesa_format,
GLenum srgb_decode);
int brw_get_texture_swizzle(const struct gl_context *ctx,
const struct gl_texture_object *t);
void brw_emit_buffer_surface_state(struct brw_context *brw,
uint32_t *out_offset,
struct brw_bo *bo,
unsigned buffer_offset,
unsigned surface_format,
unsigned buffer_size,
unsigned pitch,
unsigned reloc_flags);
/* brw_sampler_state.c */
void brw_emit_sampler_state(struct brw_context *brw,
uint32_t *sampler_state,
uint32_t batch_offset_for_sampler_state,
unsigned min_filter,
unsigned mag_filter,
unsigned mip_filter,
unsigned max_anisotropy,
unsigned address_rounding,
unsigned wrap_s,
unsigned wrap_t,
unsigned wrap_r,
unsigned base_level,
unsigned min_lod,
unsigned max_lod,
int lod_bias,
unsigned shadow_function,
bool non_normalized_coordinates,
uint32_t border_color_offset);
/* gfx6_constant_state.c */
void
brw_populate_constant_data(struct brw_context *brw,
const struct gl_program *prog,
const struct brw_stage_state *stage_state,
void *dst,
const uint32_t *param,
unsigned nr_params);
void
brw_upload_pull_constants(struct brw_context *brw,
GLbitfield64 brw_new_constbuf,
const struct gl_program *prog,
struct brw_stage_state *stage_state,
const struct brw_stage_prog_data *prog_data);
void
brw_upload_cs_push_constants(struct brw_context *brw,
const struct gl_program *prog,
const struct brw_cs_prog_data *cs_prog_data,
struct brw_stage_state *stage_state);
/* gfx7_vs_state.c */
void
gfx7_upload_constant_state(struct brw_context *brw,
const struct brw_stage_state *stage_state,
bool active, unsigned opcode);
/* brw_clip.c */
void brw_upload_clip_prog(struct brw_context *brw);
/* brw_sf.c */
void brw_upload_sf_prog(struct brw_context *brw);
bool brw_is_drawing_points(const struct brw_context *brw);
bool brw_is_drawing_lines(const struct brw_context *brw);
/* gfx7_l3_state.c */
void
gfx7_restore_default_l3_config(struct brw_context *brw);
static inline bool
use_state_point_size(const struct brw_context *brw)
{
const struct gl_context *ctx = &brw->ctx;
/* Section 14.4 (Points) of the OpenGL 4.5 specification says:
*
* "If program point size mode is enabled, the derived point size is
* taken from the (potentially clipped) shader built-in gl_PointSize
* written by:
*
* * the geometry shader, if active;
* * the tessellation evaluation shader, if active and no
* geometry shader is active;
* * the vertex shader, otherwise
*
* and clamped to the implementation-dependent point size range. If
* the value written to gl_PointSize is less than or equal to zero,
* or if no value was written to gl_PointSize, results are undefined.
* If program point size mode is disabled, the derived point size is
* specified with the command
*
* void PointSize(float size);
*
* size specifies the requested size of a point. The default value
* is 1.0."
*
* The rules for GLES come from the ES 3.2, OES_geometry_point_size, and
* OES_tessellation_point_size specifications. To summarize: if the last
* stage before rasterization is a GS or TES, then use gl_PointSize from
* the shader if written. Otherwise, use 1.0. If the last stage is a
* vertex shader, use gl_PointSize if written; otherwise it is undefined.
*
* We can combine these rules into a single condition for both APIs.
* Using the state point size when the last shader stage doesn't write
* gl_PointSize satisfies GL's requirements, as it's undefined. Because
* ES doesn't have a PointSize() command, the state point size will
* remain 1.0, satisfying the ES default value in the GS/TES case, and
* the VS case (1.0 works for "undefined"). Mesa sets the program point
* mode flag to always-enabled in ES, so we can safely check that, and
* it'll be ignored for ES.
*
* _NEW_PROGRAM | _NEW_POINT
* BRW_NEW_VUE_MAP_GEOM_OUT
*/
return (!ctx->VertexProgram.PointSizeEnabled && !ctx->Point._Attenuated) ||
(brw->vue_map_geom_out.slots_valid & VARYING_BIT_PSIZ) == 0;
}
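/* Added clarification: the first clause selects the fixed-function
 * PointSize() state when program point size is disabled and points are not
 * distance-attenuated; the second selects it when the last VUE stage never
 * writes gl_PointSize, a case the spec text above leaves undefined anyway.
 */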
void brw_copy_pipeline_atoms(struct brw_context *brw,
enum brw_pipeline pipeline,
const struct brw_tracked_state **atoms,
int num_atoms);
void gfx4_init_atoms(struct brw_context *brw);
void gfx45_init_atoms(struct brw_context *brw);
void gfx5_init_atoms(struct brw_context *brw);
void gfx6_init_atoms(struct brw_context *brw);
void gfx7_init_atoms(struct brw_context *brw);
void gfx75_init_atoms(struct brw_context *brw);
void gfx8_init_atoms(struct brw_context *brw);
void gfx9_init_atoms(struct brw_context *brw);
void gfx11_init_atoms(struct brw_context *brw);
static inline uint32_t
brw_mocs(const struct isl_device *dev, struct brw_bo *bo)
{
return isl_mocs(dev, 0, bo && bo->external);
}
#ifdef __cplusplus
}
#endif
#endif


@ -1,789 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "brw_batch.h"
#include "brw_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "genxml/genX_bits.h"
#include "main/framebuffer.h"
void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo;
assert(devinfo->ver >= 9);
if (enable == brw->object_preemption)
return;
/* A fixed function pipe flush is required before modifying this field */
brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
bool replay_mode = enable ?
GFX9_REPLAY_MODE_MIDOBJECT : GFX9_REPLAY_MODE_MIDBUFFER;
/* enable object level preemption */
brw_load_register_imm32(brw, CS_CHICKEN1,
replay_mode | GFX9_REPLAY_MODE_MASK);
brw->object_preemption = enable;
}
static void
brw_upload_gfx11_slice_hashing_state(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
int subslices_delta =
devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
if (subslices_delta == 0)
return;
unsigned size = GFX11_SLICE_HASH_TABLE_length * 4;
uint32_t hash_address;
uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);
unsigned idx = 0;
unsigned sl_small = 0;
unsigned sl_big = 1;
if (subslices_delta > 0) {
sl_small = 1;
sl_big = 0;
}
/**
* Create a 16x16 slice hashing table like the following one:
*
* [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
* [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
* [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
* [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
* [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
* [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
* [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
* [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
* [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
* [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
* [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
* [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
* [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
* [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
* [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
* [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
*
* The table above is used when the pixel pipe 0 has less subslices than
* pixel pipe 1. When pixel pipe 0 has more subslices, then a similar table
* with 0's and 1's inverted is used.
*/
for (int i = 0; i < GFX11_SLICE_HASH_TABLE_length; i++) {
uint32_t dw = 0;
for (int j = 0; j < 8; j++) {
unsigned slice = idx++ % 3 ? sl_big : sl_small;
dw |= slice << (j * 4);
}
map[i] = dw;
}
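/* Illustrative note (added): each 32-bit entry written above packs eight
 * 4-bit subslice selectors, and the idx++ % 3 pattern sends every third
 * entry to the pixel pipe with fewer subslices, i.e. a 2:1 split matching
 * the table sketched in the comment above.
 */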
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
ADVANCE_BATCH();
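/* Note added for clarity on the BEGIN_BATCH/OUT_BATCH pattern used in this
 * file: the first DWORD of a 3DSTATE command holds the opcode in its high
 * bits and a "DWord Length" field in the low bits, which the hardware
 * defines as the total command length minus two; hence the recurring
 * "(N - 2)" expressions.
 */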
/* From gfx10/gfx11 workaround table in h/w specs:
*
* "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
* a value of 0xFFFF"
*
* This means that whenever we update a field with this instruction, we need
* to update all the others.
*
* Since this is the first time we emit this
* instruction, we are only setting the fSLICE_HASHING_TABLE_ENABLE flag,
* and leaving everything else at their default state (0).
*/
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
OUT_BATCH(0xffff0000 | SLICE_HASHING_TABLE_ENABLE);
ADVANCE_BATCH();
}
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const struct brw_compiler *compiler = brw->screen->compiler;
/* On platforms with hardware contexts, we can set our initial GPU state
* right away rather than doing it via state atoms. This saves a small
* amount of overhead on every draw call.
*/
if (!brw->hw_ctx)
return;
if (devinfo->ver == 6)
brw_emit_post_sync_nonzero_flush(brw);
brw_upload_invariant_state(brw);
if (devinfo->ver == 11) {
/* The default behavior of bit 5 "Headerless Message for Pre-emptable
* Contexts" in SAMPLER MODE register is set to 0, which means
* headerless sampler messages are not allowed for pre-emptable
* contexts. Set the bit 5 to 1 to allow them.
*/
brw_load_register_imm32(brw, GFX11_SAMPLER_MODE,
HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);
/* Bit 1 "Enabled Texel Offset Precision Fix" must be set in
* HALF_SLICE_CHICKEN7 register.
*/
brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
TEXEL_OFFSET_FIX_MASK |
TEXEL_OFFSET_FIX_ENABLE);
/* Wa_1406697149: Bit 9 "Error Detection Behavior Control" must be set
* in the L3CNTLREG register. The default setting of the bit is not the
* desired behavior.
*/
brw_load_register_imm32(brw, GFX8_L3CNTLREG,
GFX8_L3CNTLREG_EDBC_NO_HANG);
}
/* The hardware specification recommends disabling repacking for
* compatibility with the decompression mechanism in the display controller.
*/
if (devinfo->disable_ccs_repack) {
brw_load_register_imm32(brw, GFX7_CACHE_MODE_0,
GFX11_DISABLE_REPACKING_FOR_COMPRESSION |
REG_MASK(GFX11_DISABLE_REPACKING_FOR_COMPRESSION));
}
if (devinfo->ver == 9) {
/* Recommended optimizations for Victim Cache eviction and floating
* point blending.
*/
brw_load_register_imm32(brw, GFX7_CACHE_MODE_1,
REG_MASK(GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
REG_MASK(GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT) |
REG_MASK(GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
GFX9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
GFX9_MSC_RAW_HAZARD_AVOIDANCE_BIT |
GFX9_PARTIAL_RESOLVE_DISABLE_IN_VC);
}
if (devinfo->ver >= 8) {
gfx8_emit_3dstate_sample_pattern(brw);
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
* 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
*
* This is only safe on kernels with context isolation support.
*/
if (!compiler->constant_buffer_0_is_relative) {
if (devinfo->ver >= 9) {
BEGIN_BATCH(3);
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
OUT_BATCH(CS_DEBUG_MODE2);
OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
ADVANCE_BATCH();
} else if (devinfo->ver == 8) {
BEGIN_BATCH(3);
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
OUT_BATCH(INSTPM);
OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
ADVANCE_BATCH();
}
}
brw->object_preemption = false;
if (devinfo->ver >= 10)
brw_enable_obj_preemption(brw, true);
if (devinfo->ver == 11)
brw_upload_gfx11_slice_hashing_state(brw);
}
static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
enum brw_pipeline pipeline)
{
switch (pipeline) {
case BRW_RENDER_PIPELINE:
return brw->render_atoms;
case BRW_COMPUTE_PIPELINE:
return brw->compute_atoms;
default:
STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
unreachable("Unsupported pipeline");
return NULL;
}
}
void
brw_copy_pipeline_atoms(struct brw_context *brw,
enum brw_pipeline pipeline,
const struct brw_tracked_state **atoms,
int num_atoms)
{
/* This is to work around brw_context::atoms being declared const. We want
* it to be const, but it needs to be initialized somehow!
*/
struct brw_tracked_state *context_atoms =
(struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
for (int i = 0; i < num_atoms; i++) {
context_atoms[i] = *atoms[i];
assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
assert(context_atoms[i].emit);
}
brw->num_atoms[pipeline] = num_atoms;
}
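/* Minimal sketch (assumption, not the original gfx*_init_atoms code): a
 * per-generation init function builds static atom lists and hands them to
 * brw_copy_pipeline_atoms() once per pipeline, e.g.
 *
 *    static const struct brw_tracked_state *render_atoms[] = {
 *       &brw_vs_pull_constants,
 *       &brw_wm_pull_constants,
 *       // ... remaining render-pipeline atoms, in dependency order
 *    };
 *    brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
 *                            render_atoms, ARRAY_SIZE(render_atoms));
 */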
void brw_init_state( struct brw_context *brw )
{
struct gl_context *ctx = &brw->ctx;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
/* Force the first brw_select_pipeline to emit pipeline select */
brw->last_pipeline = BRW_NUM_PIPELINES;
brw_init_caches(brw);
if (devinfo->ver >= 11)
gfx11_init_atoms(brw);
else if (devinfo->ver >= 10)
unreachable("Gfx10 support dropped.");
else if (devinfo->ver >= 9)
gfx9_init_atoms(brw);
else if (devinfo->ver >= 8)
gfx8_init_atoms(brw);
else if (devinfo->verx10 >= 75)
gfx75_init_atoms(brw);
else if (devinfo->ver >= 7)
gfx7_init_atoms(brw);
else if (devinfo->ver >= 6)
gfx6_init_atoms(brw);
else if (devinfo->ver >= 5)
gfx5_init_atoms(brw);
else if (devinfo->verx10 >= 45)
gfx45_init_atoms(brw);
else
gfx4_init_atoms(brw);
brw_upload_initial_gpu_state(brw);
brw->NewGLState = ~0;
brw->ctx.NewDriverState = ~0ull;
/* ~0 is a nonsensical value which won't match anything we program, so
* the programming will take effect on the first time around.
*/
brw->pma_stall_bits = ~0;
/* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
* dirty flags.
*/
STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
ctx->DriverFlags.NewTessState = BRW_NEW_DEFAULT_TESS_LEVELS;
ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
}
void brw_destroy_state( struct brw_context *brw )
{
brw_destroy_caches(brw);
}
/***********************************************************************
*/
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}
static void
accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b)
{
a->mesa |= b->mesa;
a->brw |= b->brw;
}
static void
xor_states(struct brw_state_flags *result,
const struct brw_state_flags *a,
const struct brw_state_flags *b)
{
result->mesa = a->mesa ^ b->mesa;
result->brw = a->brw ^ b->brw;
}
struct dirty_bit_map {
uint64_t bit;
char *name;
uint32_t count;
};
#define DEFINE_BIT(name) {name, #name, 0}
static struct dirty_bit_map mesa_bits[] = {
DEFINE_BIT(_NEW_MODELVIEW),
DEFINE_BIT(_NEW_PROJECTION),
DEFINE_BIT(_NEW_TEXTURE_MATRIX),
DEFINE_BIT(_NEW_COLOR),
DEFINE_BIT(_NEW_DEPTH),
DEFINE_BIT(_NEW_FOG),
DEFINE_BIT(_NEW_HINT),
DEFINE_BIT(_NEW_LIGHT),
DEFINE_BIT(_NEW_LINE),
DEFINE_BIT(_NEW_PIXEL),
DEFINE_BIT(_NEW_POINT),
DEFINE_BIT(_NEW_POLYGON),
DEFINE_BIT(_NEW_POLYGONSTIPPLE),
DEFINE_BIT(_NEW_SCISSOR),
DEFINE_BIT(_NEW_STENCIL),
DEFINE_BIT(_NEW_TEXTURE_OBJECT),
DEFINE_BIT(_NEW_TRANSFORM),
DEFINE_BIT(_NEW_VIEWPORT),
DEFINE_BIT(_NEW_TEXTURE_STATE),
DEFINE_BIT(_NEW_RENDERMODE),
DEFINE_BIT(_NEW_BUFFERS),
DEFINE_BIT(_NEW_CURRENT_ATTRIB),
DEFINE_BIT(_NEW_MULTISAMPLE),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
DEFINE_BIT(_NEW_FRAG_CLAMP),
{0, 0, 0}
};
static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
DEFINE_BIT(BRW_NEW_URB_FENCE),
DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
DEFINE_BIT(BRW_NEW_PRIMITIVE),
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_PSP),
DEFINE_BIT(BRW_NEW_SURFACES),
DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
DEFINE_BIT(BRW_NEW_INDICES),
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
DEFINE_BIT(BRW_NEW_BATCH),
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
DEFINE_BIT(BRW_NEW_STATS_WM),
DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
DEFINE_BIT(BRW_NEW_GFX4_UNIT_STATE),
DEFINE_BIT(BRW_NEW_CC_VP),
DEFINE_BIT(BRW_NEW_SF_VP),
DEFINE_BIT(BRW_NEW_CLIP_VP),
DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
DEFINE_BIT(BRW_NEW_URB_SIZE),
DEFINE_BIT(BRW_NEW_CC_STATE),
DEFINE_BIT(BRW_NEW_BLORP),
DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
DEFINE_BIT(BRW_NEW_DRAW_CALL),
DEFINE_BIT(BRW_NEW_AUX_STATE),
{0, 0, 0}
};
static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
for (int i = 0; bit_map[i].bit != 0; i++) {
if (bit_map[i].bit & bits)
bit_map[i].count++;
}
}
static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
for (int i = 0; bit_map[i].bit != 0; i++) {
if (bit_map[i].count > 1) {
fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
bit_map[i].bit, bit_map[i].count, bit_map[i].name);
}
}
}
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
if (brw->programs[MESA_SHADER_TESS_EVAL]) {
brw_upload_tcs_prog(brw);
brw_upload_tes_prog(brw);
} else {
brw->tcs.base.prog_data = NULL;
brw->tes.base.prog_data = NULL;
}
}
static inline void
brw_upload_programs(struct brw_context *brw,
enum brw_pipeline pipeline)
{
struct gl_context *ctx = &brw->ctx;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (pipeline == BRW_RENDER_PIPELINE) {
brw_upload_vs_prog(brw);
brw_upload_tess_programs(brw);
if (brw->programs[MESA_SHADER_GEOMETRY]) {
brw_upload_gs_prog(brw);
} else {
brw->gs.base.prog_data = NULL;
if (devinfo->ver < 7)
brw_upload_ff_gs_prog(brw);
}
/* Update the VUE map for data exiting the GS stage of the pipeline.
* This comes from the last enabled shader stage.
*/
GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
bool old_separate = brw->vue_map_geom_out.separate;
struct brw_vue_prog_data *vue_prog_data;
if (brw->programs[MESA_SHADER_GEOMETRY])
vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
else if (brw->programs[MESA_SHADER_TESS_EVAL])
vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
else
vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
brw->vue_map_geom_out = vue_prog_data->vue_map;
/* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
if (old_slots != brw->vue_map_geom_out.slots_valid ||
old_separate != brw->vue_map_geom_out.separate)
brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
VARYING_BIT_VIEWPORT) {
ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
brw->clip.viewport_count =
(brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
ctx->Const.MaxViewports : 1;
}
brw_upload_wm_prog(brw);
if (devinfo->ver < 6) {
brw_upload_clip_prog(brw);
brw_upload_sf_prog(brw);
}
brw_disk_cache_write_render_programs(brw);
} else if (pipeline == BRW_COMPUTE_PIPELINE) {
brw_upload_cs_prog(brw);
brw_disk_cache_write_compute_program(brw);
}
}
static inline void
merge_ctx_state(struct brw_context *brw,
struct brw_state_flags *state)
{
state->mesa |= brw->NewGLState;
state->brw |= brw->ctx.NewDriverState;
}
static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
struct brw_state_flags *state,
const struct brw_tracked_state *atom)
{
if (check_state(state, &atom->dirty)) {
atom->emit(brw);
merge_ctx_state(brw, state);
}
}
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
enum brw_pipeline pipeline)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
int i;
static int dirty_count = 0;
struct brw_state_flags state = brw->state.pipelines[pipeline];
const unsigned fb_samples =
MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
brw_select_pipeline(brw, pipeline);
if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);
if (INTEL_DEBUG(DEBUG_REEMIT)) {
/* Always re-emit all state. */
brw->NewGLState = ~0;
ctx->NewDriverState = ~0ull;
}
if (pipeline == BRW_RENDER_PIPELINE) {
if (brw->programs[MESA_SHADER_FRAGMENT] !=
ctx->FragmentProgram._Current) {
brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
}
if (brw->programs[MESA_SHADER_TESS_EVAL] !=
ctx->TessEvalProgram._Current) {
brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
}
if (brw->programs[MESA_SHADER_TESS_CTRL] !=
ctx->TessCtrlProgram._Current) {
brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
}
if (brw->programs[MESA_SHADER_GEOMETRY] !=
ctx->GeometryProgram._Current) {
brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
}
if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
}
}
if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
}
if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
brw->meta_in_progress = _mesa_meta_in_progress(ctx);
brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
}
if (brw->num_samples != fb_samples) {
brw->num_samples = fb_samples;
brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
}
/* Exit early if no state is flagged as dirty */
merge_ctx_state(brw, &state);
if ((state.mesa | state.brw) == 0)
return;
/* Emit Sandybridge workaround flushes on every primitive, for safety. */
if (devinfo->ver == 6)
brw_emit_post_sync_nonzero_flush(brw);
brw_upload_programs(brw, pipeline);
merge_ctx_state(brw, &state);
brw->vtbl.emit_state_base_address(brw);
const struct brw_tracked_state *atoms =
brw_get_pipeline_atoms(brw, pipeline);
const int num_atoms = brw->num_atoms[pipeline];
if (INTEL_DEBUG(DEBUG_ANY)) {
/* Debug version which enforces various sanity checks on the
* state flags which are generated and checked to help ensure
* state atoms are ordered correctly in the list.
*/
struct brw_state_flags examined, prev;
memset(&examined, 0, sizeof(examined));
prev = state;
for (i = 0; i < num_atoms; i++) {
const struct brw_tracked_state *atom = &atoms[i];
struct brw_state_flags generated;
check_and_emit_atom(brw, &state, atom);
accumulate_state(&examined, &atom->dirty);
/* generated = (prev ^ state)
* if (examined & generated)
* fail;
*/
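/* Added clarification: "generated" is the set of bits this atom's emit()
 * flagged while running.  If any of those bits were already examined by an
 * earlier atom in the list, that earlier atom may have emitted stale state,
 * so the assert below flags an atom-ordering bug.
 */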
xor_states(&generated, &prev, &state);
assert(!check_state(&examined, &generated));
prev = state;
}
}
else {
for (i = 0; i < num_atoms; i++) {
const struct brw_tracked_state *atom = &atoms[i];
check_and_emit_atom(brw, &state, atom);
}
}
if (INTEL_DEBUG(DEBUG_STATE)) {
STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
brw_update_dirty_count(mesa_bits, state.mesa);
brw_update_dirty_count(brw_bits, state.brw);
if (dirty_count++ % 1000 == 0) {
brw_print_dirty_count(mesa_bits);
brw_print_dirty_count(brw_bits);
fprintf(stderr, "\n");
}
}
}
/***********************************************************************
* Emit all state:
*/
void brw_upload_render_state(struct brw_context *brw)
{
brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}
static inline void
brw_pipeline_state_finished(struct brw_context *brw,
enum brw_pipeline pipeline)
{
/* Save all dirty state into the other pipelines */
for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
if (i != pipeline) {
brw->state.pipelines[i].mesa |= brw->NewGLState;
brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
} else {
memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
}
}
brw->NewGLState = 0;
brw->ctx.NewDriverState = 0ull;
}
/**
* Clear dirty bits to account for the fact that the state emitted by
* brw_upload_render_state() has been committed to the hardware. This is a
* separate call from brw_upload_render_state() because it's possible that
* after the call to brw_upload_render_state(), we will discover that we've
* run out of aperture space, and need to rewind the batch buffer to the state
* it had before the brw_upload_render_state() call.
*/
void
brw_render_state_finished(struct brw_context *brw)
{
brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}
void
brw_upload_compute_state(struct brw_context *brw)
{
brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}
void
brw_compute_state_finished(struct brw_context *brw)
{
brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}


@ -1,68 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#ifndef BRW_STRUCTS_H
#define BRW_STRUCTS_H
struct brw_urb_fence
{
struct
{
unsigned length:8;
unsigned vs_realloc:1;
unsigned gs_realloc:1;
unsigned clp_realloc:1;
unsigned sf_realloc:1;
unsigned vfe_realloc:1;
unsigned cs_realloc:1;
unsigned pad:2;
unsigned opcode:16;
} header;
struct
{
unsigned vs_fence:10;
unsigned gs_fence:10;
unsigned clp_fence:10;
unsigned pad:2;
} bits0;
struct
{
unsigned sf_fence:10;
unsigned vf_fence:10;
unsigned cs_fence:11;
unsigned pad:1;
} bits1;
};
#endif


@ -1,558 +0,0 @@
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "isl/isl.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
enum isl_format
brw_isl_format_for_mesa_format(mesa_format mesa_format)
{
/* This table is ordered according to the enum ordering in formats.h. We
* expect that enum to be extended without this table's explicit
* initializers staying in sync, so every entry defaults to
* ISL_FORMAT_UNSUPPORTED rather than relying on zero-initialization
* (0 happens to be ISL_FORMAT_R32G32B32A32_FLOAT).
*/
static const enum isl_format table[MESA_FORMAT_COUNT] = {
[0 ... MESA_FORMAT_COUNT-1] = ISL_FORMAT_UNSUPPORTED,
[MESA_FORMAT_R8G8B8A8_UNORM] = ISL_FORMAT_R8G8B8A8_UNORM,
[MESA_FORMAT_B8G8R8A8_UNORM] = ISL_FORMAT_B8G8R8A8_UNORM,
[MESA_FORMAT_R8G8B8X8_UNORM] = ISL_FORMAT_R8G8B8X8_UNORM,
[MESA_FORMAT_B8G8R8X8_UNORM] = ISL_FORMAT_B8G8R8X8_UNORM,
[MESA_FORMAT_RGB_UNORM8] = ISL_FORMAT_R8G8B8_UNORM,
[MESA_FORMAT_B5G6R5_UNORM] = ISL_FORMAT_B5G6R5_UNORM,
[MESA_FORMAT_B4G4R4A4_UNORM] = ISL_FORMAT_B4G4R4A4_UNORM,
[MESA_FORMAT_B5G5R5A1_UNORM] = ISL_FORMAT_B5G5R5A1_UNORM,
[MESA_FORMAT_LA_UNORM8] = ISL_FORMAT_L8A8_UNORM,
[MESA_FORMAT_LA_UNORM16] = ISL_FORMAT_L16A16_UNORM,
[MESA_FORMAT_A_UNORM8] = ISL_FORMAT_A8_UNORM,
[MESA_FORMAT_A_UNORM16] = ISL_FORMAT_A16_UNORM,
[MESA_FORMAT_L_UNORM8] = ISL_FORMAT_L8_UNORM,
[MESA_FORMAT_L_UNORM16] = ISL_FORMAT_L16_UNORM,
[MESA_FORMAT_I_UNORM8] = ISL_FORMAT_I8_UNORM,
[MESA_FORMAT_I_UNORM16] = ISL_FORMAT_I16_UNORM,
[MESA_FORMAT_YCBCR_REV] = ISL_FORMAT_YCRCB_NORMAL,
[MESA_FORMAT_YCBCR] = ISL_FORMAT_YCRCB_SWAPUVY,
[MESA_FORMAT_R_UNORM8] = ISL_FORMAT_R8_UNORM,
[MESA_FORMAT_RG_UNORM8] = ISL_FORMAT_R8G8_UNORM,
[MESA_FORMAT_R_UNORM16] = ISL_FORMAT_R16_UNORM,
[MESA_FORMAT_RG_UNORM16] = ISL_FORMAT_R16G16_UNORM,
[MESA_FORMAT_B10G10R10A2_UNORM] = ISL_FORMAT_B10G10R10A2_UNORM,
[MESA_FORMAT_S_UINT8] = ISL_FORMAT_R8_UINT,
[MESA_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB,
[MESA_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB,
[MESA_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB,
[MESA_FORMAT_R_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
[MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
[MESA_FORMAT_LA_SRGB8] = ISL_FORMAT_L8A8_UNORM_SRGB,
[MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
[MESA_FORMAT_SRGBA_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
[MESA_FORMAT_SRGBA_DXT3] = ISL_FORMAT_BC2_UNORM_SRGB,
[MESA_FORMAT_SRGBA_DXT5] = ISL_FORMAT_BC3_UNORM_SRGB,
[MESA_FORMAT_RGB_FXT1] = ISL_FORMAT_FXT1,
[MESA_FORMAT_RGBA_FXT1] = ISL_FORMAT_FXT1,
[MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_BC1_UNORM,
[MESA_FORMAT_RGBA_DXT1] = ISL_FORMAT_BC1_UNORM,
[MESA_FORMAT_RGBA_DXT3] = ISL_FORMAT_BC2_UNORM,
[MESA_FORMAT_RGBA_DXT5] = ISL_FORMAT_BC3_UNORM,
[MESA_FORMAT_RGBA_FLOAT32] = ISL_FORMAT_R32G32B32A32_FLOAT,
[MESA_FORMAT_RGBA_FLOAT16] = ISL_FORMAT_R16G16B16A16_FLOAT,
[MESA_FORMAT_RGB_FLOAT32] = ISL_FORMAT_R32G32B32_FLOAT,
[MESA_FORMAT_A_FLOAT32] = ISL_FORMAT_A32_FLOAT,
[MESA_FORMAT_A_FLOAT16] = ISL_FORMAT_A16_FLOAT,
[MESA_FORMAT_L_FLOAT32] = ISL_FORMAT_L32_FLOAT,
[MESA_FORMAT_L_FLOAT16] = ISL_FORMAT_L16_FLOAT,
[MESA_FORMAT_LA_FLOAT32] = ISL_FORMAT_L32A32_FLOAT,
[MESA_FORMAT_LA_FLOAT16] = ISL_FORMAT_L16A16_FLOAT,
[MESA_FORMAT_I_FLOAT32] = ISL_FORMAT_I32_FLOAT,
[MESA_FORMAT_I_FLOAT16] = ISL_FORMAT_I16_FLOAT,
[MESA_FORMAT_R_FLOAT32] = ISL_FORMAT_R32_FLOAT,
[MESA_FORMAT_R_FLOAT16] = ISL_FORMAT_R16_FLOAT,
[MESA_FORMAT_RG_FLOAT32] = ISL_FORMAT_R32G32_FLOAT,
[MESA_FORMAT_RG_FLOAT16] = ISL_FORMAT_R16G16_FLOAT,
[MESA_FORMAT_R_SINT8] = ISL_FORMAT_R8_SINT,
[MESA_FORMAT_RG_SINT8] = ISL_FORMAT_R8G8_SINT,
[MESA_FORMAT_RGB_SINT8] = ISL_FORMAT_R8G8B8_SINT,
[MESA_FORMAT_RGBA_SINT8] = ISL_FORMAT_R8G8B8A8_SINT,
[MESA_FORMAT_R_SINT16] = ISL_FORMAT_R16_SINT,
[MESA_FORMAT_RG_SINT16] = ISL_FORMAT_R16G16_SINT,
[MESA_FORMAT_RGB_SINT16] = ISL_FORMAT_R16G16B16_SINT,
[MESA_FORMAT_RGBA_SINT16] = ISL_FORMAT_R16G16B16A16_SINT,
[MESA_FORMAT_R_SINT32] = ISL_FORMAT_R32_SINT,
[MESA_FORMAT_RG_SINT32] = ISL_FORMAT_R32G32_SINT,
[MESA_FORMAT_RGB_SINT32] = ISL_FORMAT_R32G32B32_SINT,
[MESA_FORMAT_RGBA_SINT32] = ISL_FORMAT_R32G32B32A32_SINT,
[MESA_FORMAT_R_UINT8] = ISL_FORMAT_R8_UINT,
[MESA_FORMAT_RG_UINT8] = ISL_FORMAT_R8G8_UINT,
[MESA_FORMAT_RGB_UINT8] = ISL_FORMAT_R8G8B8_UINT,
[MESA_FORMAT_RGBA_UINT8] = ISL_FORMAT_R8G8B8A8_UINT,
[MESA_FORMAT_R_UINT16] = ISL_FORMAT_R16_UINT,
[MESA_FORMAT_RG_UINT16] = ISL_FORMAT_R16G16_UINT,
[MESA_FORMAT_RGB_UINT16] = ISL_FORMAT_R16G16B16_UINT,
[MESA_FORMAT_RGBA_UINT16] = ISL_FORMAT_R16G16B16A16_UINT,
[MESA_FORMAT_R_UINT32] = ISL_FORMAT_R32_UINT,
[MESA_FORMAT_RG_UINT32] = ISL_FORMAT_R32G32_UINT,
[MESA_FORMAT_RGB_UINT32] = ISL_FORMAT_R32G32B32_UINT,
[MESA_FORMAT_RGBA_UINT32] = ISL_FORMAT_R32G32B32A32_UINT,
[MESA_FORMAT_R_SNORM8] = ISL_FORMAT_R8_SNORM,
[MESA_FORMAT_RG_SNORM8] = ISL_FORMAT_R8G8_SNORM,
[MESA_FORMAT_R8G8B8A8_SNORM] = ISL_FORMAT_R8G8B8A8_SNORM,
[MESA_FORMAT_R_SNORM16] = ISL_FORMAT_R16_SNORM,
[MESA_FORMAT_RG_SNORM16] = ISL_FORMAT_R16G16_SNORM,
[MESA_FORMAT_RGB_SNORM16] = ISL_FORMAT_R16G16B16_SNORM,
[MESA_FORMAT_RGBA_SNORM16] = ISL_FORMAT_R16G16B16A16_SNORM,
[MESA_FORMAT_RGBA_UNORM16] = ISL_FORMAT_R16G16B16A16_UNORM,
[MESA_FORMAT_R_RGTC1_UNORM] = ISL_FORMAT_BC4_UNORM,
[MESA_FORMAT_R_RGTC1_SNORM] = ISL_FORMAT_BC4_SNORM,
[MESA_FORMAT_RG_RGTC2_UNORM] = ISL_FORMAT_BC5_UNORM,
[MESA_FORMAT_RG_RGTC2_SNORM] = ISL_FORMAT_BC5_SNORM,
[MESA_FORMAT_ETC1_RGB8] = ISL_FORMAT_ETC1_RGB8,
[MESA_FORMAT_ETC2_RGB8] = ISL_FORMAT_ETC2_RGB8,
[MESA_FORMAT_ETC2_SRGB8] = ISL_FORMAT_ETC2_SRGB8,
[MESA_FORMAT_ETC2_RGBA8_EAC] = ISL_FORMAT_ETC2_EAC_RGBA8,
[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = ISL_FORMAT_ETC2_EAC_SRGB8_A8,
[MESA_FORMAT_ETC2_R11_EAC] = ISL_FORMAT_EAC_R11,
[MESA_FORMAT_ETC2_RG11_EAC] = ISL_FORMAT_EAC_RG11,
[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = ISL_FORMAT_EAC_SIGNED_R11,
[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = ISL_FORMAT_EAC_SIGNED_RG11,
[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_RGB8_PTA,
[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = ISL_FORMAT_ETC2_SRGB8_PTA,
[MESA_FORMAT_BPTC_RGBA_UNORM] = ISL_FORMAT_BC7_UNORM,
[MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM] = ISL_FORMAT_BC7_UNORM_SRGB,
[MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT] = ISL_FORMAT_BC6H_SF16,
[MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT] = ISL_FORMAT_BC6H_UF16,
[MESA_FORMAT_RGBA_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16,
[MESA_FORMAT_RGBA_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_FLT16,
[MESA_FORMAT_RGBA_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16,
[MESA_FORMAT_RGBA_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_FLT16,
[MESA_FORMAT_RGBA_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_FLT16,
[MESA_FORMAT_RGBA_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_FLT16,
[MESA_FORMAT_RGBA_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_FLT16,
[MESA_FORMAT_RGBA_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_FLT16,
[MESA_FORMAT_RGBA_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_FLT16,
[MESA_FORMAT_RGBA_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_FLT16,
[MESA_FORMAT_RGBA_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_FLT16,
[MESA_FORMAT_RGBA_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16,
[MESA_FORMAT_RGBA_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16,
[MESA_FORMAT_RGBA_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_U8SRGB,
[MESA_FORMAT_SRGB8_ALPHA8_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_U8SRGB,
[MESA_FORMAT_R9G9B9E5_FLOAT] = ISL_FORMAT_R9G9B9E5_SHAREDEXP,
[MESA_FORMAT_R11G11B10_FLOAT] = ISL_FORMAT_R11G11B10_FLOAT,
[MESA_FORMAT_R10G10B10A2_UNORM] = ISL_FORMAT_R10G10B10A2_UNORM,
[MESA_FORMAT_B10G10R10A2_UINT] = ISL_FORMAT_B10G10R10A2_UINT,
[MESA_FORMAT_R10G10B10A2_UINT] = ISL_FORMAT_R10G10B10A2_UINT,
[MESA_FORMAT_B5G5R5X1_UNORM] = ISL_FORMAT_B5G5R5X1_UNORM,
[MESA_FORMAT_R8G8B8X8_SRGB] = ISL_FORMAT_R8G8B8X8_UNORM_SRGB,
[MESA_FORMAT_B10G10R10X2_UNORM] = ISL_FORMAT_B10G10R10X2_UNORM,
[MESA_FORMAT_RGBX_UNORM16] = ISL_FORMAT_R16G16B16X16_UNORM,
[MESA_FORMAT_RGBX_FLOAT16] = ISL_FORMAT_R16G16B16X16_FLOAT,
[MESA_FORMAT_RGBX_FLOAT32] = ISL_FORMAT_R32G32B32X32_FLOAT,
};
assert(mesa_format < MESA_FORMAT_COUNT);
return table[mesa_format];
}
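/* Usage note (added): callers treat ISL_FORMAT_UNSUPPORTED as "no mapping".
 * Any mesa_format not listed in the table above resolves to it, and
 * brw_screen_init_surface_formats() below skips such formats entirely.
 */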
void
brw_screen_init_surface_formats(struct brw_screen *screen)
{
const struct intel_device_info *devinfo = &screen->devinfo;
mesa_format format;
memset(&screen->mesa_format_supports_texture, 0,
sizeof(screen->mesa_format_supports_texture));
for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) {
if (!_mesa_get_format_name(format))
continue;
enum isl_format texture, render;
bool is_integer = _mesa_is_format_integer_color(format);
render = texture = brw_isl_format_for_mesa_format(format);
/* Only exposed with EXT_memory_object_* support which
* is not available on older gens.
*/
if (devinfo->ver < 7 && format == MESA_FORMAT_Z_UNORM16)
continue;
if (texture == ISL_FORMAT_UNSUPPORTED)
continue;
/* Don't advertise 8 and 16-bit RGB formats to core mesa. This ensures
* that they are renderable from an API perspective since core mesa will
* fall back to RGBA or RGBX (we can't render to non-power-of-two
* formats). For 8-bit formats, this also keeps us from hitting some
* nasty corners in brw_miptree_map_blit if you ever try to map one.
*/
int format_size = _mesa_get_format_bytes(format);
if (format_size == 3 || format_size == 6)
continue;
if (isl_format_supports_sampling(devinfo, texture) &&
(isl_format_supports_filtering(devinfo, texture) || is_integer))
screen->mesa_format_supports_texture[format] = true;
/* Re-map some render target formats to make them supported when they
* wouldn't be using their format for texturing.
*/
switch (render) {
/* For these formats, we just need to read/write the first
* channel into R, which is to say that we just treat them as
* GL_RED.
*/
case ISL_FORMAT_I32_FLOAT:
case ISL_FORMAT_L32_FLOAT:
render = ISL_FORMAT_R32_FLOAT;
break;
case ISL_FORMAT_I16_FLOAT:
case ISL_FORMAT_L16_FLOAT:
render = ISL_FORMAT_R16_FLOAT;
break;
case ISL_FORMAT_I8_UNORM:
case ISL_FORMAT_L8_UNORM:
render = ISL_FORMAT_R8_UNORM;
break;
case ISL_FORMAT_I16_UNORM:
case ISL_FORMAT_L16_UNORM:
render = ISL_FORMAT_R16_UNORM;
break;
case ISL_FORMAT_R16G16B16X16_UNORM:
render = ISL_FORMAT_R16G16B16A16_UNORM;
break;
case ISL_FORMAT_R16G16B16X16_FLOAT:
render = ISL_FORMAT_R16G16B16A16_FLOAT;
break;
case ISL_FORMAT_B8G8R8X8_UNORM:
/* XRGB is handled as ARGB because the chips in this family
* cannot render to XRGB targets. This means that we have to
* mask writes to alpha (ala glColorMask) and reconfigure the
* alpha blending hardware to use GL_ONE (or GL_ZERO) for
* cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
* used. On Gfx8+ BGRX is actually allowed (but not RGBX).
*/
if (!isl_format_supports_rendering(devinfo, texture))
render = ISL_FORMAT_B8G8R8A8_UNORM;
break;
case ISL_FORMAT_B8G8R8X8_UNORM_SRGB:
if (!isl_format_supports_rendering(devinfo, texture))
render = ISL_FORMAT_B8G8R8A8_UNORM_SRGB;
break;
case ISL_FORMAT_R8G8B8X8_UNORM:
render = ISL_FORMAT_R8G8B8A8_UNORM;
break;
case ISL_FORMAT_R8G8B8X8_UNORM_SRGB:
render = ISL_FORMAT_R8G8B8A8_UNORM_SRGB;
break;
default:
break;
}
/* Note that GL_EXT_texture_integer says that blending doesn't occur for
* integer, so we don't need hardware support for blending on it. Other
* than that, GL in general requires alpha blending for render targets,
* even though we don't support it for some formats.
*/
if (isl_format_supports_rendering(devinfo, render) &&
(isl_format_supports_alpha_blending(devinfo, render) || is_integer)) {
screen->mesa_to_isl_render_format[format] = render;
screen->mesa_format_supports_render[format] = true;
}
}
/* We will check this table for FBO completeness, but the surface format
* table above only covered color rendering.
*/
screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_S8_UINT] = true;
screen->mesa_format_supports_render[MESA_FORMAT_Z24_UNORM_X8_UINT] = true;
screen->mesa_format_supports_render[MESA_FORMAT_S_UINT8] = true;
screen->mesa_format_supports_render[MESA_FORMAT_Z_FLOAT32] = true;
screen->mesa_format_supports_render[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true;
if (devinfo->ver >= 8)
screen->mesa_format_supports_render[MESA_FORMAT_Z_UNORM16] = true;
/* We remap depth formats to a supported texturing format in
* translate_tex_format().
*/
screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_S8_UINT] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_Z24_UNORM_X8_UINT] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_Z_FLOAT32] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_S_UINT8] = true;
/* Benchmarking shows that Z16 is slower than Z24, so there's no reason to
* use it unless you're under memory (not memory bandwidth) pressure.
*
* Apparently, the GPU's depth scoreboarding works on a 32-bit granularity,
* which corresponds to one pixel in the depth buffer for Z24 or Z32 formats.
* However, it corresponds to two pixels with Z16, which means both need to
* hit the early depth case in order for the optimization to apply.
*
* Other speculation is that we may be hitting increased fragment shader
* execution from GL_LEQUAL/GL_EQUAL depth tests at reduced precision.
*
* With the PMA stall workaround in place, Z16 is faster than Z24, as it
* should be.
*/
if (devinfo->ver >= 8)
screen->mesa_format_supports_texture[MESA_FORMAT_Z_UNORM16] = true;
/* The RGBX formats are not renderable. Normally these get mapped
* internally to RGBA formats when rendering. However on Gfx9+ when this
* internal override is used fast clears don't work so they are disabled in
* brw_meta_fast_clear. To avoid this problem we can just pretend not to
* support RGBX formats at all. This will cause the upper layers of Mesa to
* pick the RGBA formats instead. This works fine because when it is used
* as a texture source the swizzle state is programmed to force the alpha
* channel to 1.0 anyway. We could also do this for all gens except that
* it's a bit more difficult when the hardware doesn't support texture
* swizzling. Gens using blorp have further problems because blorp
* doesn't implement this swizzle override. We don't need to do this for
* BGRX because that actually is supported natively on Gfx8+.
*/
if (devinfo->ver >= 9) {
static const mesa_format rgbx_formats[] = {
MESA_FORMAT_R8G8B8X8_UNORM,
MESA_FORMAT_R8G8B8X8_SRGB,
MESA_FORMAT_RGBX_UNORM16,
MESA_FORMAT_RGBX_FLOAT16,
MESA_FORMAT_RGBX_FLOAT32
};
for (int i = 0; i < ARRAY_SIZE(rgbx_formats); i++) {
screen->mesa_format_supports_texture[rgbx_formats[i]] = false;
screen->mesa_format_supports_render[rgbx_formats[i]] = false;
}
}
/* On hardware that lacks support for ETC1, we map ETC1 to RGBX
* during glCompressedTexImage2D(). See brw_mipmap_tree::wraps_etc1.
*/
screen->mesa_format_supports_texture[MESA_FORMAT_ETC1_RGB8] = true;
/* On hardware that lacks support for ETC2, we map ETC2 to a suitable
* MESA_FORMAT during glCompressedTexImage2D().
* See brw_mipmap_tree::wraps_etc2.
*/
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGBA8_EAC] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_R11_EAC] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RG11_EAC] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = true;
screen->mesa_format_supports_texture[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = true;
}
void
brw_init_surface_formats(struct brw_context *brw)
{
struct brw_screen *screen = brw->screen;
struct gl_context *ctx = &brw->ctx;
brw->mesa_format_supports_render = screen->mesa_format_supports_render;
brw->mesa_to_isl_render_format = screen->mesa_to_isl_render_format;
STATIC_ASSERT(ARRAY_SIZE(ctx->TextureFormatSupported) ==
ARRAY_SIZE(screen->mesa_format_supports_texture));
for (unsigned i = 0; i < ARRAY_SIZE(ctx->TextureFormatSupported); ++i) {
ctx->TextureFormatSupported[i] = screen->mesa_format_supports_texture[i];
}
}
bool
brw_render_target_supported(struct brw_context *brw,
struct gl_renderbuffer *rb)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
mesa_format format = rb->Format;
/* Many integer formats are promoted to RGBA (like XRGB8888 is), which means
* we would consider them renderable even though we don't have surface
* support for their alpha behavior and don't have the blending unit
* available to fake it like we do for XRGB8888. Force them to being
* unsupported.
*/
if (_mesa_is_format_integer_color(format) &&
rb->_BaseFormat != GL_RGBA &&
rb->_BaseFormat != GL_RG &&
rb->_BaseFormat != GL_RED)
return false;
/* Under some conditions, MSAA is not supported for formats whose width is
* more than 64 bits.
*/
if (devinfo->ver < 8 &&
rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
/* Gfx6: MSAA on >64 bit formats is unsupported. */
if (devinfo->ver <= 6)
return false;
/* Gfx7: 8x MSAA on >64 bit formats is unsupported. */
if (rb->NumSamples >= 8)
return false;
}
return brw->mesa_format_supports_render[format];
}
enum isl_format
translate_tex_format(struct brw_context *brw,
mesa_format mesa_format,
GLenum srgb_decode)
{
struct gl_context *ctx = &brw->ctx;
if (srgb_decode == GL_SKIP_DECODE_EXT)
mesa_format = _mesa_get_srgb_format_linear(mesa_format);
switch( mesa_format ) {
case MESA_FORMAT_Z_UNORM16:
return ISL_FORMAT_R16_UNORM;
case MESA_FORMAT_Z24_UNORM_S8_UINT:
case MESA_FORMAT_Z24_UNORM_X8_UINT:
return ISL_FORMAT_R24_UNORM_X8_TYPELESS;
case MESA_FORMAT_Z_FLOAT32:
return ISL_FORMAT_R32_FLOAT;
case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
return ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS;
case MESA_FORMAT_RGBA_FLOAT32:
/* The value of this ISL surface format is 0, which tricks the
* assertion below.
*/
return ISL_FORMAT_R32G32B32A32_FLOAT;
case MESA_FORMAT_RGBA_ASTC_4x4:
case MESA_FORMAT_RGBA_ASTC_5x4:
case MESA_FORMAT_RGBA_ASTC_5x5:
case MESA_FORMAT_RGBA_ASTC_6x5:
case MESA_FORMAT_RGBA_ASTC_6x6:
case MESA_FORMAT_RGBA_ASTC_8x5:
case MESA_FORMAT_RGBA_ASTC_8x6:
case MESA_FORMAT_RGBA_ASTC_8x8:
case MESA_FORMAT_RGBA_ASTC_10x5:
case MESA_FORMAT_RGBA_ASTC_10x6:
case MESA_FORMAT_RGBA_ASTC_10x8:
case MESA_FORMAT_RGBA_ASTC_10x10:
case MESA_FORMAT_RGBA_ASTC_12x10:
case MESA_FORMAT_RGBA_ASTC_12x12: {
enum isl_format isl_fmt =
brw_isl_format_for_mesa_format(mesa_format);
/**
* It is possible to process these formats using the LDR Profile
* or the Full Profile mode of the hardware. Because it isn't
* possible to determine if an HDR or LDR texture is being rendered, we
* can't determine which mode to enable in the hardware. Therefore, to
* handle all cases, always default to Full profile unless we are
* processing sRGBs, which are incompatible with this mode.
*/
if (ctx->Extensions.KHR_texture_compression_astc_hdr)
isl_fmt |= GFX9_SURFACE_ASTC_HDR_FORMAT_BIT;
return isl_fmt;
}
default:
return brw_isl_format_for_mesa_format(mesa_format);
}
}
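/* Worked example (added): translate_tex_format(brw,
 * MESA_FORMAT_Z24_UNORM_S8_UINT, GL_DECODE_EXT) returns
 * ISL_FORMAT_R24_UNORM_X8_TYPELESS, i.e. depth textures are sampled through
 * a color-channel format, which is why brw_screen_init_surface_formats()
 * above can mark the depth formats as texturable.
 */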
/**
* Convert a MESA_FORMAT to the corresponding BRW_DEPTHFORMAT enum.
*/
uint32_t
brw_depth_format(struct brw_context *brw, mesa_format format)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
switch (format) {
case MESA_FORMAT_Z_UNORM16:
return BRW_DEPTHFORMAT_D16_UNORM;
case MESA_FORMAT_Z_FLOAT32:
return BRW_DEPTHFORMAT_D32_FLOAT;
case MESA_FORMAT_Z24_UNORM_X8_UINT:
if (devinfo->ver >= 6) {
return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
} else {
/* Use D24_UNORM_S8, not D24_UNORM_X8.
*
* D24_UNORM_X8 was not introduced until Gfx5. (See the Ironlake PRM,
* Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
* 3DSTATE_DEPTH_BUFFER.Surface_Format).
*
* However, on Gfx5, D24_UNORM_X8 may be used only if separate
* stencil is enabled, and we never enable it. From the Ironlake PRM,
* same section as above, 3DSTATE_DEPTH_BUFFER's
* "Separate Stencil Buffer Enable" bit:
*
* "If this field is disabled, the Surface Format of the depth
* buffer cannot be D24_UNORM_X8_UINT."
*/
return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
}
case MESA_FORMAT_Z24_UNORM_S8_UINT:
return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
default:
unreachable("Unexpected depth format.");
}
}


@ -1,642 +0,0 @@
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
/**
* \file
* \brief Support for GL_ARB_sync and EGL_KHR_fence_sync.
*
* GL_ARB_sync is implemented by flushing the current batchbuffer and keeping a
* reference on it. We can then check for completion or wait for completion
* using the normal buffer object mechanisms. This does mean that if an
* application is using many sync objects, it will emit small batchbuffers
* which may end up being a significant overhead. In other tests of removing
* gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
* performance bottleneck, though.
*/
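To make the scheme above concrete, this is roughly what an application-side GL_ARB_sync sequence looks like; every fence created this way reaches brw_gl_fence_sync() and brw_gl_client_wait_sync() below. A minimal sketch only, assuming a loader such as libepoxy exposes the GL 3.2 entry points:
#include <epoxy/gl.h>
static void wait_for_pending_gpu_work(void)
{
   /* Fence all previously issued GL commands. */
   GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
   /* Flush so the batch holding the fence is submitted, then wait up to 1 ms. */
   GLenum status = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT,
                                    1000000 /* ns */);
   if (status == GL_TIMEOUT_EXPIRED) {
      /* The batchbuffer backing the fence is still busy on the GPU. */
   }
   glDeleteSync(fence);
}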
#include <libsync.h> /* Requires Android or libdrm-2.4.72 */
#include "util/os_file.h"
#include "util/u_memory.h"
#include <xf86drm.h>
#include "brw_context.h"
#include "brw_batch.h"
#include "mesa/main/externalobjects.h"
struct brw_fence {
struct brw_context *brw;
enum brw_fence_type {
/** The fence waits for completion of brw_fence::batch_bo. */
BRW_FENCE_TYPE_BO_WAIT,
/** The fence waits for brw_fence::sync_fd to signal. */
BRW_FENCE_TYPE_SYNC_FD,
} type;
union {
struct brw_bo *batch_bo;
/* This struct owns the fd. */
int sync_fd;
};
mtx_t mutex;
bool signalled;
};
struct brw_gl_sync {
struct gl_sync_object gl;
struct brw_fence fence;
};
struct intel_semaphore_object {
struct gl_semaphore_object Base;
struct drm_syncobj_handle *syncobj;
};
static inline struct intel_semaphore_object *
intel_semaphore_object(struct gl_semaphore_object *sem_obj) {
return (struct intel_semaphore_object*) sem_obj;
}
static struct gl_semaphore_object *
intel_semaphoreobj_alloc(struct gl_context *ctx, GLuint name)
{
struct intel_semaphore_object *is_obj = CALLOC_STRUCT(intel_semaphore_object);
if (!is_obj)
return NULL;
_mesa_initialize_semaphore_object(ctx, &is_obj->Base, name);
return &is_obj->Base;
}
static void
intel_semaphoreobj_free(struct gl_context *ctx,
struct gl_semaphore_object *semObj)
{
_mesa_delete_semaphore_object(ctx, semObj);
}
static void
intel_semaphoreobj_import(struct gl_context *ctx,
struct gl_semaphore_object *semObj,
int fd)
{
struct brw_context *brw = brw_context(ctx);
struct brw_screen *screen = brw->screen;
struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
iSemObj->syncobj = CALLOC_STRUCT(drm_syncobj_handle);
iSemObj->syncobj->fd = fd;
if (drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, iSemObj->syncobj) < 0) {
fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
strerror(errno));
free(iSemObj->syncobj);
iSemObj->syncobj = NULL;
}
}
static void
intel_semaphoreobj_signal(struct gl_context *ctx,
struct gl_semaphore_object *semObj,
GLuint numBufferBarriers,
struct gl_buffer_object **bufObjs,
GLuint numTextureBarriers,
struct gl_texture_object **texObjs,
const GLenum *dstLayouts)
{
struct brw_context *brw = brw_context(ctx);
struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
struct drm_i915_gem_exec_fence *fence =
util_dynarray_grow(&brw->batch.exec_fences, struct drm_i915_gem_exec_fence, 1);
fence->flags = I915_EXEC_FENCE_SIGNAL;
fence->handle = iSemObj->syncobj->handle;
brw->batch.contains_fence_signal = true;
}
static void
intel_semaphoreobj_wait(struct gl_context *ctx,
struct gl_semaphore_object *semObj,
GLuint numBufferBarriers,
struct gl_buffer_object **bufObjs,
GLuint numTextureBarriers,
struct gl_texture_object **texObjs,
const GLenum *srcLayouts)
{
struct brw_context *brw = brw_context(ctx);
struct brw_screen *screen = brw->screen;
struct intel_semaphore_object *iSemObj = intel_semaphore_object(semObj);
struct drm_syncobj_wait args = {
.handles = (uintptr_t)&iSemObj->syncobj->handle,
.count_handles = 1,
};
drmIoctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
}
static void
brw_fence_init(struct brw_context *brw, struct brw_fence *fence,
enum brw_fence_type type)
{
fence->brw = brw;
fence->type = type;
mtx_init(&fence->mutex, mtx_plain);
switch (type) {
case BRW_FENCE_TYPE_BO_WAIT:
fence->batch_bo = NULL;
break;
case BRW_FENCE_TYPE_SYNC_FD:
fence->sync_fd = -1;
break;
}
}
static void
brw_fence_finish(struct brw_fence *fence)
{
switch (fence->type) {
case BRW_FENCE_TYPE_BO_WAIT:
if (fence->batch_bo)
brw_bo_unreference(fence->batch_bo);
break;
case BRW_FENCE_TYPE_SYNC_FD:
if (fence->sync_fd != -1)
close(fence->sync_fd);
break;
}
mtx_destroy(&fence->mutex);
}
static bool MUST_CHECK
brw_fence_insert_locked(struct brw_context *brw, struct brw_fence *fence)
{
__DRIcontext *driContext = brw->driContext;
__DRIdrawable *driDrawable = driContext->driDrawablePriv;
/*
* From KHR_fence_sync:
*
* When the condition of the sync object is satisfied by the fence
* command, the sync is signaled by the associated client API context,
* causing any eglClientWaitSyncKHR commands (see below) blocking on
* <sync> to unblock. The only condition currently supported is
* EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR, which is satisfied by
* completion of the fence command corresponding to the sync object,
* and all preceding commands in the associated client API context's
* command stream. The sync object will not be signaled until all
* effects from these commands on the client API's internal and
* framebuffer state are fully realized. No other state is affected by
* execution of the fence command.
*
* Note the emphasis there on ensuring that the framebuffer is fully
* realised before the fence is signaled. We cannot just flush the batch,
* but must also resolve the drawable first. The importance of this is,
* for example, in creating a fence for a frame to be passed to a
* remote compositor. Without us flushing the drawable explicitly, the
* resolve will be in a following batch (when the client finally calls
* SwapBuffers, or triggers a resolve via some other path) and so the
* compositor may read the incomplete framebuffer instead.
*/
if (driDrawable)
brw_resolve_for_dri2_flush(brw, driDrawable);
brw_emit_mi_flush(brw);
switch (fence->type) {
case BRW_FENCE_TYPE_BO_WAIT:
assert(!fence->batch_bo);
assert(!fence->signalled);
fence->batch_bo = brw->batch.batch.bo;
brw_bo_reference(fence->batch_bo);
if (brw_batch_flush(brw) < 0) {
brw_bo_unreference(fence->batch_bo);
fence->batch_bo = NULL;
return false;
}
break;
case BRW_FENCE_TYPE_SYNC_FD:
assert(!fence->signalled);
if (fence->sync_fd == -1) {
/* Create an out-fence that signals after all pending commands
* complete.
*/
if (brw_batch_flush_fence(brw, -1, &fence->sync_fd) < 0)
return false;
assert(fence->sync_fd != -1);
} else {
/* Wait on the in-fence before executing any subsequently submitted
* commands.
*/
if (brw_batch_flush(brw) < 0)
return false;
/* Emit a dummy batch just for the fence. */
brw_emit_mi_flush(brw);
if (brw_batch_flush_fence(brw, fence->sync_fd, NULL) < 0)
return false;
}
break;
}
return true;
}
static bool MUST_CHECK
brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
{
bool ret;
mtx_lock(&fence->mutex);
ret = brw_fence_insert_locked(brw, fence);
mtx_unlock(&fence->mutex);
return ret;
}
static bool
brw_fence_has_completed_locked(struct brw_fence *fence)
{
if (fence->signalled)
return true;
switch (fence->type) {
case BRW_FENCE_TYPE_BO_WAIT:
if (!fence->batch_bo) {
/* There may be no batch if brw_batch_flush() failed. */
return false;
}
if (brw_bo_busy(fence->batch_bo))
return false;
brw_bo_unreference(fence->batch_bo);
fence->batch_bo = NULL;
fence->signalled = true;
return true;
case BRW_FENCE_TYPE_SYNC_FD:
assert(fence->sync_fd != -1);
if (sync_wait(fence->sync_fd, 0) == -1)
return false;
fence->signalled = true;
return true;
}
return false;
}
static bool
brw_fence_has_completed(struct brw_fence *fence)
{
bool ret;
mtx_lock(&fence->mutex);
ret = brw_fence_has_completed_locked(fence);
mtx_unlock(&fence->mutex);
return ret;
}
static bool
brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
uint64_t timeout)
{
int32_t timeout_i32;
if (fence->signalled)
return true;
switch (fence->type) {
case BRW_FENCE_TYPE_BO_WAIT:
if (!fence->batch_bo) {
/* There may be no batch if brw_batch_flush() failed. */
return false;
}
/* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
* immediately for timeouts <= 0. The best we can do is to clamp the
* timeout to INT64_MAX. This limits the maximum timeout from 584 years to
* 292 years - likely not a big deal.
*/
if (timeout > INT64_MAX)
timeout = INT64_MAX;
if (brw_bo_wait(fence->batch_bo, timeout) != 0)
return false;
fence->signalled = true;
brw_bo_unreference(fence->batch_bo);
fence->batch_bo = NULL;
return true;
case BRW_FENCE_TYPE_SYNC_FD:
if (fence->sync_fd == -1)
return false;
if (timeout > INT32_MAX)
timeout_i32 = -1;
else
timeout_i32 = timeout;
if (sync_wait(fence->sync_fd, timeout_i32) == -1)
return false;
fence->signalled = true;
return true;
}
assert(!"bad enum brw_fence_type");
return false;
}
/**
* Return true if the fence signalled within the timeout, or had already signalled.
* (This matches the behavior expected from __DRI2fence::client_wait_sync).
*/
static bool
brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
uint64_t timeout)
{
bool ret;
mtx_lock(&fence->mutex);
ret = brw_fence_client_wait_locked(brw, fence, timeout);
mtx_unlock(&fence->mutex);
return ret;
}
static void
brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
{
switch (fence->type) {
case BRW_FENCE_TYPE_BO_WAIT:
/* We have nothing to do for WaitSync. Our GL command stream is sequential,
* so given that the sync object has already flushed the batchbuffer, any
* batchbuffers coming after this waitsync will naturally not occur until
* the previous one is done.
*/
break;
case BRW_FENCE_TYPE_SYNC_FD:
assert(fence->sync_fd != -1);
/* The user wants explicit synchronization, so give them what they want. */
if (!brw_fence_insert(brw, fence)) {
/* FIXME: There exists no way yet to report an error here. If an error
* occurs, continue silently and hope for the best.
*/
}
break;
}
}
static struct gl_sync_object *
brw_gl_new_sync(struct gl_context *ctx)
{
struct brw_gl_sync *sync;
sync = calloc(1, sizeof(*sync));
if (!sync)
return NULL;
return &sync->gl;
}
static void
brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
{
struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
brw_fence_finish(&sync->fence);
free(sync->gl.Label);
free(sync);
}
static void
brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
GLenum condition, GLbitfield flags)
{
struct brw_context *brw = brw_context(ctx);
struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
/* brw_fence_insert_locked() assumes it must do a complete flush */
assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
brw_fence_init(brw, &sync->fence, BRW_FENCE_TYPE_BO_WAIT);
if (!brw_fence_insert_locked(brw, &sync->fence)) {
/* FIXME: There exists no way to report a GL error here. If an error
* occurs, continue silently and hope for the best.
*/
}
}
static void
brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
GLbitfield flags, GLuint64 timeout)
{
struct brw_context *brw = brw_context(ctx);
struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
if (brw_fence_client_wait(brw, &sync->fence, timeout))
sync->gl.StatusFlag = 1;
}
static void
brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
GLbitfield flags, GLuint64 timeout)
{
struct brw_context *brw = brw_context(ctx);
struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
brw_fence_server_wait(brw, &sync->fence);
}
static void
brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
{
struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
if (brw_fence_has_completed(&sync->fence))
sync->gl.StatusFlag = 1;
}
void
brw_init_syncobj_functions(struct dd_function_table *functions)
{
functions->NewSyncObject = brw_gl_new_sync;
functions->DeleteSyncObject = brw_gl_delete_sync;
functions->FenceSync = brw_gl_fence_sync;
functions->CheckSync = brw_gl_check_sync;
functions->ClientWaitSync = brw_gl_client_wait_sync;
functions->ServerWaitSync = brw_gl_server_wait_sync;
functions->NewSemaphoreObject = intel_semaphoreobj_alloc;
functions->DeleteSemaphoreObject = intel_semaphoreobj_free;
functions->ImportSemaphoreFd = intel_semaphoreobj_import;
functions->ServerSignalSemaphoreObject = intel_semaphoreobj_signal;
functions->ServerWaitSemaphoreObject = intel_semaphoreobj_wait;
}
static void *
brw_dri_create_fence(__DRIcontext *ctx)
{
struct brw_context *brw = ctx->driverPrivate;
struct brw_fence *fence;
fence = calloc(1, sizeof(*fence));
if (!fence)
return NULL;
brw_fence_init(brw, fence, BRW_FENCE_TYPE_BO_WAIT);
if (!brw_fence_insert_locked(brw, fence)) {
brw_fence_finish(fence);
free(fence);
return NULL;
}
return fence;
}
static void
brw_dri_destroy_fence(__DRIscreen *dri_screen, void *_fence)
{
struct brw_fence *fence = _fence;
brw_fence_finish(fence);
free(fence);
}
static GLboolean
brw_dri_client_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags,
uint64_t timeout)
{
struct brw_fence *fence = _fence;
return brw_fence_client_wait(fence->brw, fence, timeout);
}
static void
brw_dri_server_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags)
{
struct brw_fence *fence = _fence;
/* We might be called here with a NULL fence as a result of WaitSyncKHR
* on a EGL_KHR_reusable_sync fence. Nothing to do here in such case.
*/
if (!fence)
return;
brw_fence_server_wait(fence->brw, fence);
}
static unsigned
brw_dri_get_capabilities(__DRIscreen *dri_screen)
{
struct brw_screen *screen = dri_screen->driverPrivate;
unsigned caps = 0;
if (screen->has_exec_fence)
caps |= __DRI_FENCE_CAP_NATIVE_FD;
return caps;
}
static void *
brw_dri_create_fence_fd(__DRIcontext *dri_ctx, int fd)
{
struct brw_context *brw = dri_ctx->driverPrivate;
struct brw_fence *fence;
assert(brw->screen->has_exec_fence);
fence = calloc(1, sizeof(*fence));
if (!fence)
return NULL;
brw_fence_init(brw, fence, BRW_FENCE_TYPE_SYNC_FD);
if (fd == -1) {
/* Create an out-fence fd */
if (!brw_fence_insert_locked(brw, fence))
goto fail;
} else {
/* Import the sync fd as an in-fence. */
fence->sync_fd = os_dupfd_cloexec(fd);
}
assert(fence->sync_fd != -1);
return fence;
fail:
brw_fence_finish(fence);
free(fence);
return NULL;
}
static int
brw_dri_get_fence_fd_locked(struct brw_fence *fence)
{
assert(fence->type == BRW_FENCE_TYPE_SYNC_FD);
return os_dupfd_cloexec(fence->sync_fd);
}
static int
brw_dri_get_fence_fd(__DRIscreen *dri_screen, void *_fence)
{
struct brw_fence *fence = _fence;
int fd;
mtx_lock(&fence->mutex);
fd = brw_dri_get_fence_fd_locked(fence);
mtx_unlock(&fence->mutex);
return fd;
}
const __DRI2fenceExtension brwFenceExtension = {
.base = { __DRI2_FENCE, 2 },
.create_fence = brw_dri_create_fence,
.destroy_fence = brw_dri_destroy_fence,
.client_wait_sync = brw_dri_client_wait_sync,
.server_wait_sync = brw_dri_server_wait_sync,
.get_fence_from_cl_event = NULL,
.get_capabilities = brw_dri_get_capabilities,
.create_fence_fd = brw_dri_create_fence_fd,
.get_fence_fd = brw_dri_get_fence_fd,
};
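For the sync-fd flavour, the usual client is EGL_ANDROID_native_fence_sync: exporting an out-fence ends up in brw_dri_create_fence_fd() with fd == -1 and brw_dri_get_fence_fd(). The sketch below is illustrative only; it assumes the extension entry points are available through a libepoxy-style loader (normally they are fetched with eglGetProcAddress) and that dpy has a current context:
#include <epoxy/egl.h>
#include <epoxy/gl.h>
static int export_frame_fence(EGLDisplay dpy)
{
   /* Creates an out-fence; the driver sees create_fence_fd(ctx, -1). */
   EGLSyncKHR sync = eglCreateSyncKHR(dpy, EGL_SYNC_NATIVE_FENCE_ANDROID, NULL);
   /* The fd only becomes available once the fence has been flushed. */
   glFlush();
   /* Backed by get_fence_fd(); the caller owns the duplicated fd. */
   int fd = eglDupNativeFenceFDANDROID(dpy, sync);
   eglDestroySyncKHR(dpy, sync);
   return fd;
}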

View file

@ -1,295 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_tcs.c
*
* Tessellation control shader state upload code.
*/
#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "brw_state.h"
#include "program/prog_parameter.h"
#include "nir_builder.h"
static bool
brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
struct brw_program *tep, struct brw_tcs_prog_key *key)
{
struct gl_context *ctx = &brw->ctx;
const struct brw_compiler *compiler = brw->screen->compiler;
const struct intel_device_info *devinfo = compiler->devinfo;
struct brw_stage_state *stage_state = &brw->tcs.base;
nir_shader *nir;
struct brw_tcs_prog_data prog_data;
bool start_busy = false;
double start_time = 0;
void *mem_ctx = ralloc_context(NULL);
if (tcp) {
nir = nir_shader_clone(mem_ctx, tcp->program.nir);
} else {
const nir_shader_compiler_options *options =
ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions;
nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key);
}
memset(&prog_data, 0, sizeof(prog_data));
if (tcp) {
brw_assign_common_binding_table_offsets(devinfo, &tcp->program,
&prog_data.base.base, 0);
brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program,
&prog_data.base.base,
compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
if (brw->can_push_ubos) {
brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
prog_data.base.base.ubo_ranges);
}
} else {
/* Upload the Patch URB Header as the first two uniforms.
* Do the annoying scrambling so the shader doesn't have to.
*/
assert(nir->num_uniforms == 32);
prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8);
prog_data.base.base.nr_params = 8;
uint32_t *param = prog_data.base.base.param;
for (int i = 0; i < 8; i++)
param[i] = BRW_PARAM_BUILTIN_ZERO;
if (key->tes_primitive_mode == GL_QUADS) {
for (int i = 0; i < 4; i++)
param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
param[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
param[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
} else if (key->tes_primitive_mode == GL_TRIANGLES) {
for (int i = 0; i < 3; i++)
param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
param[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
} else {
assert(key->tes_primitive_mode == GL_ISOLINES);
param[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
param[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
}
}
int st_index = -1;
if (INTEL_DEBUG(DEBUG_SHADER_TIME) && tep)
st_index = brw_get_shader_time_index(brw, &tep->program, ST_TCS, true);
if (unlikely(brw->perf_debug)) {
start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
start_time = get_time();
}
char *error_str;
const unsigned *program =
brw_compile_tcs(compiler, brw, mem_ctx, key, &prog_data, nir, st_index,
NULL, &error_str);
if (program == NULL) {
if (tep) {
tep->program.sh.data->LinkStatus = LINKING_FAILURE;
ralloc_strcat(&tep->program.sh.data->InfoLog, error_str);
}
_mesa_problem(NULL, "Failed to compile tessellation control shader: "
"%s\n", error_str);
ralloc_free(mem_ctx);
return false;
}
if (unlikely(brw->perf_debug)) {
if (tcp) {
if (tcp->compiled_once) {
brw_debug_recompile(brw, MESA_SHADER_TESS_CTRL, tcp->program.Id,
&key->base);
}
tcp->compiled_once = true;
}
if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
perf_debug("TCS compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
}
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
prog_data.base.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.base.param);
ralloc_steal(NULL, prog_data.base.base.pull_param);
brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
key, sizeof(*key),
program, prog_data.base.base.program_size,
&prog_data, sizeof(prog_data),
&stage_state->prog_offset, &brw->tcs.base.prog_data);
ralloc_free(mem_ctx);
return true;
}
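To make the passthrough-TCS scrambling above easier to follow, this is the resulting Patch URB Header layout for the GL_QUADS case, worked out from the loop above (an illustrative comment, not part of the original file):
/* GL_QUADS: eight params, written back-to-front so the generated shader
 * doesn't have to reorder them.
 *   param[7] = TESS_LEVEL_OUTER_X   param[6] = TESS_LEVEL_OUTER_Y
 *   param[5] = TESS_LEVEL_OUTER_Z   param[4] = TESS_LEVEL_OUTER_W
 *   param[3] = TESS_LEVEL_INNER_X   param[2] = TESS_LEVEL_INNER_Y
 *   param[1] = ZERO                 param[0] = ZERO
 * GL_TRIANGLES fills only OUTER_X..Z plus INNER_X; GL_ISOLINES writes just
 * the two outer levels.
 */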
void
brw_tcs_populate_key(struct brw_context *brw,
struct brw_tcs_prog_key *key)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const struct brw_compiler *compiler = brw->screen->compiler;
struct brw_program *tcp =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
struct brw_program *tep =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
struct gl_program *tes_prog = &tep->program;
uint64_t per_vertex_slots = tes_prog->info.inputs_read;
uint32_t per_patch_slots = tes_prog->info.patch_inputs_read;
memset(key, 0, sizeof(*key));
if (tcp) {
struct gl_program *prog = &tcp->program;
per_vertex_slots |= prog->info.outputs_written;
per_patch_slots |= prog->info.patch_outputs_written;
}
if (devinfo->ver < 8 || !tcp || compiler->use_tcs_8_patch)
key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices;
key->outputs_written = per_vertex_slots;
key->patch_outputs_written = per_patch_slots;
/* We need to specialize our code generation for tessellation levels
* based on the domain the DS is expecting to tessellate.
*/
key->tes_primitive_mode = tep->program.info.tess.primitive_mode;
key->quads_workaround = devinfo->ver < 9 &&
tep->program.info.tess.primitive_mode == GL_QUADS &&
tep->program.info.tess.spacing == TESS_SPACING_EQUAL;
if (tcp) {
/* _NEW_TEXTURE */
brw_populate_base_prog_key(&brw->ctx, tcp, &key->base);
}
}
void
brw_upload_tcs_prog(struct brw_context *brw)
{
struct brw_stage_state *stage_state = &brw->tcs.base;
struct brw_tcs_prog_key key;
/* BRW_NEW_TESS_PROGRAMS */
struct brw_program *tcp =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
ASSERTED struct brw_program *tep =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
assert(tep);
if (!brw_state_dirty(brw,
_NEW_TEXTURE,
BRW_NEW_PATCH_PRIMITIVE |
BRW_NEW_TESS_PROGRAMS))
return;
brw_tcs_populate_key(brw, &key);
if (brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG, &key, sizeof(key),
&stage_state->prog_offset, &brw->tcs.base.prog_data,
true))
return;
if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_CTRL))
return;
tcp = (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
if (tcp)
tcp->id = key.base.program_string_id;
ASSERTED bool success = brw_codegen_tcs_prog(brw, tcp, tep, &key);
assert(success);
}
void
brw_tcs_populate_default_key(const struct brw_compiler *compiler,
struct brw_tcs_prog_key *key,
struct gl_shader_program *sh_prog,
struct gl_program *prog)
{
const struct intel_device_info *devinfo = compiler->devinfo;
struct brw_program *btcp = brw_program(prog);
const struct gl_linked_shader *tes =
sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
memset(key, 0, sizeof(*key));
brw_populate_default_base_prog_key(devinfo, btcp, &key->base);
/* Guess that the input and output patches have the same dimensionality. */
if (devinfo->ver < 8 || compiler->use_tcs_8_patch)
key->input_vertices = prog->info.tess.tcs_vertices_out;
if (tes) {
key->tes_primitive_mode = tes->Program->info.tess.primitive_mode;
key->quads_workaround = devinfo->ver < 9 &&
tes->Program->info.tess.primitive_mode == GL_QUADS &&
tes->Program->info.tess.spacing == TESS_SPACING_EQUAL;
} else {
key->tes_primitive_mode = GL_TRIANGLES;
}
key->outputs_written = prog->nir->info.outputs_written;
key->patch_outputs_written = prog->nir->info.patch_outputs_written;
}
bool
brw_tcs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
const struct brw_compiler *compiler = brw->screen->compiler;
struct brw_tcs_prog_key key;
uint32_t old_prog_offset = brw->tcs.base.prog_offset;
struct brw_stage_prog_data *old_prog_data = brw->tcs.base.prog_data;
bool success;
struct brw_program *btcp = brw_program(prog);
const struct gl_linked_shader *tes =
shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
struct brw_program *btep = tes ? brw_program(tes->Program) : NULL;
brw_tcs_populate_default_key(compiler, &key, shader_prog, prog);
success = brw_codegen_tcs_prog(brw, btcp, btep, &key);
brw->tcs.base.prog_offset = old_prog_offset;
brw->tcs.base.prog_data = old_prog_data;
return success;
}

View file

@ -1,116 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "main/shaderapi.h"
#include "brw_context.h"
#include "brw_state.h"
/* Creates a new TCS constant buffer reflecting the current TCS program's
* constants, if needed by the TCS program.
*
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
static void
brw_upload_tcs_pull_constants(struct brw_context *brw)
{
struct brw_stage_state *stage_state = &brw->tcs.base;
/* BRW_NEW_TESS_PROGRAMS */
struct brw_program *tcp =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
if (!tcp)
return;
/* BRW_NEW_TCS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
_mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL);
/* _NEW_PROGRAM_CONSTANTS */
brw_upload_pull_constants(brw, BRW_NEW_TCS_CONSTBUF, &tcp->program,
stage_state, prog_data);
}
const struct brw_tracked_state brw_tcs_pull_constants = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = BRW_NEW_BATCH |
BRW_NEW_TCS_PROG_DATA |
BRW_NEW_TESS_PROGRAMS,
},
.emit = brw_upload_tcs_pull_constants,
};
static void
brw_upload_tcs_ubo_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* _NEW_PROGRAM */
struct gl_program *prog =
ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
/* BRW_NEW_TCS_PROG_DATA */
struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
brw_upload_ubo_surfaces(brw, prog, &brw->tcs.base, prog_data);
}
const struct brw_tracked_state brw_tcs_ubo_surfaces = {
.dirty = {
.mesa = _NEW_PROGRAM,
.brw = BRW_NEW_BATCH |
BRW_NEW_TCS_PROG_DATA |
BRW_NEW_UNIFORM_BUFFER,
},
.emit = brw_upload_tcs_ubo_surfaces,
};
static void
brw_upload_tcs_image_surfaces(struct brw_context *brw)
{
/* BRW_NEW_TESS_PROGRAMS */
const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];
if (tcp) {
/* BRW_NEW_TCS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
brw_upload_image_surfaces(brw, tcp, &brw->tcs.base,
brw->tcs.base.prog_data);
}
}
const struct brw_tracked_state brw_tcs_image_surfaces = {
.dirty = {
.brw = BRW_NEW_BATCH |
BRW_NEW_AUX_STATE |
BRW_NEW_IMAGE_UNITS |
BRW_NEW_TCS_PROG_DATA |
BRW_NEW_TESS_PROGRAMS,
},
.emit = brw_upload_tcs_image_surfaces,
};

View file

@ -1,233 +0,0 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_tes.c
*
* Tessellation evaluation shader state upload code.
*/
#include "brw_context.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "brw_state.h"
#include "program/prog_parameter.h"
static bool
brw_codegen_tes_prog(struct brw_context *brw,
struct brw_program *tep,
struct brw_tes_prog_key *key)
{
const struct brw_compiler *compiler = brw->screen->compiler;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct brw_stage_state *stage_state = &brw->tes.base;
struct brw_tes_prog_data prog_data;
bool start_busy = false;
double start_time = 0;
memset(&prog_data, 0, sizeof(prog_data));
void *mem_ctx = ralloc_context(NULL);
nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir);
brw_assign_common_binding_table_offsets(devinfo, &tep->program,
&prog_data.base.base, 0);
brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program,
&prog_data.base.base,
compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
if (brw->can_push_ubos) {
brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
prog_data.base.base.ubo_ranges);
}
int st_index = -1;
if (INTEL_DEBUG(DEBUG_SHADER_TIME))
st_index = brw_get_shader_time_index(brw, &tep->program, ST_TES, true);
if (unlikely(brw->perf_debug)) {
start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
start_time = get_time();
}
struct brw_vue_map input_vue_map;
brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
key->patch_inputs_read);
char *error_str;
const unsigned *program =
brw_compile_tes(compiler, brw, mem_ctx, key, &input_vue_map, &prog_data,
nir, st_index, NULL, &error_str);
if (program == NULL) {
tep->program.sh.data->LinkStatus = LINKING_FAILURE;
ralloc_strcat(&tep->program.sh.data->InfoLog, error_str);
_mesa_problem(NULL, "Failed to compile tessellation evaluation shader: "
"%s\n", error_str);
ralloc_free(mem_ctx);
return false;
}
if (unlikely(brw->perf_debug)) {
if (tep->compiled_once) {
brw_debug_recompile(brw, MESA_SHADER_TESS_EVAL, tep->program.Id,
&key->base);
}
if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
perf_debug("TES compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
tep->compiled_once = true;
}
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, stage_state,
prog_data.base.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.base.param);
ralloc_steal(NULL, prog_data.base.base.pull_param);
brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
key, sizeof(*key),
program, prog_data.base.base.program_size,
&prog_data, sizeof(prog_data),
&stage_state->prog_offset, &brw->tes.base.prog_data);
ralloc_free(mem_ctx);
return true;
}
void
brw_tes_populate_key(struct brw_context *brw,
struct brw_tes_prog_key *key)
{
struct brw_program *tcp =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
struct brw_program *tep =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
struct gl_program *prog = &tep->program;
uint64_t per_vertex_slots = prog->info.inputs_read;
uint32_t per_patch_slots = prog->info.patch_inputs_read;
memset(key, 0, sizeof(*key));
/* _NEW_TEXTURE */
brw_populate_base_prog_key(&brw->ctx, tep, &key->base);
/* The TCS may have additional outputs which aren't read by the
* TES (possibly for cross-thread communication). These need to
* be stored in the Patch URB Entry as well.
*/
if (tcp) {
struct gl_program *tcp_prog = &tcp->program;
per_vertex_slots |= tcp_prog->info.outputs_written &
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
per_patch_slots |= tcp_prog->info.patch_outputs_written;
}
key->inputs_read = per_vertex_slots;
key->patch_inputs_read = per_patch_slots;
}
void
brw_upload_tes_prog(struct brw_context *brw)
{
struct brw_stage_state *stage_state = &brw->tes.base;
struct brw_tes_prog_key key;
/* BRW_NEW_TESS_PROGRAMS */
struct brw_program *tep =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
if (!brw_state_dirty(brw,
_NEW_TEXTURE,
BRW_NEW_TESS_PROGRAMS))
return;
brw_tes_populate_key(brw, &key);
if (brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG, &key, sizeof(key),
&stage_state->prog_offset, &brw->tes.base.prog_data,
true))
return;
if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_EVAL))
return;
tep = (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
tep->id = key.base.program_string_id;
ASSERTED bool success = brw_codegen_tes_prog(brw, tep, &key);
assert(success);
}
void
brw_tes_populate_default_key(const struct brw_compiler *compiler,
struct brw_tes_prog_key *key,
struct gl_shader_program *sh_prog,
struct gl_program *prog)
{
const struct intel_device_info *devinfo = compiler->devinfo;
struct brw_program *btep = brw_program(prog);
memset(key, 0, sizeof(*key));
brw_populate_default_base_prog_key(devinfo, btep, &key->base);
key->inputs_read = prog->nir->info.inputs_read;
key->patch_inputs_read = prog->nir->info.patch_inputs_read;
if (sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
struct gl_program *tcp =
sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
key->inputs_read |= tcp->nir->info.outputs_written &
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
key->patch_inputs_read |= tcp->nir->info.patch_outputs_written;
}
}
bool
brw_tes_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
const struct brw_compiler *compiler = brw->screen->compiler;
struct brw_tes_prog_key key;
uint32_t old_prog_offset = brw->tes.base.prog_offset;
struct brw_stage_prog_data *old_prog_data = brw->tes.base.prog_data;
bool success;
struct brw_program *btep = brw_program(prog);
brw_tes_populate_default_key(compiler, &key, shader_prog, prog);
success = brw_codegen_tes_prog(brw, btep, &key);
brw->tes.base.prog_offset = old_prog_offset;
brw->tes.base.prog_data = old_prog_data;
return success;
}

View file

@ -1,116 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "main/shaderapi.h"
#include "brw_context.h"
#include "brw_state.h"
/* Creates a new TES constant buffer reflecting the current TES program's
* constants, if needed by the TES program.
*
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
static void
brw_upload_tes_pull_constants(struct brw_context *brw)
{
struct brw_stage_state *stage_state = &brw->tes.base;
/* BRW_NEW_TESS_PROGRAMS */
struct brw_program *dp =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
if (!dp)
return;
/* BRW_NEW_TES_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
_mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL);
/* _NEW_PROGRAM_CONSTANTS */
brw_upload_pull_constants(brw, BRW_NEW_TES_CONSTBUF, &dp->program,
stage_state, prog_data);
}
const struct brw_tracked_state brw_tes_pull_constants = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = BRW_NEW_BATCH |
BRW_NEW_TES_PROG_DATA |
BRW_NEW_TESS_PROGRAMS,
},
.emit = brw_upload_tes_pull_constants,
};
static void
brw_upload_tes_ubo_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* _NEW_PROGRAM */
struct gl_program *prog =
ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
/* BRW_NEW_TES_PROG_DATA */
struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
brw_upload_ubo_surfaces(brw, prog, &brw->tes.base, prog_data);
}
const struct brw_tracked_state brw_tes_ubo_surfaces = {
.dirty = {
.mesa = _NEW_PROGRAM,
.brw = BRW_NEW_BATCH |
BRW_NEW_TES_PROG_DATA |
BRW_NEW_UNIFORM_BUFFER,
},
.emit = brw_upload_tes_ubo_surfaces,
};
static void
brw_upload_tes_image_surfaces(struct brw_context *brw)
{
/* BRW_NEW_TESS_PROGRAMS */
const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];
if (tep) {
/* BRW_NEW_TES_PROG_DATA, BRW_NEW_IMAGE_UNITS */
brw_upload_image_surfaces(brw, tep, &brw->tes.base,
brw->tes.base.prog_data);
}
}
const struct brw_tracked_state brw_tes_image_surfaces = {
.dirty = {
.brw = BRW_NEW_BATCH |
BRW_NEW_AUX_STATE |
BRW_NEW_IMAGE_UNITS |
BRW_NEW_TESS_PROGRAMS |
BRW_NEW_TES_PROG_DATA,
},
.emit = brw_upload_tes_image_surfaces,
};

View file

@ -1,415 +0,0 @@
#include "swrast/swrast.h"
#include "main/renderbuffer.h"
#include "main/texobj.h"
#include "main/teximage.h"
#include "main/mipmap.h"
#include "drivers/common/meta.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_buffer_objects.h"
#include "brw_mipmap_tree.h"
#include "brw_tex.h"
#include "brw_fbo.h"
#include "brw_state.h"
#include "util/u_memory.h"
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
static struct gl_texture_image *
brw_new_texture_image(struct gl_context *ctx)
{
DBG("%s\n", __func__);
(void) ctx;
return (struct gl_texture_image *) CALLOC_STRUCT(brw_texture_image);
}
static void
brw_delete_texture_image(struct gl_context *ctx, struct gl_texture_image *img)
{
/* nothing special (yet) for brw_texture_image */
_mesa_delete_texture_image(ctx, img);
}
static struct gl_texture_object *
brw_new_texture_object(struct gl_context *ctx, GLuint name, GLenum target)
{
struct brw_texture_object *obj = CALLOC_STRUCT(brw_texture_object);
(void) ctx;
DBG("%s\n", __func__);
if (obj == NULL)
return NULL;
_mesa_initialize_texture_object(ctx, &obj->base, name, target);
obj->needs_validate = true;
return &obj->base;
}
static void
brw_delete_texture_object(struct gl_context *ctx,
struct gl_texture_object *texObj)
{
struct brw_texture_object *brw_obj = brw_texture_object(texObj);
brw_miptree_release(&brw_obj->mt);
_mesa_delete_texture_object(ctx, texObj);
}
static GLboolean
brw_alloc_texture_image_buffer(struct gl_context *ctx,
struct gl_texture_image *image)
{
struct brw_context *brw = brw_context(ctx);
struct brw_texture_image *intel_image = brw_texture_image(image);
struct gl_texture_object *texobj = image->TexObject;
struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
assert(image->Border == 0);
/* Quantize sample count */
if (image->NumSamples) {
image->NumSamples = brw_quantize_num_samples(brw->screen, image->NumSamples);
if (!image->NumSamples)
return false;
}
/* Because the driver uses AllocTextureImageBuffer() internally, it may end
* up mismatched with FreeTextureImageBuffer(), but that is safe to call
* multiple times.
*/
ctx->Driver.FreeTextureImageBuffer(ctx, image);
if (!_swrast_init_texture_image(image))
return false;
if (intel_texobj->mt &&
brw_miptree_match_image(intel_texobj->mt, image)) {
brw_miptree_reference(&intel_image->mt, intel_texobj->mt);
DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n",
__func__, texobj, image->Level,
image->Width, image->Height, image->Depth, intel_texobj->mt);
} else {
intel_image->mt = brw_miptree_create_for_teximage(brw, intel_texobj,
intel_image,
MIPTREE_CREATE_DEFAULT);
if (!intel_image->mt)
return false;
/* Even if the object currently has a mipmap tree associated
* with it, this one is a more likely candidate to represent the
* whole object since our level didn't fit what was there
* before, and any lower levels would fit into our miptree.
*/
brw_miptree_reference(&intel_texobj->mt, intel_image->mt);
DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n",
__func__, texobj, image->Level,
image->Width, image->Height, image->Depth, intel_image->mt);
}
intel_texobj->needs_validate = true;
return true;
}
/**
* ctx->Driver.AllocTextureStorage() handler.
*
* Compare this to _mesa_AllocTextureStorage_sw, which would call into
* brw_alloc_texture_image_buffer() above.
*/
static GLboolean
brw_alloc_texture_storage(struct gl_context *ctx,
struct gl_texture_object *texobj,
GLsizei levels, GLsizei width,
GLsizei height, GLsizei depth)
{
struct brw_context *brw = brw_context(ctx);
struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
struct gl_texture_image *first_image = texobj->Image[0][0];
int num_samples = brw_quantize_num_samples(brw->screen,
first_image->NumSamples);
const int numFaces = _mesa_num_tex_faces(texobj->Target);
int face;
int level;
/* If the object's current miptree doesn't match what we need, make a new
* one.
*/
if (!intel_texobj->mt ||
!brw_miptree_match_image(intel_texobj->mt, first_image) ||
intel_texobj->mt->last_level != levels - 1) {
brw_miptree_release(&intel_texobj->mt);
brw_get_image_dims(first_image, &width, &height, &depth);
intel_texobj->mt = brw_miptree_create(brw, texobj->Target,
first_image->TexFormat,
0, levels - 1,
width, height, depth,
MAX2(num_samples, 1),
MIPTREE_CREATE_DEFAULT);
if (intel_texobj->mt == NULL) {
return false;
}
}
for (face = 0; face < numFaces; face++) {
for (level = 0; level < levels; level++) {
struct gl_texture_image *image = texobj->Image[face][level];
struct brw_texture_image *intel_image = brw_texture_image(image);
image->NumSamples = num_samples;
_swrast_free_texture_image_buffer(ctx, image);
if (!_swrast_init_texture_image(image))
return false;
brw_miptree_reference(&intel_image->mt, intel_texobj->mt);
}
}
/* The miptree is in a validated state, so no need to check later. */
intel_texobj->needs_validate = false;
intel_texobj->validated_first_level = 0;
intel_texobj->validated_last_level = levels - 1;
intel_texobj->_Format = first_image->TexFormat;
return true;
}
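For context, the handler above services immutable-storage requests, which allocate the whole miptree up front. A client-side sketch (not code from this file), assuming a libepoxy-style loader:
#include <epoxy/gl.h>
static GLuint make_immutable_texture(void)
{
   GLuint tex;
   glGenTextures(1, &tex);
   glBindTexture(GL_TEXTURE_2D, tex);
   /* All ten mip levels of a 512x512 RGBA8 texture are allocated in one
    * miptree by brw_alloc_texture_storage(). */
   glTexStorage2D(GL_TEXTURE_2D, 10, GL_RGBA8, 512, 512);
   return tex;
}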
static void
brw_free_texture_image_buffer(struct gl_context * ctx,
struct gl_texture_image *texImage)
{
struct brw_texture_image *brw_image = brw_texture_image(texImage);
DBG("%s\n", __func__);
brw_miptree_release(&brw_image->mt);
_swrast_free_texture_image_buffer(ctx, texImage);
}
/**
* Map texture memory/buffer into user space.
* Note: the region of interest parameters are ignored here.
* \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT
* \param mapOut returns start of mapping of region of interest
* \param rowStrideOut returns row stride in bytes
*/
static void
brw_map_texture_image(struct gl_context *ctx,
struct gl_texture_image *tex_image,
GLuint slice,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **map,
GLint *out_stride)
{
struct brw_context *brw = brw_context(ctx);
struct brw_texture_image *intel_image = brw_texture_image(tex_image);
struct brw_mipmap_tree *mt = intel_image->mt;
ptrdiff_t stride;
/* Our texture data is always stored in a miptree. */
assert(mt);
/* Check that our caller wasn't confused about how to map a 1D texture. */
assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY || h == 1);
/* brw_miptree_map operates on a unified "slice" number that references the
* cube face, since it's all just slices to the miptree code.
*/
if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
slice = tex_image->Face;
brw_miptree_map(brw, mt,
tex_image->Level + tex_image->TexObject->Attrib.MinLevel,
slice + tex_image->TexObject->Attrib.MinLayer,
x, y, w, h, mode,
(void **)map, &stride);
*out_stride = stride;
}
static void
brw_unmap_texture_image(struct gl_context *ctx,
struct gl_texture_image *tex_image, GLuint slice)
{
struct brw_context *brw = brw_context(ctx);
struct brw_texture_image *intel_image = brw_texture_image(tex_image);
struct brw_mipmap_tree *mt = intel_image->mt;
if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
slice = tex_image->Face;
brw_miptree_unmap(brw, mt,
tex_image->Level + tex_image->TexObject->Attrib.MinLevel,
slice + tex_image->TexObject->Attrib.MinLayer);
}
static GLboolean
brw_texture_view(struct gl_context *ctx,
struct gl_texture_object *texObj,
struct gl_texture_object *origTexObj)
{
struct brw_context *brw = brw_context(ctx);
struct brw_texture_object *intel_tex = brw_texture_object(texObj);
struct brw_texture_object *intel_orig_tex = brw_texture_object(origTexObj);
assert(intel_orig_tex->mt);
brw_miptree_reference(&intel_tex->mt, intel_orig_tex->mt);
/* Since we can only make views of immutable-format textures,
* we can assume that everything is in origTexObj's miptree.
*
* Mesa core has already made us a copy of all the teximage objects,
* except it hasn't copied our mt pointers, etc.
*/
const int numFaces = _mesa_num_tex_faces(texObj->Target);
const int numLevels = texObj->Attrib.NumLevels;
int face;
int level;
for (face = 0; face < numFaces; face++) {
for (level = 0; level < numLevels; level++) {
struct gl_texture_image *image = texObj->Image[face][level];
struct brw_texture_image *intel_image = brw_texture_image(image);
brw_miptree_reference(&intel_image->mt, intel_orig_tex->mt);
}
}
/* The miptree is in a validated state, so no need to check later. */
intel_tex->needs_validate = false;
intel_tex->validated_first_level = 0;
intel_tex->validated_last_level = numLevels - 1;
/* Set the validated texture format, with the same adjustments that
* would have been applied to determine the underlying texture's
* mt->format.
*/
intel_tex->_Format = brw_depth_format_for_depthstencil_format(
brw_lower_compressed_format(brw, texObj->Image[0][0]->TexFormat));
return GL_TRUE;
}
static void
brw_texture_barrier(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (devinfo->ver >= 6) {
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
} else {
brw_emit_mi_flush(brw);
}
}
/* Return the usual surface usage flags for the given format. */
static isl_surf_usage_flags_t
isl_surf_usage(mesa_format format)
{
switch(_mesa_get_format_base_format(format)) {
case GL_DEPTH_COMPONENT:
return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
case GL_DEPTH_STENCIL:
return ISL_SURF_USAGE_DEPTH_BIT | ISL_SURF_USAGE_STENCIL_BIT |
ISL_SURF_USAGE_TEXTURE_BIT;
case GL_STENCIL_INDEX:
return ISL_SURF_USAGE_STENCIL_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
default:
return ISL_SURF_USAGE_RENDER_TARGET_BIT | ISL_SURF_USAGE_TEXTURE_BIT;
}
}
static GLboolean
intel_texture_for_memory_object(struct gl_context *ctx,
struct gl_texture_object *tex_obj,
struct gl_memory_object *mem_obj,
GLsizei levels, GLsizei width,
GLsizei height, GLsizei depth,
GLuint64 offset)
{
struct brw_context *brw = brw_context(ctx);
struct brw_memory_object *intel_memobj = brw_memory_object(mem_obj);
struct brw_texture_object *intel_texobj = brw_texture_object(tex_obj);
struct gl_texture_image *image = tex_obj->Image[0][0];
struct isl_surf surf;
/* Only color formats are supported. */
if (!_mesa_is_format_color_format(image->TexFormat))
return GL_FALSE;
isl_tiling_flags_t tiling_flags = ISL_TILING_ANY_MASK;
if (tex_obj->TextureTiling == GL_LINEAR_TILING_EXT)
tiling_flags = ISL_TILING_LINEAR_BIT;
UNUSED const bool isl_surf_created_successfully =
isl_surf_init(&brw->screen->isl_dev, &surf,
.dim = get_isl_surf_dim(tex_obj->Target),
.format = brw_isl_format_for_mesa_format(image->TexFormat),
.width = width,
.height = height,
.depth = depth,
.levels = levels,
.array_len = tex_obj->Target == GL_TEXTURE_3D ? 1 : depth,
.samples = MAX2(image->NumSamples, 1),
.usage = isl_surf_usage(image->TexFormat),
.tiling_flags = tiling_flags);
assert(isl_surf_created_successfully);
intel_texobj->mt = brw_miptree_create_for_bo(brw,
intel_memobj->bo,
image->TexFormat,
offset,
width,
height,
depth,
surf.row_pitch_B,
surf.tiling,
MIPTREE_CREATE_NO_AUX);
assert(intel_texobj->mt);
brw_alloc_texture_image_buffer(ctx, image);
intel_texobj->needs_validate = false;
intel_texobj->validated_first_level = 0;
intel_texobj->validated_last_level = levels - 1;
intel_texobj->_Format = image->TexFormat;
return GL_TRUE;
}
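A rough client-side illustration of the path that reaches intel_texture_for_memory_object(): importing external memory with GL_EXT_memory_object_fd and binding texture storage to it. The fd, size and dimensions below are placeholders, and the entry points are assumed to come from a libepoxy-style loader:
#include <epoxy/gl.h>
static GLuint import_external_texture(int fd, GLuint64 size)
{
   GLuint memobj, tex;
   glCreateMemoryObjectsEXT(1, &memobj);
   /* The import takes ownership of the file descriptor. */
   glImportMemoryFdEXT(memobj, size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd);
   glGenTextures(1, &tex);
   glBindTexture(GL_TEXTURE_2D, tex);
   /* Lands in intel_texture_for_memory_object(), which wraps the imported
    * BO in a miptree instead of allocating new storage. */
   glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, 256, 256, memobj, 0);
   return tex;
}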
void
brw_init_texture_functions(struct dd_function_table *functions)
{
functions->NewTextureObject = brw_new_texture_object;
functions->NewTextureImage = brw_new_texture_image;
functions->DeleteTextureImage = brw_delete_texture_image;
functions->DeleteTexture = brw_delete_texture_object;
functions->AllocTextureImageBuffer = brw_alloc_texture_image_buffer;
functions->FreeTextureImageBuffer = brw_free_texture_image_buffer;
functions->AllocTextureStorage = brw_alloc_texture_storage;
functions->MapTextureImage = brw_map_texture_image;
functions->UnmapTextureImage = brw_unmap_texture_image;
functions->TextureView = brw_texture_view;
functions->TextureBarrier = brw_texture_barrier;
functions->SetTextureStorageForMemoryObject = intel_texture_for_memory_object;
}

View file

@ -1,58 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef INTELTEX_INC
#define INTELTEX_INC
#include "main/mtypes.h"
#include "main/formats.h"
#include "brw_context.h"
#include "brw_mipmap_tree.h"
void brw_init_texture_functions(struct dd_function_table *functions);
void brw_init_texture_image_functions(struct dd_function_table *functions);
void brw_init_texture_copy_image_functions(struct dd_function_table *functs);
void brw_init_copy_image_functions(struct dd_function_table *functions);
void brw_set_texbuffer(__DRIcontext *pDRICtx,
GLint target, __DRIdrawable *pDraw);
void brw_set_texbuffer2(__DRIcontext *pDRICtx,
GLint target, GLint format, __DRIdrawable *pDraw);
void brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target,
__DRIdrawable *dPriv);
struct brw_mipmap_tree *
brw_miptree_create_for_teximage(struct brw_context *brw,
struct brw_texture_object *brw_obj,
struct brw_texture_image *brw_image,
enum brw_miptree_create_flags flags);
void brw_finalize_mipmap_tree(struct brw_context *brw,
struct gl_texture_object *tex_obj);
#endif

View file

@ -1,72 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/texstate.h"
#include "main/fbobject.h"
#include "drivers/common/meta.h"
#include "brw_screen.h"
#include "brw_mipmap_tree.h"
#include "brw_fbo.h"
#include "brw_tex.h"
#include "brw_context.h"
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
static void
brw_copytexsubimage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint slice,
struct gl_renderbuffer *rb,
GLint x, GLint y,
GLsizei width, GLsizei height)
{
struct brw_context *brw = brw_context(ctx);
/* Try BLORP first. It can handle almost everything. */
if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
xoffset, yoffset, width, height))
return;
/* Finally, fall back to meta. This will likely be slow. */
perf_debug("%s - fallback to swrast\n", __func__);
_mesa_meta_CopyTexSubImage(ctx, dims, texImage,
xoffset, yoffset, slice,
rb, x, y, width, height);
}
void
brw_init_texture_copy_image_functions(struct dd_function_table *functions)
{
functions->CopyTexSubImage = brw_copytexsubimage;
}

View file

@ -1,992 +0,0 @@
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/image.h"
#include "main/pbo.h"
#include "main/renderbuffer.h"
#include "main/texcompress.h"
#include "main/texgetimage.h"
#include "main/texobj.h"
#include "main/teximage.h"
#include "main/texstore.h"
#include "main/glthread.h"
#include "drivers/common/meta.h"
#include "brw_mipmap_tree.h"
#include "brw_buffer_objects.h"
#include "brw_batch.h"
#include "brw_tex.h"
#include "brw_fbo.h"
#include "brw_image.h"
#include "brw_context.h"
#include "brw_blorp.h"
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
/* Make sure one doesn't end up shrinking base level zero unnecessarily.
* Determining the base level dimension by shifting a higher level's dimension
* ends up with an off-by-one value when the base level has an NPOT size (for
* example, 293 != 146 << 1).
* Choose the original base level dimension when the shifted dimensions agree.
* Otherwise assume a real resize is intended and use the new shifted value.
*/
static unsigned
get_base_dim(unsigned old_base_dim, unsigned new_level_dim, unsigned level)
{
const unsigned old_level_dim = old_base_dim >> level;
const unsigned new_base_dim = new_level_dim << level;
return old_level_dim == new_level_dim ? old_base_dim : new_base_dim;
}
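A quick worked example of the NPOT case described above (values only; the asserts are illustrative, not from the original file):
/* Old base width 293, level 1 respecified as 146: 293 >> 1 == 146, so the
 * shifted dimensions agree and the base stays 293 rather than the off-by-one
 * 146 << 1 == 292. If level 1 were respecified as 200 instead, 293 >> 1 != 200,
 * so a real resize is assumed and the base becomes 200 << 1 == 400. */
assert(get_base_dim(293, 146, 1) == 293);
assert(get_base_dim(293, 200, 1) == 400);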
/* Work back from the specified level of the image to the baselevel and create a
* miptree of that size.
*/
struct brw_mipmap_tree *
brw_miptree_create_for_teximage(struct brw_context *brw,
struct brw_texture_object *brw_obj,
struct brw_texture_image *brw_image,
enum brw_miptree_create_flags flags)
{
GLuint lastLevel;
int width, height, depth;
unsigned old_width = 0, old_height = 0, old_depth = 0;
const struct brw_mipmap_tree *old_mt = brw_obj->mt;
const unsigned level = brw_image->base.Base.Level;
brw_get_image_dims(&brw_image->base.Base, &width, &height, &depth);
if (old_mt) {
old_width = old_mt->surf.logical_level0_px.width;
old_height = old_mt->surf.logical_level0_px.height;
old_depth = old_mt->surf.dim == ISL_SURF_DIM_3D ?
old_mt->surf.logical_level0_px.depth :
old_mt->surf.logical_level0_px.array_len;
}
DBG("%s\n", __func__);
/* Figure out image dimensions at start level. */
switch(brw_obj->base.Target) {
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
case GL_TEXTURE_RECTANGLE:
case GL_TEXTURE_EXTERNAL_OES:
assert(level == 0);
break;
case GL_TEXTURE_3D:
depth = old_mt ? get_base_dim(old_depth, depth, level) :
depth << level;
FALLTHROUGH;
case GL_TEXTURE_2D:
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_CUBE_MAP_ARRAY:
height = old_mt ? get_base_dim(old_height, height, level) :
height << level;
FALLTHROUGH;
case GL_TEXTURE_1D:
case GL_TEXTURE_1D_ARRAY:
width = old_mt ? get_base_dim(old_width, width, level) :
width << level;
break;
default:
unreachable("Unexpected target");
}
/* Guess a reasonable value for lastLevel. This is probably going
* to be wrong fairly often and might mean that we have to look at
* resizable buffers, or require that buffers implement lazy
* pagetable arrangements.
*/
if ((brw_obj->base.Sampler.Attrib.MinFilter == GL_NEAREST ||
brw_obj->base.Sampler.Attrib.MinFilter == GL_LINEAR) &&
brw_image->base.Base.Level == 0 &&
!brw_obj->base.Attrib.GenerateMipmap) {
lastLevel = 0;
} else {
lastLevel = _mesa_get_tex_max_num_levels(brw_obj->base.Target,
width, height, depth) - 1;
}
return brw_miptree_create(brw,
brw_obj->base.Target,
brw_image->base.Base.TexFormat,
0,
lastLevel,
width,
height,
depth,
MAX2(brw_image->base.Base.NumSamples, 1),
flags);
}
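/* Illustrative sketch, not part of the original driver: the lastLevel guess
 * above, in numbers.  A full mipmap chain for a 256x256 GL_TEXTURE_2D has 9
 * levels (256, 128, ..., 1), so the mipmapped case guesses lastLevel == 8,
 * while a non-mipmapped MinFilter collapses the guess to a single level.
 * example_guess_last_level() is a hypothetical helper mirroring that math.
 */
static unsigned
example_guess_last_level(bool mipmapped, unsigned width, unsigned height)
{
   if (!mipmapped)
      return 0;

   unsigned levels = 1;
   while (width > 1 || height > 1) {
      width = MAX2(width >> 1, 1u);
      height = MAX2(height >> 1, 1u);
      levels++;
   }
   return levels - 1;   /* 256x256 -> 8, i.e. levels 0..8 */
}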
static bool
brw_texsubimage_blorp(struct brw_context *brw, GLuint dims,
struct gl_texture_image *tex_image,
unsigned x, unsigned y, unsigned z,
unsigned width, unsigned height, unsigned depth,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing)
{
struct brw_texture_image *intel_image = brw_texture_image(tex_image);
const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel;
const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z;
/* The blorp path can't understand crazy format hackery */
if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) !=
_mesa_get_format_base_format(tex_image->TexFormat))
return false;
return brw_blorp_upload_miptree(brw, intel_image->mt, tex_image->TexFormat,
mt_level, x, y, mt_z, width, height, depth,
tex_image->TexObject->Target, format, type,
pixels, packing);
}
/**
* \brief A fast path for glTexImage and glTexSubImage.
*
* This fast path is taken when the texture format is BGRA, RGBA,
* A or L and when the texture memory is X- or Y-tiled. It uploads
* the texture data by mapping the texture memory without a GTT fence, thus
* acquiring a tiled view of the memory, and then copying successive
* spans within each tile.
*
* This is a performance win over the conventional texture upload path because
* it avoids the performance penalty of writing through the write-combine
* buffer. In the conventional texture upload path,
* texstore.c:store_texsubimage(), the texture memory is mapped through a GTT
* fence, thus acquiring a linear view of the memory, then each row in the
* image is memcpy'd. In this fast path, we replace each row's copy with
* a sequence of copies over each linear span in the tile.
*
* One use case is Google Chrome's paint rectangles. Chrome (as
* of version 21) renders each page as a tiling of 256x256 GL_BGRA textures.
* Each page's content is initially uploaded with glTexImage2D and damaged
* regions are updated with glTexSubImage2D. On some workloads, the
* performance gain of this fastpath on Sandybridge is over 5x.
*/
static bool
brw_texsubimage_tiled_memcpy(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth,
GLenum format, GLenum type,
const GLvoid *pixels,
const struct gl_pixelstore_attrib *packing)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct brw_texture_image *image = brw_texture_image(texImage);
int src_pitch;
/* The miptree's buffer. */
struct brw_bo *bo;
uint32_t cpp;
isl_memcpy_type copy_type;
/* This fastpath is restricted to specific texture types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
* more types.
*
* FINISHME: The restrictions below on packing alignment and packing row
* length are likely unneeded now because we calculate the source stride
* with _mesa_image_row_stride. However, before removing the restrictions
* we need tests.
*/
if (!devinfo->has_llc ||
!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
!(texImage->TexObject->Target == GL_TEXTURE_2D ||
texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
pixels == NULL ||
packing->BufferObj ||
packing->Alignment > 4 ||
packing->SkipPixels > 0 ||
packing->SkipRows > 0 ||
(packing->RowLength != 0 && packing->RowLength != width) ||
packing->SwapBytes ||
packing->LsbFirst ||
packing->Invert)
return false;
/* Only a simple blit, no scale, bias or other mapping. */
if (ctx->_ImageTransferState)
return false;
copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type,
&cpp);
if (copy_type == ISL_MEMCPY_INVALID)
return false;
/* If this is a nontrivial texture view, let another path handle it instead. */
if (texImage->TexObject->Attrib.MinLayer)
return false;
if (!image->mt ||
(image->mt->surf.tiling != ISL_TILING_X &&
image->mt->surf.tiling != ISL_TILING_Y0)) {
/* The algorithm is written only for X- or Y-tiled memory. */
return false;
}
/* linear_to_tiled() assumes that if the object is swizzled, it is using
* I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only
* true on gfx5 and above.
*
* The killer on top is that some gfx4 have an L-shaped swizzle mode, where
* parts of the memory aren't swizzled at all. Userspace just can't handle
* that.
*/
if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
return false;
int level = texImage->Level + texImage->TexObject->Attrib.MinLevel;
/* Since we are going to write raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
assert(image->mt->surf.logical_level0_px.depth == 1);
assert(image->mt->surf.logical_level0_px.array_len == 1);
brw_miptree_access_raw(brw, image->mt, level, 0, true);
bo = image->mt->bo;
if (brw_batch_references(&brw->batch, bo)) {
perf_debug("Flushing before mapping a referenced bo.\n");
brw_batch_flush(brw);
}
void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_RAW);
if (map == NULL) {
DBG("%s: failed to map bo\n", __func__);
return false;
}
src_pitch = _mesa_image_row_stride(packing, width, format, type);
/* We postponed printing this message until having committed to executing
* the function.
*/
DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
"mesa_format=0x%x tiling=%d "
"packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d) ",
__func__, texImage->Level, xoffset, yoffset, width, height,
format, type, texImage->TexFormat, image->mt->surf.tiling,
packing->Alignment, packing->RowLength, packing->SkipPixels,
packing->SkipRows);
/* Adjust x and y offset based on miplevel */
unsigned level_x, level_y;
brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y);
xoffset += level_x;
yoffset += level_y;
isl_memcpy_linear_to_tiled(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
map,
pixels,
image->mt->surf.row_pitch_B, src_pitch,
devinfo->has_bit6_swizzle,
image->mt->surf.tiling,
copy_type
);
brw_bo_unmap(bo);
return true;
}
static void
brw_upload_tex(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth,
GLenum format, GLenum type,
const GLvoid * pixels,
const struct gl_pixelstore_attrib *packing)
{
struct brw_context *brw = brw_context(ctx);
struct brw_mipmap_tree *mt = brw_texture_image(texImage)->mt;
bool ok;
/* Check that there is actually data to store. */
if (pixels == NULL && !packing->BufferObj)
return;
bool tex_busy = mt &&
(brw_batch_references(&brw->batch, mt->bo) || brw_bo_busy(mt->bo));
if (packing->BufferObj || tex_busy ||
mt->aux_usage == ISL_AUX_USAGE_CCS_E) {
ok = brw_texsubimage_blorp(brw, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth, format, type,
pixels, packing);
if (ok)
return;
}
ok = brw_texsubimage_tiled_memcpy(ctx, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth,
format, type, pixels, packing);
if (ok)
return;
_mesa_store_texsubimage(ctx, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth,
format, type, pixels, packing);
}
static void
brw_teximage(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *unpack)
{
DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
__func__, _mesa_get_format_name(texImage->TexFormat),
_mesa_enum_to_string(texImage->TexObject->Target),
_mesa_enum_to_string(format), _mesa_enum_to_string(type),
texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
/* Allocate storage for texture data. */
if (!ctx->Driver.AllocTextureImageBuffer(ctx, texImage)) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage%uD", dims);
return;
}
assert(brw_texture_image(texImage)->mt);
brw_upload_tex(ctx, dims, texImage, 0, 0, 0,
texImage->Width, texImage->Height, texImage->Depth,
format, type, pixels, unpack);
}
static void
brw_texsubimage(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth,
GLenum format, GLenum type,
const GLvoid * pixels,
const struct gl_pixelstore_attrib *packing)
{
DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
__func__, _mesa_get_format_name(texImage->TexFormat),
_mesa_enum_to_string(texImage->TexObject->Target),
_mesa_enum_to_string(format), _mesa_enum_to_string(type),
texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
brw_upload_tex(ctx, dims, texImage, xoffset, yoffset, zoffset,
width, height, depth, format, type, pixels, packing);
}
static void
brw_set_texture_image_mt(struct brw_context *brw,
struct gl_texture_image *image,
GLenum internal_format,
mesa_format format,
struct brw_mipmap_tree *mt)
{
struct gl_texture_object *texobj = image->TexObject;
struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
struct brw_texture_image *intel_image = brw_texture_image(image);
_mesa_init_teximage_fields(&brw->ctx, image,
mt->surf.logical_level0_px.width,
mt->surf.logical_level0_px.height, 1,
0, internal_format, format);
brw->ctx.Driver.FreeTextureImageBuffer(&brw->ctx, image);
intel_texobj->needs_validate = true;
intel_image->base.RowStride = mt->surf.row_pitch_B / mt->cpp;
assert(mt->surf.row_pitch_B % mt->cpp == 0);
brw_miptree_reference(&intel_image->mt, mt);
/* Immediately validate the image to the object. */
brw_miptree_reference(&intel_texobj->mt, mt);
}
void
brw_set_texbuffer2(__DRIcontext *pDRICtx, GLint target,
GLint texture_format,
__DRIdrawable *dPriv)
{
struct gl_framebuffer *fb = dPriv->driverPrivate;
struct brw_context *brw = pDRICtx->driverPrivate;
struct gl_context *ctx = &brw->ctx;
struct brw_renderbuffer *rb;
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
mesa_format texFormat = MESA_FORMAT_NONE;
GLenum internal_format = 0;
_mesa_glthread_finish(ctx);
texObj = _mesa_get_current_tex_object(ctx, target);
if (!texObj)
return;
if (dPriv->lastStamp != dPriv->dri2.stamp ||
!pDRICtx->driScreenPriv->dri2.useInvalidate)
brw_update_renderbuffers(pDRICtx, dPriv);
rb = brw_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
/* If the miptree isn't set, then brw_update_renderbuffers was unable
* to get the BO for the drawable from the window system.
*/
if (!rb || !rb->mt)
return;
/* Neither the EGL nor the GLX texture_from_pixmap spec says anything about
* sRGB. They are both from a time when sRGB was considered an extra
* encoding step you did as part of rendering/blending and not a format.
* Even though we have a concept of sRGB visuals, X has classically assumed
* that your data is just bits and sRGB rendering is entirely a client-side
* rendering construct. The assumption is that the result of BindTexImage
* is a texture with a linear format even if it was rendered with sRGB
* encoding enabled.
*/
texFormat = _mesa_get_srgb_format_linear(brw_rb_format(rb));
if (rb->mt->cpp == 4) {
/* The extra texture_format parameter indicates whether the alpha
* channel should be respected or ignored. If we set internal_format to
* GL_RGB, the texture handling code is smart enough to swap the format
* or apply a swizzle if the underlying format is RGBA so we don't need
* to stomp it to RGBX or anything like that.
*/
if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
internal_format = GL_RGB;
else
internal_format = GL_RGBA;
} else if (rb->mt->cpp == 2) {
internal_format = GL_RGB;
}
brw_miptree_finish_external(brw, rb->mt);
_mesa_lock_texture(&brw->ctx, texObj);
texImage = _mesa_get_tex_image(ctx, texObj, target, 0);
brw_set_texture_image_mt(brw, texImage, internal_format,
texFormat, rb->mt);
_mesa_unlock_texture(&brw->ctx, texObj);
}
void
brw_release_texbuffer(__DRIcontext *pDRICtx, GLint target,
__DRIdrawable *dPriv)
{
struct brw_context *brw = pDRICtx->driverPrivate;
struct gl_context *ctx = &brw->ctx;
struct gl_texture_object *tex_obj;
struct brw_texture_object *intel_tex;
tex_obj = _mesa_get_current_tex_object(ctx, target);
if (!tex_obj)
return;
_mesa_lock_texture(&brw->ctx, tex_obj);
intel_tex = brw_texture_object(tex_obj);
if (!intel_tex->mt) {
_mesa_unlock_texture(&brw->ctx, tex_obj);
return;
}
/* The brw_miptree_prepare_external below as well as the finish_external
* above in brw_set_texbuffer2 *should* do nothing. The BindTexImage call
* from both GLX and EGL has TexImage2D and not TexSubImage2D semantics so
* the texture is not immutable. This means that the user cannot create a
* texture view of the image with a different format. Since the only three
* formats available when using BindTexImage are all UNORM, we can never
* end up with an sRGB format being used for texturing and so we shouldn't
* get any format-related resolves when texturing from it.
*
* While very unlikely, it is possible that the client could use the bound
* texture with GL_ARB_image_load_store. In that case, we'll do a resolve
* but that's not actually a problem as it just means that we lose
* compression on this texture until the next time it's used as a render
* target.
*
* The only other way we could end up with an unexpected aux usage would be
* if we rendered to the image from the same context as we have it bound as
* a texture between BindTexImage and ReleaseTexImage. However, the spec
* clearly calls this case out and says you shouldn't do that. It doesn't
* explicitly prevent binding the texture to a framebuffer but it says the
* results of trying to render to it while bound are undefined.
*
* Just to keep everything safe and sane, we do a prepare_external but it
* should be a no-op in almost all cases. On the off chance that someone
* ever triggers this, we should at least warn them.
*/
if (intel_tex->mt->aux_buf &&
brw_miptree_get_aux_state(intel_tex->mt, 0, 0) !=
isl_drm_modifier_get_default_aux_state(intel_tex->mt->drm_modifier)) {
_mesa_warning(ctx, "Aux state changed between BindTexImage and "
"ReleaseTexImage. Most likely someone tried to draw "
"to the pixmap bound in BindTexImage or used it with "
"image_load_store.");
}
brw_miptree_prepare_external(brw, intel_tex->mt);
_mesa_unlock_texture(&brw->ctx, tex_obj);
}
static GLboolean
brw_bind_renderbuffer_tex_image(struct gl_context *ctx,
struct gl_renderbuffer *rb,
struct gl_texture_image *image)
{
struct brw_renderbuffer *irb = brw_renderbuffer(rb);
struct brw_texture_image *intel_image = brw_texture_image(image);
struct gl_texture_object *texobj = image->TexObject;
struct brw_texture_object *intel_texobj = brw_texture_object(texobj);
/* We can only handle RB allocated with AllocRenderbufferStorage, or
* window-system renderbuffers.
*/
assert(!rb->TexImage);
if (!irb->mt)
return false;
_mesa_lock_texture(ctx, texobj);
_mesa_init_teximage_fields(ctx, image, rb->Width, rb->Height, 1, 0,
rb->InternalFormat, rb->Format);
image->NumSamples = rb->NumSamples;
brw_miptree_reference(&intel_image->mt, irb->mt);
/* Immediately validate the image to the object. */
brw_miptree_reference(&intel_texobj->mt, intel_image->mt);
intel_texobj->needs_validate = true;
_mesa_unlock_texture(ctx, texobj);
return true;
}
void
brw_set_texbuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
{
/* The old interface didn't have the format argument, so copy our
* implementation's behavior at the time.
*/
brw_set_texbuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
}
static void
brw_image_target_texture(struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage,
GLeglImageOES image_handle,
bool storage)
{
struct brw_context *brw = brw_context(ctx);
struct brw_mipmap_tree *mt;
__DRIscreen *dri_screen = brw->screen->driScrnPriv;
__DRIimage *image;
image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle,
dri_screen->loaderPrivate);
if (image == NULL)
return;
/* Disallow depth/stencil textures: we don't have a way to pass the
* separate stencil miptree of a GL_DEPTH_STENCIL texture through.
*/
if (image->has_depthstencil) {
_mesa_error(ctx, GL_INVALID_OPERATION, __func__);
return;
}
mt = brw_miptree_create_for_dri_image(brw, image, target, image->format,
false);
if (mt == NULL)
return;
struct brw_texture_object *intel_texobj = brw_texture_object(texObj);
intel_texobj->planar_format = image->planar_format;
intel_texobj->yuv_color_space = image->yuv_color_space;
GLenum internal_format =
image->internal_format != 0 ?
image->internal_format : _mesa_get_format_base_format(mt->format);
/* Fix the internal format when _mesa_get_format_base_format(mt->format)
* isn't a valid one for that particular format.
*/
if (brw->mesa_format_supports_render[image->format]) {
if (image->format == MESA_FORMAT_R10G10B10A2_UNORM ||
image->format == MESA_FORMAT_R10G10B10X2_UNORM ||
image->format == MESA_FORMAT_B10G10R10A2_UNORM ||
image->format == MESA_FORMAT_B10G10R10X2_UNORM)
internal_format = GL_RGB10_A2;
}
/* Guess sized internal format for dma-bufs, as specified by
* EXT_EGL_image_storage.
*/
if (storage && target == GL_TEXTURE_2D && image->imported_dmabuf) {
internal_format = driGLFormatToSizedInternalGLFormat(image->format);
if (internal_format == GL_NONE) {
_mesa_error(ctx, GL_INVALID_OPERATION, __func__);
return;
}
}
brw_set_texture_image_mt(brw, texImage, internal_format, mt->format, mt);
brw_miptree_release(&mt);
}
static void
brw_image_target_texture_2d(struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage,
GLeglImageOES image_handle)
{
brw_image_target_texture(ctx, target, texObj, texImage, image_handle,
false);
}
static void
brw_image_target_tex_storage(struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage,
GLeglImageOES image_handle)
{
struct brw_texture_object *intel_texobj = brw_texture_object(texObj);
brw_image_target_texture(ctx, target, texObj, texImage, image_handle,
true);
/* The miptree is in a validated state, so no need to check later. */
intel_texobj->needs_validate = false;
intel_texobj->validated_first_level = 0;
intel_texobj->validated_last_level = 0;
intel_texobj->_Format = texImage->TexFormat;
}
static bool
brw_gettexsubimage_blorp(struct brw_context *brw,
struct gl_texture_image *tex_image,
unsigned x, unsigned y, unsigned z,
unsigned width, unsigned height, unsigned depth,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing)
{
struct brw_texture_image *intel_image = brw_texture_image(tex_image);
const unsigned mt_level = tex_image->Level + tex_image->TexObject->Attrib.MinLevel;
const unsigned mt_z = tex_image->TexObject->Attrib.MinLayer + tex_image->Face + z;
/* The blorp path can't understand crazy format hackery */
if (_mesa_base_tex_format(&brw->ctx, tex_image->InternalFormat) !=
_mesa_get_format_base_format(tex_image->TexFormat))
return false;
return brw_blorp_download_miptree(brw, intel_image->mt,
tex_image->TexFormat, SWIZZLE_XYZW,
mt_level, x, y, mt_z,
width, height, depth,
tex_image->TexObject->Target,
format, type, false, pixels, packing);
}
/**
* \brief A fast path for glGetTexImage.
*
* \see brw_readpixels_tiled_memcpy()
*/
static bool
brw_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
GLvoid *pixels,
const struct gl_pixelstore_attrib *packing)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct brw_texture_image *image = brw_texture_image(texImage);
int dst_pitch;
/* The miptree's buffer. */
struct brw_bo *bo;
uint32_t cpp;
isl_memcpy_type copy_type;
/* This fastpath is restricted to specific texture types:
* a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
* more types.
*
* FINISHME: The restrictions below on packing alignment and packing row
* length are likely unneeded now because we calculate the destination stride
* with _mesa_image_row_stride. However, before removing the restrictions
* we need tests.
*/
if (!devinfo->has_llc ||
!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
!(texImage->TexObject->Target == GL_TEXTURE_2D ||
texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
pixels == NULL ||
packing->BufferObj ||
packing->Alignment > 4 ||
packing->SkipPixels > 0 ||
packing->SkipRows > 0 ||
(packing->RowLength != 0 && packing->RowLength != width) ||
packing->SwapBytes ||
packing->LsbFirst ||
packing->Invert)
return false;
/* We can't handle copying from RGBX or BGRX because the tiled_memcpy
* function doesn't set the last channel to 1. Note this checks BaseFormat
* rather than TexFormat in case the RGBX format is being simulated with an
* RGBA format.
*/
if (texImage->_BaseFormat == GL_RGB)
return false;
copy_type = brw_miptree_get_memcpy_type(texImage->TexFormat, format, type,
&cpp);
if (copy_type == ISL_MEMCPY_INVALID)
return false;
/* If this is a nontrivial texture view, let another path handle it instead. */
if (texImage->TexObject->Attrib.MinLayer)
return false;
if (!image->mt ||
(image->mt->surf.tiling != ISL_TILING_X &&
image->mt->surf.tiling != ISL_TILING_Y0)) {
/* The algorithm is written only for X- or Y-tiled memory. */
return false;
}
/* tiled_to_linear() assumes that if the object is swizzled, it is using
* I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y. This is only
* true on gfx5 and above.
*
* The killer on top is that some gfx4 have an L-shaped swizzle mode, where
* parts of the memory aren't swizzled at all. Userspace just can't handle
* that.
*/
if (devinfo->ver < 5 && devinfo->has_bit6_swizzle)
return false;
int level = texImage->Level + texImage->TexObject->Attrib.MinLevel;
/* Since we are going to write raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
assert(image->mt->surf.logical_level0_px.depth == 1);
assert(image->mt->surf.logical_level0_px.array_len == 1);
brw_miptree_access_raw(brw, image->mt, level, 0, true);
bo = image->mt->bo;
if (brw_batch_references(&brw->batch, bo)) {
perf_debug("Flushing before mapping a referenced bo.\n");
brw_batch_flush(brw);
}
void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW);
if (map == NULL) {
DBG("%s: failed to map bo\n", __func__);
return false;
}
dst_pitch = _mesa_image_row_stride(packing, width, format, type);
DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
"mesa_format=0x%x tiling=%d "
"packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
__func__, texImage->Level, xoffset, yoffset, width, height,
format, type, texImage->TexFormat, image->mt->surf.tiling,
packing->Alignment, packing->RowLength, packing->SkipPixels,
packing->SkipRows);
/* Adjust x and y offset based on miplevel */
unsigned level_x, level_y;
brw_miptree_get_image_offset(image->mt, level, 0, &level_x, &level_y);
xoffset += level_x;
yoffset += level_y;
isl_memcpy_tiled_to_linear(
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
pixels,
map,
dst_pitch, image->mt->surf.row_pitch_B,
devinfo->has_bit6_swizzle,
image->mt->surf.tiling,
copy_type
);
brw_bo_unmap(bo);
return true;
}
static void
brw_get_tex_sub_image(struct gl_context *ctx,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage)
{
struct brw_context *brw = brw_context(ctx);
bool ok;
DBG("%s\n", __func__);
if (ctx->Pack.BufferObj) {
if (brw_gettexsubimage_blorp(brw, texImage,
xoffset, yoffset, zoffset,
width, height, depth, format, type,
pixels, &ctx->Pack))
return;
perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
}
ok = brw_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
width, height,
format, type, pixels, &ctx->Pack);
if (ok)
return;
_mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset,
width, height, depth,
format, type, pixels, texImage);
DBG("%s - DONE\n", __func__);
}
static void
flush_astc_denorms(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth)
{
struct compressed_pixelstore store;
_mesa_compute_compressed_pixelstore(dims, texImage->TexFormat,
width, height, depth,
&ctx->Unpack, &store);
for (int slice = 0; slice < store.CopySlices; slice++) {
/* Map dest texture buffer */
GLubyte *dstMap;
GLint dstRowStride;
ctx->Driver.MapTextureImage(ctx, texImage, slice + zoffset,
xoffset, yoffset, width, height,
GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
&dstMap, &dstRowStride);
if (!dstMap)
continue;
for (int i = 0; i < store.CopyRowsPerSlice; i++) {
/* An ASTC block is stored in little endian mode. The byte that
* contains bits 0..7 is stored at the lower address in memory.
*/
struct astc_void_extent {
uint16_t header : 12;
uint16_t dontcare[3];
uint16_t R;
uint16_t G;
uint16_t B;
uint16_t A;
} *blocks = (struct astc_void_extent*) dstMap;
/* Iterate over every copied block in the row */
for (int j = 0; j < store.CopyBytesPerRow / 16; j++) {
/* Check if the header matches that of an LDR void-extent block */
if (blocks[j].header == 0xDFC) {
/* Flush UNORM16 values that would be denormalized */
if (blocks[j].A < 4) blocks[j].A = 0;
if (blocks[j].B < 4) blocks[j].B = 0;
if (blocks[j].G < 4) blocks[j].G = 0;
if (blocks[j].R < 4) blocks[j].R = 0;
}
}
dstMap += dstRowStride;
}
ctx->Driver.UnmapTextureImage(ctx, texImage, slice + zoffset);
}
}
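/* Illustrative sketch, not part of the original driver: the void-extent fixup
 * above applied to a single block's constant color.  UNORM16 values below 4
 * map to FP16 denormals (4/65535 is roughly the smallest normal FP16 value),
 * which is why they are flushed to zero.  example_flush_void_extent_color()
 * is a hypothetical helper.
 */
static void
example_flush_void_extent_color(uint16_t rgba[4])
{
   for (int i = 0; i < 4; i++) {
      if (rgba[i] < 4)
         rgba[i] = 0;
   }
}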
static void
brw_compressedtexsubimage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth,
GLenum format,
GLsizei imageSize, const GLvoid *data)
{
/* Upload the compressed data blocks */
_mesa_store_compressed_texsubimage(ctx, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth,
format, imageSize, data);
/* Fix up copied ASTC blocks if necessary */
GLenum gl_format = _mesa_compressed_format_to_glenum(ctx,
texImage->TexFormat);
bool is_linear_astc = _mesa_is_astc_format(gl_format) &&
!_mesa_is_srgb_format(gl_format);
struct brw_context *brw = (struct brw_context*) ctx;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (devinfo->ver == 9 &&
!intel_device_info_is_9lp(devinfo) &&
is_linear_astc)
flush_astc_denorms(ctx, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth);
}
void
brw_init_texture_image_functions(struct dd_function_table *functions)
{
functions->TexImage = brw_teximage;
functions->TexSubImage = brw_texsubimage;
functions->CompressedTexSubImage = brw_compressedtexsubimage;
functions->EGLImageTargetTexture2D = brw_image_target_texture_2d;
functions->EGLImageTargetTexStorage = brw_image_target_tex_storage;
functions->BindRenderbufferTexImage = brw_bind_renderbuffer_tex_image;
functions->GetTexSubImage = brw_get_tex_sub_image;
}

View file

@ -1,101 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _BRW_TEX_OBJ_H
#define _BRW_TEX_OBJ_H
#include "swrast/s_context.h"
#ifdef __cplusplus
extern "C" {
#endif
struct brw_texture_object
{
struct gl_texture_object base;
/* This is a mirror of base._MaxLevel, updated at validate time,
* except that we don't bother with the non-base levels for
* non-mipmapped textures.
*/
unsigned int _MaxLevel;
unsigned int validated_first_level;
unsigned int validated_last_level;
/* The miptree of pixel data for the texture (if !needs_validate). After
* validation, the images will also have references to the same mt.
*/
struct brw_mipmap_tree *mt;
/**
* Set when mipmap trees in the texture images of this texture object
* might not all be the mipmap tree above.
*/
bool needs_validate;
/* Mesa format for the validated texture object. For non-views this
* will always be the same as texObj->Image[0][0].TexFormat. For views, it
* may differ since the mt is shared across views with differing formats.
*/
mesa_format _Format;
const struct brw_image_format *planar_format;
unsigned int yuv_color_space;
};
/**
* brw_texture_image is a subclass of swrast_texture_image because we
* sometimes fall back to using the swrast module for software rendering.
*/
struct brw_texture_image
{
struct swrast_texture_image base;
/* If brw_image->mt != NULL, image data is stored here.
* Else if brw_image->base.Buffer != NULL, image is stored there.
* Else there is no image data.
*/
struct brw_mipmap_tree *mt;
};
static inline struct brw_texture_object *
brw_texture_object(struct gl_texture_object *obj)
{
return (struct brw_texture_object *) obj;
}
static inline struct brw_texture_image *
brw_texture_image(struct gl_texture_image *img)
{
return (struct brw_texture_image *) img;
}
#ifdef __cplusplus
}
#endif
#endif /* _BRW_TEX_OBJ_H */

View file

@ -1,223 +0,0 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/samplerobj.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "brw_context.h"
#include "brw_mipmap_tree.h"
#include "brw_tex.h"
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
/**
* Sets our driver-specific variant of tObj->_MaxLevel for later surface state
* upload.
*
* If we're only ensuring that there is storage for the first miplevel of a
* texture, then in texture setup we're going to have to make sure we don't
* allow sampling beyond level 0.
*/
static void
brw_update_max_level(struct gl_texture_object *tObj,
struct gl_sampler_object *sampler)
{
struct brw_texture_object *brw_obj = brw_texture_object(tObj);
if (!tObj->_MipmapComplete ||
(tObj->_RenderToTexture &&
(sampler->Attrib.MinFilter == GL_NEAREST ||
sampler->Attrib.MinFilter == GL_LINEAR))) {
brw_obj->_MaxLevel = tObj->Attrib.BaseLevel;
} else {
brw_obj->_MaxLevel = tObj->_MaxLevel;
}
}
/**
* At rendering-from-a-texture time, make sure that the texture object has a
* miptree that can hold the entire texture based on
* BaseLevel/MaxLevel/filtering, and copy in any texture images that are
* stored in other miptrees.
*/
void
brw_finalize_mipmap_tree(struct brw_context *brw,
struct gl_texture_object *tObj)
{
struct brw_texture_object *brw_obj = brw_texture_object(tObj);
GLuint face, i;
GLuint nr_faces = 0;
struct brw_texture_image *firstImage;
int width, height, depth;
/* TBOs require no validation -- they always just point to their BO. */
if (tObj->Target == GL_TEXTURE_BUFFER)
return;
/* What levels does this validated texture image require? */
int validate_first_level = tObj->Attrib.BaseLevel;
int validate_last_level = brw_obj->_MaxLevel;
/* Skip the loop over images in the common case of no images having
* changed. But if the GL_BASE_LEVEL or GL_MAX_LEVEL change to something we
* haven't looked at, then we do need to look at those new images.
*/
if (!brw_obj->needs_validate &&
validate_first_level >= brw_obj->validated_first_level &&
validate_last_level <= brw_obj->validated_last_level) {
return;
}
/* On recent generations, immutable textures should not get this far
* -- they should have been created in a validated state, and nothing
* can invalidate them.
*
* Unfortunately, this is not true on pre-Sandybridge hardware -- when
* rendering into an immutable-format depth texture we may have to rebase
* the rendered levels to meet alignment requirements.
*
* FINISHME: Avoid doing this.
*/
assert(!tObj->Immutable || brw->screen->devinfo.ver < 6);
firstImage = brw_texture_image(tObj->Image[0][tObj->Attrib.BaseLevel]);
if (!firstImage)
return;
/* Check tree can hold all active levels. Check tree matches
* target, imageFormat, etc.
*/
if (brw_obj->mt &&
(!brw_miptree_match_image(brw_obj->mt, &firstImage->base.Base) ||
validate_first_level < brw_obj->mt->first_level ||
validate_last_level > brw_obj->mt->last_level)) {
brw_miptree_release(&brw_obj->mt);
}
/* May need to create a new tree:
*/
if (!brw_obj->mt) {
const unsigned level = firstImage->base.Base.Level;
brw_get_image_dims(&firstImage->base.Base, &width, &height, &depth);
/* Figure out image dimensions at start level. */
switch(brw_obj->base.Target) {
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
case GL_TEXTURE_RECTANGLE:
case GL_TEXTURE_EXTERNAL_OES:
assert(level == 0);
break;
case GL_TEXTURE_3D:
depth = depth << level;
FALLTHROUGH;
case GL_TEXTURE_2D:
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_CUBE_MAP_ARRAY:
height = height << level;
FALLTHROUGH;
case GL_TEXTURE_1D:
case GL_TEXTURE_1D_ARRAY:
width = width << level;
break;
default:
unreachable("Unexpected target");
}
perf_debug("Creating new %s %dx%dx%d %d-level miptree to handle "
"finalized texture miptree.\n",
_mesa_get_format_name(firstImage->base.Base.TexFormat),
width, height, depth, validate_last_level + 1);
brw_obj->mt = brw_miptree_create(brw,
brw_obj->base.Target,
firstImage->base.Base.TexFormat,
0, /* first_level */
validate_last_level,
width,
height,
depth,
1 /* num_samples */,
MIPTREE_CREATE_BUSY);
if (!brw_obj->mt)
return;
}
/* Pull in any images not in the object's tree:
*/
nr_faces = _mesa_num_tex_faces(brw_obj->base.Target);
for (face = 0; face < nr_faces; face++) {
for (i = validate_first_level; i <= validate_last_level; i++) {
struct brw_texture_image *brw_image =
brw_texture_image(brw_obj->base.Image[face][i]);
/* skip miplevels that are too small */
if (brw_image == NULL)
break;
if (brw_obj->mt != brw_image->mt)
brw_miptree_copy_teximage(brw, brw_image, brw_obj->mt);
/* After we're done, we'd better agree that our layout is
* appropriate, or we'll end up hitting this function again on the
* next draw
*/
assert(brw_miptree_match_image(brw_obj->mt, &brw_image->base.Base));
}
}
brw_obj->validated_first_level = validate_first_level;
brw_obj->validated_last_level = validate_last_level;
brw_obj->_Format = firstImage->base.Base.TexFormat;
brw_obj->needs_validate = false;
}
/**
* Finalizes all textures, completing any rendering that needs to be done
* to prepare them.
*/
void
brw_validate_textures(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;
for (int unit = 0; unit <= max_enabled_unit; unit++) {
struct gl_texture_object *tex_obj = ctx->Texture.Unit[unit]._Current;
if (!tex_obj)
continue;
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
/* We know that this is true by now, and if it wasn't, we might have
* mismatched level sizes and the copies would fail.
*/
assert(tex_obj->_BaseComplete);
brw_update_max_level(tex_obj, sampler);
brw_finalize_mipmap_tree(brw, tex_obj);
}
}

View file

@ -1,134 +0,0 @@
/*
* Copyright 2003 VMware, Inc.
* Copyright © 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* @file intel_upload.c
*
* Batched upload via BOs.
*/
#include "main/macros.h"
#include "brw_bufmgr.h"
#include "brw_context.h"
#include "brw_buffer_objects.h"
void
brw_upload_finish(struct brw_uploader *upload)
{
assert((upload->bo == NULL) == (upload->map == NULL));
if (!upload->bo)
return;
brw_bo_unmap(upload->bo);
brw_bo_unreference(upload->bo);
upload->bo = NULL;
upload->map = NULL;
upload->next_offset = 0;
}
/**
* Interface for getting memory for uploading streamed data to the GPU
*
* In most cases, streamed data (for GPU state structures, for example) is
* uploaded through brw_state_batch(), since that interface allows relocations
* from the streamed space returned to other BOs. However, that interface has
* the restriction that the amount of space allocated has to be "small".
*
* This interface, on the other hand, is able to handle arbitrary sized
* allocation requests, though it will batch small allocations into the same
* BO for efficiency and reduced memory footprint.
*
* \note The returned pointer is valid only until brw_upload_finish().
*
* \param out_bo Pointer to a BO, which must point to a valid BO or NULL on
* entry, and will have a reference to the new BO containing the state on
* return.
*
* \param out_offset Offset within the buffer object that the data will land.
*/
void *
brw_upload_space(struct brw_uploader *upload,
uint32_t size,
uint32_t alignment,
struct brw_bo **out_bo,
uint32_t *out_offset)
{
uint32_t offset;
offset = ALIGN_NPOT(upload->next_offset, alignment);
if (upload->bo && offset + size > upload->bo->size) {
brw_upload_finish(upload);
offset = 0;
}
assert((upload->bo == NULL) == (upload->map == NULL));
if (!upload->bo) {
upload->bo = brw_bo_alloc(upload->bufmgr, "streamed data",
MAX2(upload->default_size, size),
BRW_MEMZONE_OTHER);
upload->map = brw_bo_map(NULL, upload->bo,
MAP_READ | MAP_WRITE |
MAP_PERSISTENT | MAP_ASYNC);
}
upload->next_offset = offset + size;
*out_offset = offset;
if (*out_bo != upload->bo) {
brw_bo_unreference(*out_bo);
*out_bo = upload->bo;
brw_bo_reference(upload->bo);
}
return upload->map + offset;
}
/**
* Handy interface to upload some data to temporary GPU memory quickly.
*
* References to this memory should not be retained across batch flushes.
*/
void
brw_upload_data(struct brw_uploader *upload,
const void *data,
uint32_t size,
uint32_t alignment,
struct brw_bo **out_bo,
uint32_t *out_offset)
{
void *dst = brw_upload_space(upload, size, alignment, out_bo, out_offset);
memcpy(dst, data, size);
}
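/* Illustrative usage sketch, not part of the original driver: how a caller
 * might stream a small block of constants through the uploader.  The function
 * name and the 64-byte alignment are hypothetical; only brw_upload_data()
 * above is real.
 */
static void
example_stream_params(struct brw_uploader *upload,
                      const float params[4],
                      struct brw_bo **out_bo, uint32_t *out_offset)
{
   /* *out_bo must be NULL or a valid BO on entry; on return it references the
    * upload BO and *out_offset points at the copied data within it.
    */
   brw_upload_data(upload, params, 4 * sizeof(float), 64, out_bo, out_offset);
}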
void
brw_upload_init(struct brw_uploader *upload,
struct brw_bufmgr *bufmgr,
unsigned default_size)
{
upload->bufmgr = bufmgr;
upload->bo = NULL;
upload->map = NULL;
upload->next_offset = 0;
upload->default_size = default_size;
}

View file

@ -1,268 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "brw_batch.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#define VS 0
#define GS 1
#define CLP 2
#define SF 3
#define CS 4
/** @file brw_urb.c
*
* Manages the division of the URB space between the various fixed-function
* units.
*
* See the Thread Initiation Management section of the GFX4 B-Spec, and
* the individual *_STATE structures for restrictions on numbers of
* entries and threads.
*/
/*
* Generally, a unit requires a min_nr_entries based on how many entries
* it produces before the downstream unit gets unblocked and can use and
* dereference some of its handles.
*
* The SF unit preallocates a PUE at the start of thread dispatch, and only
* uses that one. So it requires one entry per thread.
*
* For CLIP, the SF unit will hold the previous primitive while the
* next is getting assembled, meaning that linestrips require 3 CLIP VUEs
* (vertices) to ensure continued processing, trifans require 4, and tristrips
* require 5. There can be 1 or 2 threads, and each has the same requirement.
*
* GS has the same requirement as CLIP, but it never handles tristrips,
* so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
* We only run it single-threaded.
*
* For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
* Each thread processes 2 preallocated VUEs (vertices) at a time, and they
* get streamed down as soon as threads processing earlier vertices get
* theirs accepted.
*
* Each unit will take the number of URB entries we give it (based on the
* entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
* and brw_curbe.c for the CURBEs) and decide the maximum number of
* threads it can support based on that, in brw_*_state.c.
*
* XXX: Are the min_entry_size numbers useful?
* XXX: Verify min_nr_entries, esp for VS.
* XXX: Verify SF min_entry_size.
*/
static const struct {
GLuint min_nr_entries;
GLuint preferred_nr_entries;
GLuint min_entry_size;
GLuint max_entry_size;
} limits[CS+1] = {
{ 16, 32, 1, 5 }, /* vs */
{ 4, 8, 1, 5 }, /* gs */
{ 5, 10, 1, 5 }, /* clp */
{ 1, 8, 1, 12 }, /* sf */
{ 1, 4, 1, 32 } /* cs */
};
static bool check_urb_layout(struct brw_context *brw)
{
brw->urb.vs_start = 0;
brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
return brw->urb.cs_start + brw->urb.nr_cs_entries *
brw->urb.csize <= brw->urb.size;
}
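/* Illustrative sketch, not part of the original driver: the layout above with
 * the preferred entry counts (32 VS, 8 GS, 10 CLIP, 8 SF, 4 CS) and example
 * entry sizes vsize = 4, sfsize = 8, csize = 32:
 *
 *   vs_start   = 0
 *   gs_start   = 32 * 4       = 128
 *   clip_start = 128 + 8 * 4  = 160
 *   sf_start   = 160 + 10 * 4 = 200
 *   cs_start   = 200 + 8 * 8  = 264
 *   end        = 264 + 4 * 32 = 392
 *
 * so this configuration only fits if brw->urb.size is at least 392; otherwise
 * the code below falls back to the minimum entry counts and marks the URB as
 * constrained.
 */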
/* Most minimal update, forces re-emit of URB fence packet after GS
* unit turned on/off.
*/
void
brw_calculate_urb_fence(struct brw_context *brw, unsigned csize,
unsigned vsize, unsigned sfsize)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
if (csize < limits[CS].min_entry_size)
csize = limits[CS].min_entry_size;
if (vsize < limits[VS].min_entry_size)
vsize = limits[VS].min_entry_size;
if (sfsize < limits[SF].min_entry_size)
sfsize = limits[SF].min_entry_size;
if (brw->urb.vsize < vsize ||
brw->urb.sfsize < sfsize ||
brw->urb.csize < csize ||
(brw->urb.constrained && (brw->urb.vsize > vsize ||
brw->urb.sfsize > sfsize ||
brw->urb.csize > csize))) {
brw->urb.csize = csize;
brw->urb.sfsize = sfsize;
brw->urb.vsize = vsize;
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
brw->urb.constrained = 0;
if (devinfo->ver == 5) {
brw->urb.nr_vs_entries = 128;
brw->urb.nr_sf_entries = 48;
if (check_urb_layout(brw)) {
goto done;
} else {
brw->urb.constrained = 1;
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
}
} else if (devinfo->verx10 == 45) {
brw->urb.nr_vs_entries = 64;
if (check_urb_layout(brw)) {
goto done;
} else {
brw->urb.constrained = 1;
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
}
}
if (!check_urb_layout(brw)) {
brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
/* Mark us as operating with constrained nr_entries, so that next
* time we recalculate we'll resize the fences in the hope of
* escaping constrained mode and getting back to normal performance.
*/
brw->urb.constrained = 1;
if (!check_urb_layout(brw)) {
/* This is impossible, given the maximal sizes of urb
* entries and the values for minimum nr of entries
* provided above.
*/
fprintf(stderr, "couldn't calculate URB layout!\n");
exit(1);
}
if (INTEL_DEBUG(DEBUG_URB|DEBUG_PERF))
fprintf(stderr, "URB CONSTRAINED\n");
}
done:
if (INTEL_DEBUG(DEBUG_URB))
fprintf(stderr,
"URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
brw->urb.vs_start,
brw->urb.gs_start,
brw->urb.clip_start,
brw->urb.sf_start,
brw->urb.cs_start,
brw->urb.size);
brw->ctx.NewDriverState |= BRW_NEW_URB_FENCE;
}
}
static void recalculate_urb_fence( struct brw_context *brw )
{
brw_calculate_urb_fence(brw, brw->curbe.total_size,
brw_vue_prog_data(brw->vs.base.prog_data)->urb_entry_size,
brw->sf.prog_data->urb_entry_size);
}
const struct brw_tracked_state brw_recalculate_urb_fence = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BLORP |
BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_SF_PROG_DATA |
BRW_NEW_VS_PROG_DATA,
},
.emit = recalculate_urb_fence
};
void brw_upload_urb_fence(struct brw_context *brw)
{
struct brw_urb_fence uf;
memset(&uf, 0, sizeof(uf));
uf.header.opcode = CMD_URB_FENCE;
uf.header.length = sizeof(uf)/4-2;
uf.header.vs_realloc = 1;
uf.header.gs_realloc = 1;
uf.header.clp_realloc = 1;
uf.header.sf_realloc = 1;
uf.header.vfe_realloc = 1;
uf.header.cs_realloc = 1;
/* The ordering below is correct, not the layout in the
* instruction.
*
* There are 256/384 urb reg pairs in total.
*/
uf.bits0.vs_fence = brw->urb.gs_start;
uf.bits0.gs_fence = brw->urb.clip_start;
uf.bits0.clp_fence = brw->urb.sf_start;
uf.bits1.sf_fence = brw->urb.cs_start;
uf.bits1.cs_fence = brw->urb.size;
/* erratum: URB_FENCE must not cross a 64byte cacheline */
if ((USED_BATCH(brw->batch) & 15) > 12) {
int pad = 16 - (USED_BATCH(brw->batch) & 15);
do
*brw->batch.map_next++ = MI_NOOP;
while (--pad);
}
brw_batch_data(brw, &uf, sizeof(uf));
}
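/* Illustrative sketch, not part of the original driver: the cacheline erratum
 * handling above in isolation.  The batch is padded with MI_NOOPs whenever
 * fewer than 4 DWords remain before the next 64-byte (16-DWord) boundary, so
 * the short URB_FENCE packet never straddles a cacheline.
 * example_urb_fence_pad_dwords() is a hypothetical helper.
 */
static unsigned
example_urb_fence_pad_dwords(unsigned used_dwords)
{
   return (used_dwords & 15) > 12 ? 16 - (used_dwords & 15) : 0;
}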

View file

@ -1,125 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "brw_util.h"
#include "brw_defines.h"
#include "compiler/brw_eu_defines.h"
GLuint brw_translate_blend_equation( GLenum mode )
{
switch (mode) {
case GL_FUNC_ADD:
return BRW_BLENDFUNCTION_ADD;
case GL_MIN:
return BRW_BLENDFUNCTION_MIN;
case GL_MAX:
return BRW_BLENDFUNCTION_MAX;
case GL_FUNC_SUBTRACT:
return BRW_BLENDFUNCTION_SUBTRACT;
case GL_FUNC_REVERSE_SUBTRACT:
return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
default:
unreachable("not reached");
}
}
GLuint brw_translate_blend_factor( GLenum factor )
{
switch(factor) {
case GL_ZERO:
return BRW_BLENDFACTOR_ZERO;
case GL_SRC_ALPHA:
return BRW_BLENDFACTOR_SRC_ALPHA;
case GL_ONE:
return BRW_BLENDFACTOR_ONE;
case GL_SRC_COLOR:
return BRW_BLENDFACTOR_SRC_COLOR;
case GL_ONE_MINUS_SRC_COLOR:
return BRW_BLENDFACTOR_INV_SRC_COLOR;
case GL_DST_COLOR:
return BRW_BLENDFACTOR_DST_COLOR;
case GL_ONE_MINUS_DST_COLOR:
return BRW_BLENDFACTOR_INV_DST_COLOR;
case GL_ONE_MINUS_SRC_ALPHA:
return BRW_BLENDFACTOR_INV_SRC_ALPHA;
case GL_DST_ALPHA:
return BRW_BLENDFACTOR_DST_ALPHA;
case GL_ONE_MINUS_DST_ALPHA:
return BRW_BLENDFACTOR_INV_DST_ALPHA;
case GL_SRC_ALPHA_SATURATE:
return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
case GL_CONSTANT_COLOR:
return BRW_BLENDFACTOR_CONST_COLOR;
case GL_ONE_MINUS_CONSTANT_COLOR:
return BRW_BLENDFACTOR_INV_CONST_COLOR;
case GL_CONSTANT_ALPHA:
return BRW_BLENDFACTOR_CONST_ALPHA;
case GL_ONE_MINUS_CONSTANT_ALPHA:
return BRW_BLENDFACTOR_INV_CONST_ALPHA;
case GL_SRC1_COLOR:
return BRW_BLENDFACTOR_SRC1_COLOR;
case GL_SRC1_ALPHA:
return BRW_BLENDFACTOR_SRC1_ALPHA;
case GL_ONE_MINUS_SRC1_COLOR:
return BRW_BLENDFACTOR_INV_SRC1_COLOR;
case GL_ONE_MINUS_SRC1_ALPHA:
return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
default:
unreachable("not reached");
}
}
static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
[GL_POINTS] = _3DPRIM_POINTLIST,
[GL_LINES] = _3DPRIM_LINELIST,
[GL_LINE_LOOP] = _3DPRIM_LINELOOP,
[GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
[GL_TRIANGLES] = _3DPRIM_TRILIST,
[GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
[GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
[GL_QUADS] = _3DPRIM_QUADLIST,
[GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
[GL_POLYGON] = _3DPRIM_POLYGON,
[GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
[GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
[GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
[GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};
uint32_t
get_hw_prim_for_gl_prim(int mode)
{
assert(mode < ARRAY_SIZE(prim_to_hw_prim));
return prim_to_hw_prim[mode];
}

View file

@ -1,74 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#ifndef BRW_UTIL_H
#define BRW_UTIL_H
#include "brw_context.h"
#include "main/framebuffer.h"
extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );
static inline float
brw_get_line_width(struct brw_context *brw)
{
/* From the OpenGL 4.4 spec:
*
* "The actual width of non-antialiased lines is determined by rounding
* the supplied width to the nearest integer, then clamping it to the
* implementation-dependent maximum non-antialiased line width."
*/
float line_width =
CLAMP(!_mesa_is_multisample_enabled(&brw->ctx) && !brw->ctx.Line.SmoothFlag
? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
0.125f, brw->ctx.Const.MaxLineWidth);
if (!_mesa_is_multisample_enabled(&brw->ctx) && brw->ctx.Line.SmoothFlag && line_width < 1.5f) {
/* For 1 pixel line thickness or less, the general
* anti-aliasing algorithm gives up, and a garbage line is
* generated. Setting a Line Width of 0.0 specifies the
* rasterization of the "thinnest" (one-pixel-wide),
* non-antialiased lines.
*
* Lines rendered with zero Line Width are rasterized using
* Grid Intersection Quantization rules as specified by
* bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
* Rasterization.
*/
line_width = 0.0f;
}
return line_width;
}
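/* Illustrative sketch, not part of the original driver: the rules above in
 * numbers.  With multisampling and smoothing disabled, a requested width of
 * 3.4 is rounded to 3.0 and then clamped to [0.125, MaxLineWidth]; with
 * smoothing enabled, a 1.2-pixel line falls below the 1.5 threshold and is
 * demoted to a zero-width (cosmetic) line instead of taking the
 * anti-aliasing path.
 */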
#endif

View file

@ -1,369 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "util/compiler.h"
#include "main/context.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
#include "brw_state.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "util/ralloc.h"
/**
* Decide which set of clip planes should be used when clipping via
* gl_Position or gl_ClipVertex.
*/
gl_clip_plane *
brw_select_clip_planes(struct gl_context *ctx)
{
if (ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]) {
/* There is currently a GLSL vertex shader, so clip according to GLSL
* rules, which means compare gl_ClipVertex (or gl_Position, if
* gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
* that were stored in EyeUserPlane at the time the clip planes were
* specified.
*/
return ctx->Transform.EyeUserPlane;
} else {
/* Either we are using fixed function or an ARB vertex program. In
* either case the clip planes are going to be compared against
* gl_Position (which is in clip coordinates) so we have to clip using
* _ClipUserPlane, which was transformed into clip coordinates by Mesa
* core.
*/
return ctx->Transform._ClipUserPlane;
}
}
static GLbitfield64
brw_vs_outputs_written(struct brw_context *brw, struct brw_vs_prog_key *key,
GLbitfield64 user_varyings)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
GLbitfield64 outputs_written = user_varyings;
if (devinfo->ver < 6) {
/* Put dummy slots into the VUE for the SF to put the replaced
* point sprite coords in. We shouldn't need these dummy slots,
* which take up precious URB space, but dropping them would mean the SF
* no longer gets nicely aligned pairs of input and output coords, which
* would be a pain to handle.
*/
for (unsigned i = 0; i < 8; i++) {
if (key->point_coord_replace & (1 << i))
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
}
/* if back colors are written, allocate slots for front colors too */
if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
}
/* In order for legacy clipping to work, we need to populate the clip
* distance varying slots whenever clipping is enabled, even if the vertex
* shader doesn't write to gl_ClipDistance.
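* (gl_ClipDistance is split across two vec4 slots: CLIP_DIST0 holds the
* distances for planes 0-3 and CLIP_DIST1 those for planes 4-7, which is
* why both slots are marked written below.)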
*/
if (key->nr_userclip_plane_consts > 0) {
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
}
return outputs_written;
}
static bool
brw_codegen_vs_prog(struct brw_context *brw,
struct brw_program *vp,
struct brw_vs_prog_key *key)
{
const struct brw_compiler *compiler = brw->screen->compiler;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
const GLuint *program;
struct brw_vs_prog_data prog_data;
struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
void *mem_ctx;
bool start_busy = false;
double start_time = 0;
memset(&prog_data, 0, sizeof(prog_data));
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
if (vp->program.info.is_arb_asm)
stage_prog_data->use_alt_mode = true;
mem_ctx = ralloc_context(NULL);
nir_shader *nir = nir_shader_clone(mem_ctx, vp->program.nir);
brw_assign_common_binding_table_offsets(devinfo, &vp->program,
&prog_data.base.base, 0);
if (!vp->program.info.is_arb_asm) {
brw_nir_setup_glsl_uniforms(mem_ctx, nir, &vp->program,
&prog_data.base.base,
compiler->scalar_stage[MESA_SHADER_VERTEX]);
if (brw->can_push_ubos) {
brw_nir_analyze_ubo_ranges(compiler, nir, key,
prog_data.base.base.ubo_ranges);
}
} else {
brw_nir_setup_arb_uniforms(mem_ctx, nir, &vp->program,
&prog_data.base.base);
}
if (key->nr_userclip_plane_consts > 0) {
brw_nir_lower_legacy_clipping(nir, key->nr_userclip_plane_consts,
&prog_data.base.base);
}
if (key->copy_edgeflag)
nir_lower_passthrough_edgeflags(nir);
uint64_t outputs_written =
brw_vs_outputs_written(brw, key, nir->info.outputs_written);
brw_compute_vue_map(devinfo,
&prog_data.base.vue_map, outputs_written,
nir->info.separate_shader, 1);
if (0) {
_mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
}
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
brw_bo_busy(brw->batch.last_bo));
start_time = get_time();
}
if (INTEL_DEBUG(DEBUG_VS)) {
if (vp->program.info.is_arb_asm)
brw_dump_arb_asm("vertex", &vp->program);
}
/* Emit GFX4 code.
*/
struct brw_compile_vs_params params = {
.nir = nir,
.key = key,
.prog_data = &prog_data,
.log_data = brw,
};
if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
params.shader_time = true;
params.shader_time_index =
brw_get_shader_time_index(brw, &vp->program, ST_VS,
!vp->program.info.is_arb_asm);
}
program = brw_compile_vs(compiler, mem_ctx, &params);
if (program == NULL) {
if (!vp->program.info.is_arb_asm) {
vp->program.sh.data->LinkStatus = LINKING_FAILURE;
ralloc_strcat(&vp->program.sh.data->InfoLog, params.error_str);
}
_mesa_problem(NULL, "Failed to compile vertex shader: %s\n", params.error_str);
ralloc_free(mem_ctx);
return false;
}
if (unlikely(brw->perf_debug)) {
if (vp->compiled_once) {
brw_debug_recompile(brw, MESA_SHADER_VERTEX, vp->program.Id,
&key->base);
}
if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
perf_debug("VS compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
vp->compiled_once = true;
}
/* Scratch space is used for register spilling */
brw_alloc_stage_scratch(brw, &brw->vs.base,
prog_data.base.base.total_scratch);
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.base.param);
ralloc_steal(NULL, prog_data.base.base.pull_param);
brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
key, sizeof(struct brw_vs_prog_key),
program, prog_data.base.base.program_size,
&prog_data, sizeof(prog_data),
&brw->vs.base.prog_offset, &brw->vs.base.prog_data);
ralloc_free(mem_ctx);
return true;
}
static bool
brw_vs_state_dirty(const struct brw_context *brw)
{
return brw_state_dirty(brw,
_NEW_BUFFERS |
_NEW_LIGHT |
_NEW_POINT |
_NEW_POLYGON |
_NEW_TEXTURE |
_NEW_TRANSFORM,
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VS_ATTRIB_WORKAROUNDS);
}
void
brw_vs_populate_key(struct brw_context *brw,
struct brw_vs_prog_key *key)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_VERTEX_PROGRAM */
struct gl_program *prog = brw->programs[MESA_SHADER_VERTEX];
struct brw_program *vp = (struct brw_program *) prog;
const struct intel_device_info *devinfo = &brw->screen->devinfo;
memset(key, 0, sizeof(*key));
/* Just upload the program verbatim for now. Always send it all
* the inputs it asks for, whether they are varying or not.
*/
/* _NEW_TEXTURE */
brw_populate_base_prog_key(ctx, vp, &key->base);
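/* nr_userclip_plane_consts counts planes up to and including the highest
 * enabled one. For example, if planes 0 and 2 are enabled (mask 0b101),
 * util_logbase2(5) + 1 == 3, so constants for planes 0..2 are uploaded even
 * though plane 1 itself is disabled.
 */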
if (ctx->Transform.ClipPlanesEnabled != 0 &&
(ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) &&
vp->program.info.clip_distance_array_size == 0) {
key->nr_userclip_plane_consts =
util_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
}
if (devinfo->ver < 6) {
/* _NEW_POLYGON */
key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
/* _NEW_POINT */
if (ctx->Point.PointSprite) {
key->point_coord_replace = ctx->Point.CoordReplace & 0xff;
}
}
if (prog->info.outputs_written &
(VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
VARYING_BIT_BFC1)) {
/* _NEW_LIGHT | _NEW_BUFFERS */
key->clamp_vertex_color = ctx->Light._ClampVertexColor;
}
/* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
if (devinfo->verx10 <= 70) {
memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
sizeof(brw->vb.attrib_wa_flags));
}
}
void
brw_upload_vs_prog(struct brw_context *brw)
{
struct brw_vs_prog_key key;
/* BRW_NEW_VERTEX_PROGRAM */
struct brw_program *vp =
(struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
if (!brw_vs_state_dirty(brw))
return;
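/* Lookup order: the in-memory program cache first, then the on-disk shader
 * cache, and only if both miss do we compile the program from scratch below.
 */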
brw_vs_populate_key(brw, &key);
if (brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, &key, sizeof(key),
&brw->vs.base.prog_offset, &brw->vs.base.prog_data,
true))
return;
if (brw_disk_cache_upload_program(brw, MESA_SHADER_VERTEX))
return;
vp = (struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
vp->id = key.base.program_string_id;
ASSERTED bool success = brw_codegen_vs_prog(brw, vp, &key);
assert(success);
}
void
brw_vs_populate_default_key(const struct brw_compiler *compiler,
struct brw_vs_prog_key *key,
struct gl_program *prog)
{
const struct intel_device_info *devinfo = compiler->devinfo;
struct brw_program *bvp = brw_program(prog);
memset(key, 0, sizeof(*key));
brw_populate_default_base_prog_key(devinfo, bvp, &key->base);
key->clamp_vertex_color =
(prog->info.outputs_written &
(VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
VARYING_BIT_BFC1));
}
bool
brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
struct brw_vs_prog_key key;
uint32_t old_prog_offset = brw->vs.base.prog_offset;
struct brw_stage_prog_data *old_prog_data = brw->vs.base.prog_data;
bool success;
struct brw_program *bvp = brw_program(prog);
brw_vs_populate_default_key(brw->screen->compiler, &key, prog);
success = brw_codegen_vs_prog(brw, bvp, &key);
brw->vs.base.prog_offset = old_prog_offset;
brw->vs.base.prog_data = old_prog_data;
return success;
}

View file

@@ -1,57 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#ifndef BRW_VS_H
#define BRW_VS_H
#include "brw_context.h"
#ifdef __cplusplus
extern "C" {
#endif
void
brw_upload_vs_prog(struct brw_context *brw);
void
brw_vs_populate_key(struct brw_context *brw,
struct brw_vs_prog_key *key);
void
brw_vs_populate_default_key(const struct brw_compiler *compiler,
struct brw_vs_prog_key *key,
struct gl_program *prog);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

View file

@@ -1,119 +0,0 @@
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "main/shaderapi.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_buffer_objects.h"
/* Creates a new VS constant buffer reflecting the current VS program's
* constants, if needed by the VS program.
*
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
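*
* Pull constants are fetched by the shader at run time with data-port reads
* from this buffer, whereas pushed (CURBE) constants are preloaded into
* registers before the thread starts; large or indirectly indexed uniform
* arrays typically end up in the pull path.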
*/
static void
brw_upload_vs_pull_constants(struct brw_context *brw)
{
struct brw_stage_state *stage_state = &brw->vs.base;
/* BRW_NEW_VERTEX_PROGRAM */
struct brw_program *vp =
(struct brw_program *) brw->programs[MESA_SHADER_VERTEX];
/* BRW_NEW_VS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
_mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX);
/* _NEW_PROGRAM_CONSTANTS */
brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program,
stage_state, prog_data);
}
const struct brw_tracked_state brw_vs_pull_constants = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VS_PROG_DATA,
},
.emit = brw_upload_vs_pull_constants,
};
static void
brw_upload_vs_ubo_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* _NEW_PROGRAM */
struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
/* BRW_NEW_VS_PROG_DATA */
brw_upload_ubo_surfaces(brw, prog, &brw->vs.base, brw->vs.base.prog_data);
}
const struct brw_tracked_state brw_vs_ubo_surfaces = {
.dirty = {
.mesa = _NEW_PROGRAM,
.brw = BRW_NEW_BATCH |
BRW_NEW_UNIFORM_BUFFER |
BRW_NEW_VS_PROG_DATA,
},
.emit = brw_upload_vs_ubo_surfaces,
};
static void
brw_upload_vs_image_surfaces(struct brw_context *brw)
{
/* BRW_NEW_VERTEX_PROGRAM */
const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
if (vp) {
/* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
brw_upload_image_surfaces(brw, vp, &brw->vs.base,
brw->vs.base.prog_data);
}
}
const struct brw_tracked_state brw_vs_image_surfaces = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_AUX_STATE |
BRW_NEW_IMAGE_UNITS |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VS_PROG_DATA,
},
.emit = brw_upload_vs_image_surfaces,
};

View file

@@ -1,639 +0,0 @@
/*
* Copyright (C) Intel Corp. 2006. All Rights Reserved.
* Intel funded Tungsten Graphics to
* develop this 3D driver.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_state.h"
#include "main/enums.h"
#include "main/formats.h"
#include "main/fbobject.h"
#include "main/samplerobj.h"
#include "main/framebuffer.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "brw_mipmap_tree.h"
#include "brw_image.h"
#include "brw_fbo.h"
#include "compiler/brw_nir.h"
#include "brw_program.h"
#include "util/ralloc.h"
#include "util/u_math.h"
static void
assign_fs_binding_table_offsets(const struct intel_device_info *devinfo,
const struct gl_program *prog,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data)
{
/* Render targets implicitly start at surface index 0. Even if there are
* no color regions, we still perform an FB write to a null render target,
* which will be surface 0.
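*
* For example, with nr_color_regions == 2 the render targets occupy
* surfaces 0 and 1, the common entries (textures, UBOs, images) are
* assigned starting at surface 2, and two render-target-read surfaces
* follow them when non-coherent framebuffer fetch is in use.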
*/
uint32_t next_binding_table_offset = MAX2(key->nr_color_regions, 1);
next_binding_table_offset =
brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
next_binding_table_offset);
if (prog->nir->info.outputs_read && !key->coherent_fb_fetch) {
prog_data->binding_table.render_target_read_start =
next_binding_table_offset;
next_binding_table_offset += key->nr_color_regions;
}
/* Update the binding table size */
prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4;
}
static bool
brw_codegen_wm_prog(struct brw_context *brw,
struct brw_program *fp,
struct brw_wm_prog_key *key,
struct brw_vue_map *vue_map)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
void *mem_ctx = ralloc_context(NULL);
struct brw_wm_prog_data prog_data;
const GLuint *program;
bool start_busy = false;
double start_time = 0;
nir_shader *nir = nir_shader_clone(mem_ctx, fp->program.nir);
memset(&prog_data, 0, sizeof(prog_data));
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
if (fp->program.info.is_arb_asm)
prog_data.base.use_alt_mode = true;
assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data);
if (!fp->program.info.is_arb_asm) {
brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program,
&prog_data.base, true);
if (brw->can_push_ubos) {
brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir,
NULL, prog_data.base.ubo_ranges);
}
} else {
brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base);
if (INTEL_DEBUG(DEBUG_WM))
brw_dump_arb_asm("fragment", &fp->program);
}
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
brw_bo_busy(brw->batch.last_bo));
start_time = get_time();
}
struct brw_compile_fs_params params = {
.nir = nir,
.key = key,
.prog_data = &prog_data,
.allow_spilling = true,
.vue_map = vue_map,
.log_data = brw,
};
if (INTEL_DEBUG(DEBUG_SHADER_TIME)) {
params.shader_time = true;
params.shader_time_index8 =
brw_get_shader_time_index(brw, &fp->program, ST_FS8,
!fp->program.info.is_arb_asm);
params.shader_time_index16 =
brw_get_shader_time_index(brw, &fp->program, ST_FS16,
!fp->program.info.is_arb_asm);
params.shader_time_index32 =
brw_get_shader_time_index(brw, &fp->program, ST_FS32,
!fp->program.info.is_arb_asm);
}
program = brw_compile_fs(brw->screen->compiler, mem_ctx, &params);
if (program == NULL) {
if (!fp->program.info.is_arb_asm) {
fp->program.sh.data->LinkStatus = LINKING_FAILURE;
ralloc_strcat(&fp->program.sh.data->InfoLog, params.error_str);
}
_mesa_problem(NULL, "Failed to compile fragment shader: %s\n", params.error_str);
ralloc_free(mem_ctx);
return false;
}
if (unlikely(brw->perf_debug)) {
if (fp->compiled_once) {
brw_debug_recompile(brw, MESA_SHADER_FRAGMENT, fp->program.Id,
&key->base);
}
fp->compiled_once = true;
if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
perf_debug("FS compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
}
brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);
if (INTEL_DEBUG(DEBUG_WM) && fp->program.info.is_arb_asm)
fprintf(stderr, "\n");
/* The param and pull_param arrays will be freed by the shader cache. */
ralloc_steal(NULL, prog_data.base.param);
ralloc_steal(NULL, prog_data.base.pull_param);
brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
key, sizeof(struct brw_wm_prog_key),
program, prog_data.base.program_size,
&prog_data, sizeof(prog_data),
&brw->wm.base.prog_offset, &brw->wm.base.prog_data);
ralloc_free(mem_ctx);
return true;
}
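/* gather4 from UINT/SINT surfaces is broken on Gfx6: the sampler returns
 * UNORM-style data. The flags returned here tell the FS backend how to
 * repair the result; roughly, the shader rescales by (1 << width) - 1 and,
 * for signed formats, shifts left then arithmetic-shifts right by
 * 32 - width to restore the sign.
 */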
static uint8_t
gfx6_gather_workaround(GLenum internalformat)
{
switch (internalformat) {
case GL_R8I: return WA_SIGN | WA_8BIT;
case GL_R8UI: return WA_8BIT;
case GL_R16I: return WA_SIGN | WA_16BIT;
case GL_R16UI: return WA_16BIT;
default:
/* Note that even though GL_R32I and GL_R32UI have format overrides in
* the surface state, there is no shader w/a required.
*/
return 0;
}
}
static void
brw_populate_sampler_prog_key_data(struct gl_context *ctx,
const struct gl_program *prog,
struct brw_sampler_prog_key_data *key)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
GLbitfield mask = prog->SamplersUsed;
while (mask) {
const int s = u_bit_scan(&mask);
key->swizzles[s] = SWIZZLE_NOOP;
key->scale_factors[s] = 0.0f;
int unit_id = prog->SamplerUnits[s];
const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id];
if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) {
const struct gl_texture_object *t = unit->_Current;
const struct gl_texture_image *img = t->Image[0][t->Attrib.BaseLevel];
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id);
const bool alpha_depth = t->Attrib.DepthMode == GL_ALPHA &&
(img->_BaseFormat == GL_DEPTH_COMPONENT ||
img->_BaseFormat == GL_DEPTH_STENCIL);
/* Haswell handles texture swizzling as surface format overrides
* (except for GL_ALPHA); all other platforms need MOVs in the shader.
*/
if (alpha_depth || (devinfo->verx10 <= 70))
key->swizzles[s] = brw_get_texture_swizzle(ctx, t);
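/* GL_CLAMP with a linear filter has no exact hardware wrap mode before
 * Gfx8, so record which axes use it; the compiler then saturates the
 * corresponding texture coordinate to [0, 1] in the shader.
 */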
if (devinfo->ver < 8 &&
sampler->Attrib.MinFilter != GL_NEAREST &&
sampler->Attrib.MagFilter != GL_NEAREST) {
if (sampler->Attrib.WrapS == GL_CLAMP)
key->gl_clamp_mask[0] |= 1 << s;
if (sampler->Attrib.WrapT == GL_CLAMP)
key->gl_clamp_mask[1] |= 1 << s;
if (sampler->Attrib.WrapR == GL_CLAMP)
key->gl_clamp_mask[2] |= 1 << s;
}
/* gather4 for RG32* is broken in multiple ways on Gfx7. */
if (devinfo->ver == 7 && prog->info.uses_texture_gather) {
switch (img->InternalFormat) {
case GL_RG32I:
case GL_RG32UI: {
/* We have to override the format to R32G32_FLOAT_LD.
* This means that SCS_ALPHA and SCS_ONE will return 0x3f8
* (1.0) rather than integer 1. This needs shader hacks.
*
* On Ivybridge, we whack W (alpha) to ONE in our key's
* swizzle. On Haswell, we look at the original texture
* swizzle, and use XYZW with channels overridden to ONE,
* leaving normal texture swizzling to SCS.
*/
unsigned src_swizzle =
devinfo->platform == INTEL_PLATFORM_HSW ?
t->Attrib._Swizzle : key->swizzles[s];
for (int i = 0; i < 4; i++) {
unsigned src_comp = GET_SWZ(src_swizzle, i);
if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) {
key->swizzles[i] &= ~(0x7 << (3 * i));
key->swizzles[i] |= SWIZZLE_ONE << (3 * i);
}
}
}
FALLTHROUGH;
case GL_RG32F:
/* The channel select for green doesn't work - we have to
* request blue. Haswell can use SCS for this, but Ivybridge
* needs a shader workaround.
*/
if (devinfo->platform != INTEL_PLATFORM_HSW)
key->gather_channel_quirk_mask |= 1 << s;
break;
}
}
/* Gfx6's gather4 is broken for UINT/SINT; we treat them as
* UNORM/FLOAT instead and fix it in the shader.
*/
if (devinfo->ver == 6 && prog->info.uses_texture_gather) {
key->gfx6_gather_wa[s] = gfx6_gather_workaround(img->InternalFormat);
}
/* If this is a multisample sampler, and uses the CMS MSAA layout,
* then we need to emit slightly different code to first sample the
* MCS surface.
*/
struct brw_texture_object *intel_tex =
brw_texture_object((struct gl_texture_object *)t);
/* From gfx9 onwards some single sampled buffers can also be
* compressed. These don't need ld2dms sampling along with mcs fetch.
*/
if (intel_tex->mt->aux_usage == ISL_AUX_USAGE_MCS) {
assert(devinfo->ver >= 7);
assert(intel_tex->mt->surf.samples > 1);
assert(intel_tex->mt->aux_buf);
assert(intel_tex->mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY);
key->compressed_multisample_layout_mask |= 1 << s;
if (intel_tex->mt->surf.samples >= 16) {
assert(devinfo->ver >= 9);
key->msaa_16 |= 1 << s;
}
}
if (t->Target == GL_TEXTURE_EXTERNAL_OES && intel_tex->planar_format) {
/* Setup possible scaling factor. */
key->scale_factors[s] = intel_tex->planar_format->scaling_factor;
switch (intel_tex->planar_format->components) {
case __DRI_IMAGE_COMPONENTS_Y_UV:
key->y_uv_image_mask |= 1 << s;
break;
case __DRI_IMAGE_COMPONENTS_Y_U_V:
key->y_u_v_image_mask |= 1 << s;
break;
case __DRI_IMAGE_COMPONENTS_Y_XUXV:
key->yx_xuxv_image_mask |= 1 << s;
break;
case __DRI_IMAGE_COMPONENTS_Y_UXVX:
key->xy_uxvx_image_mask |= 1 << s;
break;
case __DRI_IMAGE_COMPONENTS_AYUV:
key->ayuv_image_mask |= 1 << s;
break;
case __DRI_IMAGE_COMPONENTS_XYUV:
key->xyuv_image_mask |= 1 << s;
break;
default:
break;
}
switch (intel_tex->yuv_color_space) {
case __DRI_YUV_COLOR_SPACE_ITU_REC709:
key->bt709_mask |= 1 << s;
break;
case __DRI_YUV_COLOR_SPACE_ITU_REC2020:
key->bt2020_mask |= 1 << s;
break;
default:
break;
}
}
}
}
}
void
brw_populate_base_prog_key(struct gl_context *ctx,
const struct brw_program *prog,
struct brw_base_prog_key *key)
{
key->program_string_id = prog->id;
key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
brw_populate_sampler_prog_key_data(ctx, &prog->program, &key->tex);
}
void
brw_populate_default_base_prog_key(const struct intel_device_info *devinfo,
const struct brw_program *prog,
struct brw_base_prog_key *key)
{
key->program_string_id = prog->id;
key->subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
brw_setup_tex_for_precompile(devinfo, &key->tex, &prog->program);
}
static bool
brw_wm_state_dirty(const struct brw_context *brw)
{
return brw_state_dirty(brw,
_NEW_BUFFERS |
_NEW_COLOR |
_NEW_DEPTH |
_NEW_FRAG_CLAMP |
_NEW_HINT |
_NEW_LIGHT |
_NEW_LINE |
_NEW_MULTISAMPLE |
_NEW_POLYGON |
_NEW_STENCIL |
_NEW_TEXTURE,
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_REDUCED_PRIMITIVE |
BRW_NEW_STATS_WM |
BRW_NEW_VUE_MAP_GEOM_OUT);
}
void
brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
{
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct gl_program *prog = brw->programs[MESA_SHADER_FRAGMENT];
const struct brw_program *fp = brw_program_const(prog);
GLuint lookup = 0;
GLuint line_aa;
memset(key, 0, sizeof(*key));
/* Build the index for table lookup
*/
if (devinfo->ver < 6) {
struct brw_renderbuffer *depth_irb =
brw_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
/* _NEW_COLOR */
if (prog->info.fs.uses_discard || ctx->Color.AlphaEnabled) {
lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;
}
if (prog->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;
}
/* _NEW_DEPTH */
if (depth_irb && ctx->Depth.Test) {
lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;
if (brw_depth_writes_enabled(brw))
lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;
}
/* _NEW_STENCIL | _NEW_BUFFERS */
if (brw->stencil_enabled) {
lookup |= BRW_WM_IZ_STENCIL_TEST_ENABLE_BIT;
if (ctx->Stencil.WriteMask[0] ||
ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
lookup |= BRW_WM_IZ_STENCIL_WRITE_ENABLE_BIT;
}
key->iz_lookup = lookup;
}
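/* Decide how line antialiasing coverage applies: AA_NEVER (no smoothed
 * lines at all), AA_ALWAYS (every primitive reaching the WM is a smoothed
 * line), or AA_SOMETIMES (e.g. only one polygon face is drawn as GL_LINE,
 * so coverage must be applied conditionally).
 */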
line_aa = BRW_WM_AA_NEVER;
/* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
if (ctx->Line.SmoothFlag) {
if (brw->reduced_primitive == GL_LINES) {
line_aa = BRW_WM_AA_ALWAYS;
}
else if (brw->reduced_primitive == GL_TRIANGLES) {
if (ctx->Polygon.FrontMode == GL_LINE) {
line_aa = BRW_WM_AA_SOMETIMES;
if (ctx->Polygon.BackMode == GL_LINE ||
(ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_BACK))
line_aa = BRW_WM_AA_ALWAYS;
}
else if (ctx->Polygon.BackMode == GL_LINE) {
line_aa = BRW_WM_AA_SOMETIMES;
if ((ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_FRONT))
line_aa = BRW_WM_AA_ALWAYS;
}
}
}
key->line_aa = line_aa;
/* _NEW_HINT */
key->high_quality_derivatives =
prog->info.uses_fddx_fddy &&
ctx->Hint.FragmentShaderDerivative == GL_NICEST;
if (devinfo->ver < 6)
key->stats_wm = brw->stats_wm;
/* _NEW_LIGHT */
key->flat_shade =
(prog->info.inputs_read & (VARYING_BIT_COL0 | VARYING_BIT_COL1)) &&
(ctx->Light.ShadeModel == GL_FLAT);
/* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
/* _NEW_TEXTURE */
brw_populate_base_prog_key(ctx, fp, &key->base);
/* _NEW_BUFFERS */
key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
/* _NEW_COLOR */
key->force_dual_color_blend = brw->dual_color_blend_by_location &&
(ctx->Color.BlendEnabled & 1) && ctx->Color._BlendUsesDualSrc & 0x1;
/* _NEW_MULTISAMPLE, _NEW_BUFFERS */
key->alpha_to_coverage = _mesa_is_alpha_to_coverage_enabled(ctx);
/* _NEW_COLOR, _NEW_BUFFERS */
key->alpha_test_replicate_alpha =
ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
_mesa_is_alpha_test_enabled(ctx);
/* _NEW_BUFFERS _NEW_MULTISAMPLE */
/* Ignore sample qualifier while computing this flag. */
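/* For example, MinSampleShadingValue = 0.5 on a 4x framebuffer gives
 * 0.5 * 4 = 2 > 1, so per-sample interpolation is required; 0.25 on the
 * same framebuffer gives 1 and is not.
 */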
if (ctx->Multisample.Enabled) {
key->persample_interp =
ctx->Multisample.SampleShading &&
(ctx->Multisample.MinSampleShadingValue *
_mesa_geometric_samples(ctx->DrawBuffer) > 1);
key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
}
key->ignore_sample_mask_out = !key->multisample_fbo;
/* BRW_NEW_VUE_MAP_GEOM_OUT */
if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read &
BRW_FS_VARYING_INPUT_MASK) > 16) {
key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
}
/* _NEW_COLOR | _NEW_BUFFERS */
/* Pre-gfx6, the hardware alpha test always used each render
* target's alpha to do alpha test, as opposed to render target 0's alpha
* like GL requires. Fix that by building the alpha test into the
* shader, and we'll skip enabling the fixed function alpha test.
*/
if (devinfo->ver < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
ctx->Color.AlphaEnabled) {
key->alpha_test_func = ctx->Color.AlphaFunc;
key->alpha_test_ref = ctx->Color.AlphaRef;
}
/* Whether reads from the framebuffer should behave coherently. */
key->coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch;
}
void
brw_upload_wm_prog(struct brw_context *brw)
{
struct brw_wm_prog_key key;
struct brw_program *fp =
(struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];
if (!brw_wm_state_dirty(brw))
return;
brw_wm_populate_key(brw, &key);
if (brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG, &key, sizeof(key),
&brw->wm.base.prog_offset, &brw->wm.base.prog_data,
true))
return;
if (brw_disk_cache_upload_program(brw, MESA_SHADER_FRAGMENT))
return;
fp = (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];
fp->id = key.base.program_string_id;
ASSERTED bool success = brw_codegen_wm_prog(brw, fp, &key,
&brw->vue_map_geom_out);
assert(success);
}
void
brw_wm_populate_default_key(const struct brw_compiler *compiler,
struct brw_wm_prog_key *key,
struct gl_program *prog)
{
const struct intel_device_info *devinfo = compiler->devinfo;
memset(key, 0, sizeof(*key));
brw_populate_default_base_prog_key(devinfo, brw_program(prog),
&key->base);
uint64_t outputs_written = prog->info.outputs_written;
if (devinfo->ver < 6) {
if (prog->info.fs.uses_discard)
key->iz_lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;
if (outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
key->iz_lookup |= BRW_WM_IZ_PS_COMPUTES_DEPTH_BIT;
/* Just assume depth testing. */
key->iz_lookup |= BRW_WM_IZ_DEPTH_TEST_ENABLE_BIT;
key->iz_lookup |= BRW_WM_IZ_DEPTH_WRITE_ENABLE_BIT;
}
if (devinfo->ver < 6 || util_bitcount64(prog->info.inputs_read &
BRW_FS_VARYING_INPUT_MASK) > 16) {
key->input_slots_valid = prog->info.inputs_read | VARYING_BIT_POS;
}
key->nr_color_regions = util_bitcount64(outputs_written &
~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
/* Whether reads from the framebuffer should behave coherently. */
key->coherent_fb_fetch = devinfo->ver >= 9;
}
bool
brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
const struct intel_device_info *devinfo = &brw->screen->devinfo;
struct brw_wm_prog_key key;
struct brw_program *bfp = brw_program(prog);
brw_wm_populate_default_key(brw->screen->compiler, &key, prog);
/* check brw_wm_populate_default_key coherent_fb_fetch setting */
assert(key.coherent_fb_fetch ==
ctx->Extensions.EXT_shader_framebuffer_fetch);
uint32_t old_prog_offset = brw->wm.base.prog_offset;
struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data;
struct brw_vue_map vue_map;
if (devinfo->ver < 6) {
brw_compute_vue_map(&brw->screen->devinfo, &vue_map,
prog->info.inputs_read | VARYING_BIT_POS,
false, 1);
}
bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map);
brw->wm.base.prog_offset = old_prog_offset;
brw->wm.base.prog_data = old_prog_data;
return success;
}

Some files were not shown because too many files have changed in this diff