Skip to content

Commit 2c1e8ed

Browse files
authored
Fix GX_NRM_NBT and GX_NRM_NBT3 vertex attribute handling (#181)
* Fix GX_NRM_NBT and GX_NRM_NBT3 vertex attribute handling
* Fix GX_NRM_NBT3 indexing, add GX_TG_BINRM/GX_TG_TANGENT support
* Add GX_TG_BUMP emboss bump mapping support
1 parent d7a57eb commit 2c1e8ed

5 files changed

Lines changed: 157 additions & 40 deletions

File tree

lib/gx/command_processor.cpp

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,13 @@ static constexpr u8 CP_VAT_MASK = GX_VAT_MASK;
106106

107107
// Read helpers for big/little endian
108108
#if _MSC_VER
109-
template<typename T>
109+
template <typename T>
110110
__forceinline // Yes, this was necessary.
111-
inline T unaligned_load(const T* ptr) {
111+
inline T unaligned_load(const T* ptr) {
112112
return *static_cast<const __unaligned T*>(ptr);
113113
}
114114
#else
115-
template<typename T>
115+
template <typename T>
116116
inline T unaligned_load(const T* ptr) {
117117
T copy;
118118
memcpy(&copy, ptr, sizeof(T));
@@ -502,7 +502,8 @@ static void handle_bp(u32 value, bool bigEndian) {
502502
g_gxState.bpRegCache[0xFE] = 0x00FFFFFF;
503503
const u32 merged = (g_gxState.bpRegCache[regId] & ~ssMask) | (value & ssMask);
504504
value = (regId << 24) | (merged & 0x00FFFFFF);
505-
if (g_gxState.bpRegCache[regId] == value) return;
505+
if (g_gxState.bpRegCache[regId] == value)
506+
return;
506507
g_gxState.bpRegCache[regId] = value;
507508
}
508509

@@ -1179,7 +1180,9 @@ static void handle_cp(u8 addr, u32 value, bool bigEndian) {
11791180
vf.attrs[GX_VA_POS].cnt = static_cast<GXCompCnt>(bp_get(value, 1, 0));
11801181
vf.attrs[GX_VA_POS].type = static_cast<GXCompType>(bp_get(value, 3, 1));
11811182
vf.attrs[GX_VA_POS].frac = static_cast<u8>(bp_get(value, 5, 4));
1182-
vf.attrs[GX_VA_NRM].cnt = static_cast<GXCompCnt>(bp_get(value, 1, 9));
1183+
const auto nrm_cnt = bp_get(value, 1, 9);
1184+
const auto nrm_nbt3 = bp_get(value, 1, 31);
1185+
vf.attrs[GX_VA_NRM].cnt = static_cast<GXCompCnt>(nrm_nbt3 ? GX_NRM_NBT3 : (nrm_cnt ? GX_NRM_NBT : GX_NRM_XYZ));
11831186
vf.attrs[GX_VA_NRM].type = static_cast<GXCompType>(bp_get(value, 3, 10));
11841187
if (vf.attrs[GX_VA_NRM].type == GX_U8 || vf.attrs[GX_VA_NRM].type == GX_S8) {
11851188
vf.attrs[GX_VA_NRM].frac = 6;
@@ -1280,8 +1283,10 @@ static void handle_xf(const u8* data, u32& pos, u32 size, bool bigEndian) {
12801283
u32 val = read_u32(xfData + i * 4, bigEndian);
12811284

12821285
// Skip scalar register writes that haven't changed (viewport/projection handled below)
1283-
if (reg <= 0x19 && val == g_gxState.xfRegCache[reg]) continue;
1284-
if (reg <= 0x19) g_gxState.xfRegCache[reg] = val;
1286+
if (reg <= 0x19 && val == g_gxState.xfRegCache[reg])
1287+
continue;
1288+
if (reg <= 0x19)
1289+
g_gxState.xfRegCache[reg] = val;
12851290

12861291
switch (reg) {
12871292
case 0x08:
@@ -1450,11 +1455,13 @@ static void handle_xf(const u8* data, u32& pos, u32 size, bool bigEndian) {
14501455
if (tgType == 0) {
14511456
tcg.type = proj ? GX_TG_MTX3x4 : GX_TG_MTX2x4;
14521457
} else if (tgType == 1) {
1453-
// Bump mapping
1458+
// Bump mapping: type encodes emboss light
14541459
tcg.type = static_cast<GXTexGenType>(bp_get(val, 3, 15) + 2);
14551460
} else if (tgType == 2 || tgType == 3) {
14561461
tcg.type = GX_TG_SRTG;
14571462
}
1463+
// Emboss source texcoord (bits 12-14); 0 for non-bump types
1464+
tcg.embossSrc = bp_get(val, 3, 12);
14581465

14591466
// Decode source from row
14601467
static const GXTexGenSrc rowToSrc[] = {GX_TG_POS, GX_TG_NRM, GX_TG_COLOR0, GX_TG_BINRM, GX_TG_TANGENT,
@@ -1516,10 +1523,10 @@ static u32 calculate_last_vtx_size(GXVtxFmt fmt) {
15161523
break;
15171524
}
15181525
case GX_INDEX8:
1519-
vtxSize += 1;
1526+
vtxSize += (i == GX_VA_NRM && vtxFmt.attrs[i].cnt == GX_NRM_NBT3) ? 3 : 1;
15201527
break;
15211528
case GX_INDEX16:
1522-
vtxSize += 2;
1529+
vtxSize += (i == GX_VA_NRM && vtxFmt.attrs[i].cnt == GX_NRM_NBT3) ? 6 : 2;
15231530
break;
15241531
}
15251532
}
@@ -1546,16 +1553,14 @@ static void handle_draw(u8 cmd, const u8* data, u32& pos, u32 size, bool bigEndi
15461553
pos += 2;
15471554

15481555
u32 vtxSize;
1549-
if (g_gxState.lastVtxFmt == fmt) LIKELY {
1550-
vtxSize = g_gxState.lastVtxSize;
1551-
} else UNLIKELY {
1552-
vtxSize = calculate_last_vtx_size(fmt);
1553-
}
1556+
if (g_gxState.lastVtxFmt == fmt)
1557+
LIKELY { vtxSize = g_gxState.lastVtxSize; }
1558+
else
1559+
UNLIKELY { vtxSize = calculate_last_vtx_size(fmt); }
15541560

15551561
u32 totalVtxBytes = vtxCount * vtxSize;
1556-
if (pos + totalVtxBytes > size) UNLIKELY {
1557-
handle_draw_overrun(totalVtxBytes, data, pos, size);
1558-
}
1562+
if (pos + totalVtxBytes > size)
1563+
UNLIKELY { handle_draw_overrun(totalVtxBytes, data, pos, size); }
15591564

15601565
// Push raw vertex data to buffer
15611566
gfx::Range vertRange = gfx::push_verts(data + pos, totalVtxBytes);

lib/gx/gx.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,9 @@ u8 comp_cnt_count(GXAttr attr, GXCompCnt cnt) noexcept {
720720
switch (cnt) {
721721
case GX_NRM_XYZ:
722722
return 3;
723+
case GX_NRM_NBT:
724+
case GX_NRM_NBT3:
725+
return 9;
723726
default:
724727
break;
725728
}
@@ -766,6 +769,7 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV
766769
}
767770
const auto& attrFmt = vtxFmt.attrs[i];
768771
const auto cnt = comp_cnt_count(attr, attrFmt.cnt);
772+
const bool nbt3 = attr == GX_VA_NRM && attrFmt.cnt == GX_NRM_NBT3;
769773
mapping = AttrConfig{
770774
.attrType = static_cast<u8>(type),
771775
.cnt = cnt,
@@ -774,6 +778,7 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV
774778
.stride = 0,
775779
.frac = attrFmt.frac,
776780
.le = false,
781+
.nbt3 = nbt3,
777782
};
778783
switch (type) {
779784
case GX_DIRECT: {
@@ -783,12 +788,12 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV
783788
case GX_INDEX8:
784789
mapping.stride = g_gxState.arrays[i].stride;
785790
mapping.le = g_gxState.arrays[i].le;
786-
vtxOffset += 1;
791+
vtxOffset += nbt3 ? 3 : 1;
787792
break;
788793
case GX_INDEX16:
789794
mapping.stride = g_gxState.arrays[i].stride;
790795
mapping.le = g_gxState.arrays[i].le;
791-
vtxOffset += 2;
796+
vtxOffset += nbt3 ? 6 : 2;
792797
break;
793798
default:
794799
Log.fatal("populate_pipeline_config: Invalid vertex type {}", type);

lib/gx/gx.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ struct TcgConfig {
158158
GXTexMtx mtx = GX_IDENTITY;
159159
GXPTTexMtx postMtx = GX_PTIDENTITY;
160160
bool normalize = false;
161-
u8 _p1 = 0;
161+
u8 embossSrc = 0; // Emboss source texcoord (GX_TG_BUMP*)
162162
u8 _p2 = 0;
163163
u8 _p3 = 0;
164164

@@ -441,7 +441,7 @@ struct AttrConfig {
441441
u8 stride = 0; // Array stride
442442
u8 frac = 0;
443443
bool le = true;
444-
u8 _p1 = 0;
444+
bool nbt3 = false; // GX_NRM_NBT3
445445
};
446446
struct ShaderConfig {
447447
u8 fogType = GX_FOG_NONE;
@@ -492,6 +492,7 @@ struct BindGroupRanges {
492492
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept;
493493
wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, ArrayRef<wgpu::VertexBufferLayout> vtxBuffers,
494494
wgpu::ShaderModule shader, const char* label) noexcept;
495+
std::string build_shader_source(const ShaderConfig& config) noexcept;
495496
wgpu::ShaderModule build_shader(const ShaderConfig& config) noexcept;
496497
GXBindGroups build_bind_groups(const ShaderInfo& info) noexcept;
497498

lib/gx/shader.cpp

Lines changed: 112 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -629,25 +629,35 @@ auto fetch_color_attr(const AttrConfig& mapping, std::string_view buf, std::stri
629629
}
630630
}
631631

632+
// Result of attr_address(): where a vertex attribute's bytes live, expressed as
// pieces that are spliced into the generated shader source.
//
// Every member has a default so that a default-constructed AttrAddress is fully
// defined; the type remains an aggregate, so `return {expr, "abuf"sv, le};` and
// `const auto [offs, buf, le] = ...;` keep working unchanged.
struct AttrAddress {
  // Shader expression (text) that evaluates to the byte offset to fetch from.
  std::string offs{};
  // Name of the shader buffer to fetch from; attr_address() uses "vbuf" for
  // direct data and "abuf" for indexed data. Non-owning view: attr_address()
  // only ever stores string literals here.
  std::string_view buf{};
  // Whether the fetched data is little endian; attr_address() passes
  // mapping.le for indexed attributes and false for direct ones.
  bool le = false;
};
637+
638+
auto attr_address(const AttrConfig& mapping, GXAttr attr, std::string_view vidx, u32 vtxStride, u32 dlExtra, u32 within)
639+
-> AttrAddress {
640+
const u32 dlOffset = mapping.offset + dlExtra;
641+
if (mapping.attrType == GX_INDEX8) {
642+
return {fmt::format("ubuf.array_start[{}] + raw_fetch_u8_1(&vbuf, ubuf.vtx_start + {} * {}u + {}u) * {}u + {}u",
643+
attr - GX_VA_POS, vidx, vtxStride, dlOffset, mapping.stride, within),
644+
"abuf"sv, mapping.le};
645+
}
646+
if (mapping.attrType == GX_INDEX16) {
647+
return {
648+
fmt::format("ubuf.array_start[{}] + raw_fetch_u16_1(&vbuf, ubuf.vtx_start + {} * {}u + {}u, false) * {}u + {}u",
649+
attr - GX_VA_POS, vidx, vtxStride, dlOffset, mapping.stride, within),
650+
"abuf"sv, mapping.le};
651+
}
652+
return {fmt::format("ubuf.vtx_start + {} * {}u + {}u", vidx, vtxStride, dlOffset + within), "vbuf"sv, false};
653+
}
654+
632655
auto attr_load(const ShaderConfig& config, GXAttr attr, std::string_view vidx) -> std::string {
633656
const auto& mapping = config.attrs[attr];
634657
if (mapping.attrType == GX_NONE) {
635658
return vtx_attr(config, attr);
636659
}
637-
auto buf = "vbuf"sv;
638-
auto offs = fmt::format("ubuf.vtx_start + {} * {}u + {}u", vidx, config.vtxStride, mapping.offset);
639-
auto le = false; // Vertex buffer is always big endian (for now)
640-
if (mapping.attrType == GX_INDEX8) {
641-
offs = fmt::format("ubuf.array_start[{}] + raw_fetch_u8_1(&{}, {}) * {}u", attr - GX_VA_POS, buf, offs,
642-
mapping.stride);
643-
buf = "abuf"sv;
644-
le = mapping.le;
645-
} else if (mapping.attrType == GX_INDEX16) {
646-
offs = fmt::format("ubuf.array_start[{}] + raw_fetch_u16_1(&{}, {}, {}) * {}u", attr - GX_VA_POS, buf, offs, le,
647-
mapping.stride);
648-
buf = "abuf"sv;
649-
le = mapping.le;
650-
}
660+
const auto [offs, buf, le] = attr_address(mapping, attr, vidx, config.vtxStride, 0u, 0u);
651661
switch (attr) {
652662
case GX_VA_PNMTXIDX:
653663
return fmt::format("(raw_fetch_u8_1(&{}, {}) / 3u)", buf, offs);
@@ -668,7 +678,12 @@ auto attr_load(const ShaderConfig& config, GXAttr attr, std::string_view vidx) -
668678
return posLoad;
669679
}
670680
case GX_VA_NRM:
671-
// TODO check for NBT/NBT3
681+
// NBT: normal only here; binormal/tangent loaded via attr_load_nbt_slice
682+
if (mapping.cnt > 3) {
683+
auto nrmMapping = mapping;
684+
nrmMapping.cnt = 3;
685+
return fetch_attr(nrmMapping, buf, offs, le);
686+
}
672687
return fetch_attr(mapping, buf, offs, le);
673688
case GX_VA_CLR0:
674689
case GX_VA_CLR1:
@@ -692,6 +707,42 @@ auto attr_load(const ShaderConfig& config, GXAttr attr, std::string_view vidx) -
692707
}
693708
}
694709

710+
// Selects one of the three consecutive 3-component vectors of an NBT normal
// attribute. The numeric value is significant: attr_load_nbt_slice() casts it to
// an integer and uses it as the slice index when computing byte offsets.
enum class NbtSlice : u8 {
  N = 0, // first vector
  B = 1, // second vector (bound to the "in_binrm" shader local)
  T = 2, // third vector (bound to the "in_tangent" shader local)
};
715+
716+
auto attr_load_nbt_slice(const ShaderConfig& config, NbtSlice slice, std::string_view vidx) -> std::string {
717+
const auto& mapping = config.attrs[GX_VA_NRM];
718+
if (mapping.attrType == GX_NONE || mapping.cnt != 9) {
719+
Log.fatal("attr_load_nbt_slice: GX_TG_BINRM/TANGENT requires GX_NRM_NBT or GX_NRM_NBT3");
720+
}
721+
const auto sliceIdx = static_cast<u32>(slice);
722+
const auto compsize = comp_type_size(GX_VA_NRM, static_cast<GXCompType>(mapping.compType));
723+
u32 dlExtra = 0;
724+
if (mapping.nbt3) {
725+
if (mapping.attrType == GX_INDEX8) {
726+
dlExtra = sliceIdx;
727+
} else if (mapping.attrType == GX_INDEX16) {
728+
dlExtra = sliceIdx * 2u;
729+
}
730+
}
731+
const u32 within = sliceIdx * 3u * compsize;
732+
const auto [offs, buf, le] = attr_address(mapping, GX_VA_NRM, vidx, config.vtxStride, dlExtra, within);
733+
auto sliceMapping = mapping;
734+
sliceMapping.cnt = 3;
735+
return fetch_attr(sliceMapping, buf, offs, le);
736+
}
737+
738+
// Name of the shader-local variable that holds the given NBT slice.
// Only NbtSlice::B maps to "in_binrm"; every other value (including
// NbtSlice::N) yields "in_tangent".
static constexpr std::string_view nbt_slice_local(NbtSlice slice) noexcept {
  switch (slice) {
  case NbtSlice::B:
    return "in_binrm";
  default:
    return "in_tangent";
  }
}
741+
742+
// True when `type` lies in the inclusive range GX_TG_BUMP0..GX_TG_BUMP7,
// i.e. it is one of the emboss bump texgen types.
static constexpr bool is_emboss_texgen(GXTexGenType type) noexcept {
  const bool belowRange = type < GX_TG_BUMP0;
  const bool aboveRange = type > GX_TG_BUMP7;
  return !(belowRange || aboveRange);
}
745+
695746
auto lighting_func(const ShaderConfig& config, const ColorChannelConfig& cc, u8 i, bool alpha) -> std::string {
696747
std::string_view swizzle = alpha ? ".a"sv : ""sv;
697748
std::string outVar;
@@ -783,7 +834,7 @@ auto lighting_func(const ShaderConfig& config, const ColorChannelConfig& cc, u8
783834
alpha ? "a"sv : ""sv);
784835
}
785836

786-
wgpu::ShaderModule build_shader(const ShaderConfig& config) noexcept {
837+
std::string build_shader_source(const ShaderConfig& config) noexcept {
787838
ZoneScoped;
788839
const auto hash = xxh3_hash(config);
789840
const auto info = build_shader_info(config);
@@ -916,6 +967,24 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config) noexcept {
916967
vtxXfrAttrsPre += fmt::format("\n let {} = {};", vtx_attr(config, attr), attr_load(config, attr, vidxAttr));
917968
}
918969
}
970+
bool needsBinrm = false;
971+
bool needsTangent = false;
972+
for (int i = 0; i < info.sampledTexCoords.size(); ++i) {
973+
if (!info.sampledTexCoords.test(i)) {
974+
continue;
975+
}
976+
const bool emboss = is_emboss_texgen(config.tcgs[i].type);
977+
needsBinrm = needsBinrm || config.tcgs[i].src == GX_TG_BINRM || emboss;
978+
needsTangent = needsTangent || config.tcgs[i].src == GX_TG_TANGENT || emboss;
979+
}
980+
if (needsBinrm) {
981+
vtxXfrAttrsPre += fmt::format("\n let {} = {};", nbt_slice_local(NbtSlice::B),
982+
attr_load_nbt_slice(config, NbtSlice::B, vidxAttr));
983+
}
984+
if (needsTangent) {
985+
vtxXfrAttrsPre += fmt::format("\n let {} = {};", nbt_slice_local(NbtSlice::T),
986+
attr_load_nbt_slice(config, NbtSlice::T, vidxAttr));
987+
}
919988

920989
if (config.lineMode == 0) {
921990
vtxXfrAttrsPre += fmt::format(
@@ -1097,6 +1166,19 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config) noexcept {
10971166
} else {
10981167
vtxOutAttrs += fmt::format("\n @location({}) tex{}_uv: vec2f,", vtxOutIdx++, i);
10991168
}
1169+
if (is_emboss_texgen(tcg.type)) {
1170+
// Emboss bump: offset the source texcoord by the light projected onto tangent/binormal
1171+
const u32 lightIdx = tcg.type - GX_TG_BUMP0;
1172+
vtxXfrAttrs += fmt::format(
1173+
"\n let bump_ldir{0} = normalize(ubuf.lights[{1}].pos - mv_pos);"
1174+
"\n let bump_tan{0} = vec4f(in_tangent, 0.0) * ubuf.nrm_mtx[in_pnmtxidx];"
1175+
"\n let bump_bin{0} = vec4f(in_binrm, 0.0) * ubuf.nrm_mtx[in_pnmtxidx];"
1176+
"\n out.tex{0}_uv = tc{2}_proj.xy + vec2f(dot(bump_ldir{0}, bump_tan{0}), dot(bump_ldir{0}, "
1177+
"bump_bin{0}));",
1178+
i, lightIdx, tcg.embossSrc);
1179+
fragmentFnPre += fmt::format("\n var tex{0}_uv = in.tex{0}_uv.xy;", i);
1180+
continue;
1181+
}
11001182
if (tcg.src >= GX_TG_TEX0 && tcg.src <= GX_TG_TEX7) {
11011183
vtxXfrAttrs += fmt::format("\n var tc{} = vec4f({}, 1.0, 1.0);", i,
11021184
vtx_attr(config, GXAttr(GX_VA_TEX0 + (tcg.src - GX_TG_TEX0))));
@@ -1108,6 +1190,10 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config) noexcept {
11081190
vtxXfrAttrs += fmt::format("\n var tc{} = {};", i, vtx_attr(config, GX_VA_CLR0));
11091191
} else if (tcg.src == GX_TG_COLOR1) {
11101192
vtxXfrAttrs += fmt::format("\n var tc{} = {};", i, vtx_attr(config, GX_VA_CLR1));
1193+
} else if (tcg.src == GX_TG_BINRM) {
1194+
vtxXfrAttrs += fmt::format("\n var tc{} = vec4f({}, 1.0);", i, nbt_slice_local(NbtSlice::B));
1195+
} else if (tcg.src == GX_TG_TANGENT) {
1196+
vtxXfrAttrs += fmt::format("\n var tc{} = vec4f({}, 1.0);", i, nbt_slice_local(NbtSlice::T));
11111197
} else
11121198
UNLIKELY FATAL("unhandled tcg src {}", underlying(tcg.src));
11131199
if (tcg.type == GX_TG_MTX2x4 || tcg.type == GX_TG_MTX3x4) {
@@ -1468,8 +1554,9 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config) noexcept {
14681554
if (discard.constant == 1) {
14691555
fragmentFn += "\n // Alpha compare\n discard;";
14701556
} else if (discard.constant != 0) {
1471-
fragmentFn += "\n // Alpha compare"
1472-
"\n let alphaCompare = u32(round(clamp(prev.a, 0.0, 1.0) * 255.0));";
1557+
fragmentFn +=
1558+
"\n // Alpha compare"
1559+
"\n let alphaCompare = u32(round(clamp(prev.a, 0.0, 1.0) * 255.0));";
14731560
fragmentFn += fmt::format("\n if ({}) {{ discard; }}", discard.expr);
14741561
}
14751562
}
@@ -1844,6 +1931,13 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4f {{{6}{5}
18441931
Log.info("Generated shader: {}", shaderSource);
18451932
}
18461933

1934+
return shaderSource;
1935+
}
1936+
1937+
wgpu::ShaderModule build_shader(const ShaderConfig& config) noexcept {
1938+
ZoneScoped;
1939+
const auto shaderSource = build_shader_source(config);
1940+
const auto hash = xxh3_hash(config);
18471941
wgpu::ShaderSourceWGSL wgslDescriptor{};
18481942
wgslDescriptor.code = shaderSource.c_str();
18491943
const auto label = fmt::format("GX Shader {:x}", hash);

lib/gx/shader_info.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,18 @@ ShaderInfo build_shader_info(const ShaderConfig& config) noexcept {
257257
}
258258
}
259259

260+
// Emboss bump needs its source texcoord generated and a light enabled
261+
for (int i = 0; i < info.sampledTexCoords.size(); ++i) {
262+
if (!info.sampledTexCoords.test(i)) {
263+
continue;
264+
}
265+
const auto& tcg = config.tcgs[i];
266+
if (tcg.type >= GX_TG_BUMP0 && tcg.type <= GX_TG_BUMP7) {
267+
info.sampledTexCoords.set(tcg.embossSrc);
268+
info.lightingEnabled = true;
269+
}
270+
}
271+
260272
info.uniformSize += info.loadsTevReg.count() * sizeof(Vec4<float>);
261273
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
262274
if (info.sampledColorChannels.test(i)) {

0 commit comments

Comments
 (0)