Shader optimizations

This commit is contained in:
2025-10-23 07:55:29 +02:00
parent 4ef1b04156
commit 40a6d832bc
3 changed files with 133 additions and 121 deletions

View File

@ -52,6 +52,8 @@ struct StackEntry
int nodeIndex; int nodeIndex;
float3 center; float3 center;
float halfSize; float halfSize;
float entry;
float exit;
}; };
// Buffers // Buffers
@ -167,125 +169,133 @@ void CSMain(uint3 id : SV_DispatchThreadID)
root.nodeIndex = rootIndex; root.nodeIndex = rootIndex;
root.center = rootCenter; root.center = rootCenter;
root.halfSize = rootHalfSize; root.halfSize = rootHalfSize;
stack[sp++] = root;
bool hasHit = false; if (IntersectAABB_fast(rootCenter, rootHalfSize, b.origin, r.direction, invDir, root.entry, root.exit))
StackEntry bestEntry;
// traversal
while (sp > 0)
{ {
StackEntry e = stack[--sp]; stack[sp++] = root;
if (e.nodeIndex < 0 || e.nodeIndex >= nodeCount) continue;
LinearNode n = nodes[e.nodeIndex]; bool hasHit = false;
StackEntry bestEntry;
float tEntry, tExit;
if (!IntersectAABB_fast(e.center, e.halfSize, b.origin, r.direction, invDir, tEntry, tExit)) continue; // traversal
while (sp > 0)
// prune with current best
if (tEntry >= outHit.maxDistance) continue;
if (n.isLeaf == 1u)
{ {
if (n.isOccupied == 1u) StackEntry e = stack[--sp];
if (e.nodeIndex < 0 || e.nodeIndex >= nodeCount) continue;
LinearNode n = nodes[e.nodeIndex];
// prune with current best
if (e.entry >= outHit.maxDistance) continue;
if (n.isLeaf == 1u)
{ {
float tHit = max(tEntry, 0.0); if (n.isOccupied == 1u)
if (tHit < outHit.maxDistance)
{ {
// found a closer hit — commit minimal info, defer heavy ops float tHit = max(e.entry, 0.0);
hasHit = true; if (tHit < outHit.maxDistance)
outHit.maxDistance = tHit; {
bestEntry = e; // found a closer hit — commit minimal info, defer heavy ops
hasHit = true;
outHit.maxDistance = tHit;
bestEntry = e;
}
} }
continue;
} }
continue;
} // Non-leaf: gather children that intersect and their entry (small array)
uint childMask = n.childMask;
// Non-leaf: gather children that intersect and their tEntry (small array) // small local arrays
uint childMask = n.childMask; int childIdx[8];
// small local arrays float3 childCenter[8];
float childT[8]; float childEntry[8];
int childIdx[8]; float childExit[8];
float3 childCenter[8];
int childCount = 0;
int childCount = 0;
float childHalf = e.halfSize * 0.5;
float childHalf = e.halfSize * 0.5; for (uint i = 0; i < 8; ++i)
for (uint i = 0; i < 8; ++i) {
{ if (((childMask >> i) & 1u) == 0u) continue;
if (((childMask >> i) & 1u) == 0u) continue;
uint offset = countbits(childMask & ((1u << i) - 1u));
uint offset = countbits(childMask & ((1u << i) - 1u)); int cIndex = int(n.childBase + offset);
int cIndex = int(n.childBase + offset);
// compute child center
// compute child center float3 offsetVec = childHalf * float3(
float3 offsetVec = childHalf * float3( (i & 4u) ? 1.0 : -1.0,
(i & 4u) ? 1.0 : -1.0, (i & 2u) ? 1.0 : -1.0,
(i & 2u) ? 1.0 : -1.0, (i & 1u) ? 1.0 : -1.0
(i & 1u) ? 1.0 : -1.0 );
); float3 cCenter = e.center + offsetVec;
float3 cCenter = e.center + offsetVec;
// pretest intersection with child AABB to get tEntry
// pretest intersection with child AABB to get tEntry if (!IntersectAABB_fast(cCenter, childHalf, b.origin, r.direction, invDir, e.entry, e.exit)) continue;
float ctEntry, ctExit; if (e.entry >= outHit.maxDistance) continue; // prune child if already farther than best hit
if (!IntersectAABB_fast(cCenter, childHalf, b.origin, r.direction, invDir, ctEntry, ctExit)) continue;
if (ctEntry >= outHit.maxDistance) continue; // prune child if already farther than best hit // store for near-first push
childIdx[childCount] = cIndex;
// store for near-first push childCenter[childCount] = cCenter;
childT[childCount] = ctEntry; childEntry[childCount] = e.entry;
childIdx[childCount] = cIndex; childExit[childCount] = e.exit;
childCenter[childCount] = cCenter; // temporarily store center and half? we recompute on push
// temporarily store center and half? we recompute on push childCount++;
childCount++;
}
// sort children by childT ascending (insertion sort on at most 8 elements)
for (int a = 1; a < childCount; ++a)
{
float keyT = childT[a];
int keyIdx = childIdx[a];
float3 keyCenter = childCenter[a];
int j = a - 1;
while (j >= 0 && childT[j] > keyT) {
childT[j+1] = childT[j];
childIdx[j+1] = childIdx[j];
childCenter[j+1] = childCenter[j];
j--;
} }
childT[j+1] = keyT;
childIdx[j+1] = keyIdx;
childCenter[j+1] = keyCenter;
}
StackEntry childEntry;
// push children in reverse order (so the nearest is popped first) if stack has room
for (int c = childCount - 1; c >= 0; --c)
{
// push
childEntry.nodeIndex = childIdx[c];
childEntry.center = childCenter[c];
childEntry.halfSize = childHalf;
stack[sp++] = childEntry;
}
}
if (hasHit)
{
// commit heavy ops only now
float tHit = outHit.maxDistance;
if( tHit > 0 )
{
float3 hitPos = b.origin + r.direction * tHit;
// closest point
outHit.origin = ClosestPointOnAABB(hitPos, bestEntry.center, bestEntry.halfSize);
outHit.origin += outHit.origin - bestEntry.center; // sort children by childT ascending (insertion sort on at most 8 elements)
for (int a = 1; a < childCount; ++a)
{
int keyIdx = childIdx[a];
float3 keyCenter = childCenter[a];
float keyEntry = childEntry[a];
float keyExit = childExit[a];
int j = a - 1;
while (j >= 0 && childEntry[j] > keyEntry) {
childIdx[j+1] = childIdx[j];
childCenter[j+1] = childCenter[j];
childEntry[j+1] = childEntry[j];
childExit[j+1] = childExit[j];
j--;
}
childIdx[j+1] = keyIdx;
childCenter[j+1] = keyCenter;
childEntry[j+1] = keyEntry;
childExit[j+1] = keyExit;
}
// append final StackEntry nextChildEntry;
hits.Append(outHit); // push children in reverse order (so the nearest is popped first) if stack has room
for (int c = childCount - 1; c >= 0; --c)
{
// push
nextChildEntry.nodeIndex = childIdx[c];
nextChildEntry.center = childCenter[c];
nextChildEntry.halfSize = childHalf;
nextChildEntry.entry = childEntry[c];
nextChildEntry.exit = childExit[c];
stack[sp++] = nextChildEntry;
}
}
if (hasHit)
{
// commit heavy ops only now
float tHit = outHit.maxDistance;
if( tHit > 0 )
{
float3 hitPos = b.origin + r.direction * tHit;
// closest point
outHit.origin = ClosestPointOnAABB(hitPos, bestEntry.center, bestEntry.halfSize);
outHit.origin += outHit.origin - bestEntry.center;
// append final
hits.Append(outHit);
}
} }
} }
} }

View File

@ -24,7 +24,7 @@ public class Player : MonoBehaviour
VoxelRaycastGPU.Ray[] rays = new VoxelRaycastGPU.Ray[rayCount]; VoxelRaycastGPU.Ray[] rays = new VoxelRaycastGPU.Ray[rayCount];
FillRaysArray(rays); FillRaysArray(rays);
voxelManager.gpuRayCaster.Init(rayCount, rays, 3); voxelManager.gpuRayCaster.Init(rayCount, rays, 5);
} }
void Cast( ref VoxelRaycastGPU.BatchData[] batchData, int batchCount, int iIteration ) void Cast( ref VoxelRaycastGPU.BatchData[] batchData, int batchCount, int iIteration )

View File

@ -95,18 +95,20 @@ public class VoxelRaycastGpuManager
countBuffer.GetData(countArr); countBuffer.GetData(countArr);
currentCount = countArr[0]; currentCount = countArr[0];
sw.Stop(); /**
sw.Stop();
VoxelRaycastGPU.BatchData[] hits = new VoxelRaycastGPU.BatchData[currentCount];
hitBuffer.GetData(hits, 0, 0, currentCount); VoxelRaycastGPU.BatchData[] hits = new VoxelRaycastGPU.BatchData[currentCount];
for( int i = 0; i < hits.Length; i++ ) hitBuffer.GetData(hits, 0, 0, currentCount);
{ for( int i = 0; i < hits.Length; i++ )
GameObject sphere = GameObject.CreatePrimitive(PrimitiveType.Sphere); {
sphere.transform.position = hits[i].origin; GameObject sphere = GameObject.CreatePrimitive(PrimitiveType.Sphere);
sphere.transform.localScale = Vector3.one * 0.5f; sphere.transform.position = hits[i].origin;
} sphere.transform.localScale = Vector3.one * 0.5f;
}
sw.Start();
sw.Start();
*/
iteration++; iteration++;