Shader optimizations
This commit is contained in:
@ -52,6 +52,8 @@ struct StackEntry
|
|||||||
int nodeIndex;
|
int nodeIndex;
|
||||||
float3 center;
|
float3 center;
|
||||||
float halfSize;
|
float halfSize;
|
||||||
|
float entry;
|
||||||
|
float exit;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Buffers
|
// Buffers
|
||||||
@ -167,125 +169,133 @@ void CSMain(uint3 id : SV_DispatchThreadID)
|
|||||||
root.nodeIndex = rootIndex;
|
root.nodeIndex = rootIndex;
|
||||||
root.center = rootCenter;
|
root.center = rootCenter;
|
||||||
root.halfSize = rootHalfSize;
|
root.halfSize = rootHalfSize;
|
||||||
stack[sp++] = root;
|
|
||||||
|
|
||||||
bool hasHit = false;
|
if (IntersectAABB_fast(rootCenter, rootHalfSize, b.origin, r.direction, invDir, root.entry, root.exit))
|
||||||
StackEntry bestEntry;
|
|
||||||
|
|
||||||
// traversal
|
|
||||||
while (sp > 0)
|
|
||||||
{
|
{
|
||||||
StackEntry e = stack[--sp];
|
stack[sp++] = root;
|
||||||
if (e.nodeIndex < 0 || e.nodeIndex >= nodeCount) continue;
|
|
||||||
|
|
||||||
LinearNode n = nodes[e.nodeIndex];
|
bool hasHit = false;
|
||||||
|
StackEntry bestEntry;
|
||||||
float tEntry, tExit;
|
|
||||||
if (!IntersectAABB_fast(e.center, e.halfSize, b.origin, r.direction, invDir, tEntry, tExit)) continue;
|
// traversal
|
||||||
|
while (sp > 0)
|
||||||
// prune with current best
|
|
||||||
if (tEntry >= outHit.maxDistance) continue;
|
|
||||||
|
|
||||||
if (n.isLeaf == 1u)
|
|
||||||
{
|
{
|
||||||
if (n.isOccupied == 1u)
|
StackEntry e = stack[--sp];
|
||||||
|
if (e.nodeIndex < 0 || e.nodeIndex >= nodeCount) continue;
|
||||||
|
|
||||||
|
LinearNode n = nodes[e.nodeIndex];
|
||||||
|
|
||||||
|
// prune with current best
|
||||||
|
if (e.entry >= outHit.maxDistance) continue;
|
||||||
|
|
||||||
|
if (n.isLeaf == 1u)
|
||||||
{
|
{
|
||||||
float tHit = max(tEntry, 0.0);
|
if (n.isOccupied == 1u)
|
||||||
if (tHit < outHit.maxDistance)
|
|
||||||
{
|
{
|
||||||
// found a closer hit — commit minimal info, defer heavy ops
|
float tHit = max(e.entry, 0.0);
|
||||||
hasHit = true;
|
if (tHit < outHit.maxDistance)
|
||||||
outHit.maxDistance = tHit;
|
{
|
||||||
bestEntry = e;
|
// found a closer hit — commit minimal info, defer heavy ops
|
||||||
|
hasHit = true;
|
||||||
|
outHit.maxDistance = tHit;
|
||||||
|
bestEntry = e;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
continue;
|
|
||||||
}
|
// Non-leaf: gather children that intersect and their entry (small array)
|
||||||
|
uint childMask = n.childMask;
|
||||||
// Non-leaf: gather children that intersect and their tEntry (small array)
|
// small local arrays
|
||||||
uint childMask = n.childMask;
|
int childIdx[8];
|
||||||
// small local arrays
|
float3 childCenter[8];
|
||||||
float childT[8];
|
float childEntry[8];
|
||||||
int childIdx[8];
|
float childExit[8];
|
||||||
float3 childCenter[8];
|
|
||||||
|
int childCount = 0;
|
||||||
int childCount = 0;
|
|
||||||
|
float childHalf = e.halfSize * 0.5;
|
||||||
float childHalf = e.halfSize * 0.5;
|
for (uint i = 0; i < 8; ++i)
|
||||||
for (uint i = 0; i < 8; ++i)
|
{
|
||||||
{
|
if (((childMask >> i) & 1u) == 0u) continue;
|
||||||
if (((childMask >> i) & 1u) == 0u) continue;
|
|
||||||
|
uint offset = countbits(childMask & ((1u << i) - 1u));
|
||||||
uint offset = countbits(childMask & ((1u << i) - 1u));
|
int cIndex = int(n.childBase + offset);
|
||||||
int cIndex = int(n.childBase + offset);
|
|
||||||
|
// compute child center
|
||||||
// compute child center
|
float3 offsetVec = childHalf * float3(
|
||||||
float3 offsetVec = childHalf * float3(
|
(i & 4u) ? 1.0 : -1.0,
|
||||||
(i & 4u) ? 1.0 : -1.0,
|
(i & 2u) ? 1.0 : -1.0,
|
||||||
(i & 2u) ? 1.0 : -1.0,
|
(i & 1u) ? 1.0 : -1.0
|
||||||
(i & 1u) ? 1.0 : -1.0
|
);
|
||||||
);
|
float3 cCenter = e.center + offsetVec;
|
||||||
float3 cCenter = e.center + offsetVec;
|
|
||||||
|
// pretest intersection with child AABB to get tEntry
|
||||||
// pretest intersection with child AABB to get tEntry
|
if (!IntersectAABB_fast(cCenter, childHalf, b.origin, r.direction, invDir, e.entry, e.exit)) continue;
|
||||||
float ctEntry, ctExit;
|
if (e.entry >= outHit.maxDistance) continue; // prune child if already farther than best hit
|
||||||
if (!IntersectAABB_fast(cCenter, childHalf, b.origin, r.direction, invDir, ctEntry, ctExit)) continue;
|
|
||||||
if (ctEntry >= outHit.maxDistance) continue; // prune child if already farther than best hit
|
// store for near-first push
|
||||||
|
childIdx[childCount] = cIndex;
|
||||||
// store for near-first push
|
childCenter[childCount] = cCenter;
|
||||||
childT[childCount] = ctEntry;
|
childEntry[childCount] = e.entry;
|
||||||
childIdx[childCount] = cIndex;
|
childExit[childCount] = e.exit;
|
||||||
childCenter[childCount] = cCenter;
|
// temporarily store center and half? we recompute on push
|
||||||
// temporarily store center and half? we recompute on push
|
childCount++;
|
||||||
childCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// sort children by childT ascending (insertion sort on at most 8 elements)
|
|
||||||
for (int a = 1; a < childCount; ++a)
|
|
||||||
{
|
|
||||||
float keyT = childT[a];
|
|
||||||
int keyIdx = childIdx[a];
|
|
||||||
float3 keyCenter = childCenter[a];
|
|
||||||
int j = a - 1;
|
|
||||||
while (j >= 0 && childT[j] > keyT) {
|
|
||||||
childT[j+1] = childT[j];
|
|
||||||
childIdx[j+1] = childIdx[j];
|
|
||||||
childCenter[j+1] = childCenter[j];
|
|
||||||
j--;
|
|
||||||
}
|
}
|
||||||
childT[j+1] = keyT;
|
|
||||||
childIdx[j+1] = keyIdx;
|
|
||||||
childCenter[j+1] = keyCenter;
|
|
||||||
}
|
|
||||||
|
|
||||||
StackEntry childEntry;
|
|
||||||
// push children in reverse order (so the nearest is popped first) if stack has room
|
|
||||||
for (int c = childCount - 1; c >= 0; --c)
|
|
||||||
{
|
|
||||||
// push
|
|
||||||
childEntry.nodeIndex = childIdx[c];
|
|
||||||
childEntry.center = childCenter[c];
|
|
||||||
childEntry.halfSize = childHalf;
|
|
||||||
stack[sp++] = childEntry;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hasHit)
|
|
||||||
{
|
|
||||||
// commit heavy ops only now
|
|
||||||
float tHit = outHit.maxDistance;
|
|
||||||
|
|
||||||
if( tHit > 0 )
|
|
||||||
{
|
|
||||||
float3 hitPos = b.origin + r.direction * tHit;
|
|
||||||
|
|
||||||
// closest point
|
|
||||||
outHit.origin = ClosestPointOnAABB(hitPos, bestEntry.center, bestEntry.halfSize);
|
|
||||||
|
|
||||||
outHit.origin += outHit.origin - bestEntry.center;
|
// sort children by childT ascending (insertion sort on at most 8 elements)
|
||||||
|
for (int a = 1; a < childCount; ++a)
|
||||||
|
{
|
||||||
|
int keyIdx = childIdx[a];
|
||||||
|
float3 keyCenter = childCenter[a];
|
||||||
|
float keyEntry = childEntry[a];
|
||||||
|
float keyExit = childExit[a];
|
||||||
|
|
||||||
|
int j = a - 1;
|
||||||
|
while (j >= 0 && childEntry[j] > keyEntry) {
|
||||||
|
childIdx[j+1] = childIdx[j];
|
||||||
|
childCenter[j+1] = childCenter[j];
|
||||||
|
childEntry[j+1] = childEntry[j];
|
||||||
|
childExit[j+1] = childExit[j];
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
childIdx[j+1] = keyIdx;
|
||||||
|
childCenter[j+1] = keyCenter;
|
||||||
|
childEntry[j+1] = keyEntry;
|
||||||
|
childExit[j+1] = keyExit;
|
||||||
|
}
|
||||||
|
|
||||||
// append final
|
StackEntry nextChildEntry;
|
||||||
hits.Append(outHit);
|
// push children in reverse order (so the nearest is popped first) if stack has room
|
||||||
|
for (int c = childCount - 1; c >= 0; --c)
|
||||||
|
{
|
||||||
|
// push
|
||||||
|
nextChildEntry.nodeIndex = childIdx[c];
|
||||||
|
nextChildEntry.center = childCenter[c];
|
||||||
|
nextChildEntry.halfSize = childHalf;
|
||||||
|
nextChildEntry.entry = childEntry[c];
|
||||||
|
nextChildEntry.exit = childExit[c];
|
||||||
|
stack[sp++] = nextChildEntry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hasHit)
|
||||||
|
{
|
||||||
|
// commit heavy ops only now
|
||||||
|
float tHit = outHit.maxDistance;
|
||||||
|
|
||||||
|
if( tHit > 0 )
|
||||||
|
{
|
||||||
|
float3 hitPos = b.origin + r.direction * tHit;
|
||||||
|
|
||||||
|
// closest point
|
||||||
|
outHit.origin = ClosestPointOnAABB(hitPos, bestEntry.center, bestEntry.halfSize);
|
||||||
|
|
||||||
|
outHit.origin += outHit.origin - bestEntry.center;
|
||||||
|
|
||||||
|
// append final
|
||||||
|
hits.Append(outHit);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -24,7 +24,7 @@ public class Player : MonoBehaviour
|
|||||||
|
|
||||||
VoxelRaycastGPU.Ray[] rays = new VoxelRaycastGPU.Ray[rayCount];
|
VoxelRaycastGPU.Ray[] rays = new VoxelRaycastGPU.Ray[rayCount];
|
||||||
FillRaysArray(rays);
|
FillRaysArray(rays);
|
||||||
voxelManager.gpuRayCaster.Init(rayCount, rays, 3);
|
voxelManager.gpuRayCaster.Init(rayCount, rays, 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cast( ref VoxelRaycastGPU.BatchData[] batchData, int batchCount, int iIteration )
|
void Cast( ref VoxelRaycastGPU.BatchData[] batchData, int batchCount, int iIteration )
|
||||||
|
|||||||
@ -95,18 +95,20 @@ public class VoxelRaycastGpuManager
|
|||||||
countBuffer.GetData(countArr);
|
countBuffer.GetData(countArr);
|
||||||
currentCount = countArr[0];
|
currentCount = countArr[0];
|
||||||
|
|
||||||
sw.Stop();
|
/**
|
||||||
|
sw.Stop();
|
||||||
VoxelRaycastGPU.BatchData[] hits = new VoxelRaycastGPU.BatchData[currentCount];
|
|
||||||
hitBuffer.GetData(hits, 0, 0, currentCount);
|
VoxelRaycastGPU.BatchData[] hits = new VoxelRaycastGPU.BatchData[currentCount];
|
||||||
for( int i = 0; i < hits.Length; i++ )
|
hitBuffer.GetData(hits, 0, 0, currentCount);
|
||||||
{
|
for( int i = 0; i < hits.Length; i++ )
|
||||||
GameObject sphere = GameObject.CreatePrimitive(PrimitiveType.Sphere);
|
{
|
||||||
sphere.transform.position = hits[i].origin;
|
GameObject sphere = GameObject.CreatePrimitive(PrimitiveType.Sphere);
|
||||||
sphere.transform.localScale = Vector3.one * 0.5f;
|
sphere.transform.position = hits[i].origin;
|
||||||
}
|
sphere.transform.localScale = Vector3.one * 0.5f;
|
||||||
|
}
|
||||||
sw.Start();
|
|
||||||
|
sw.Start();
|
||||||
|
*/
|
||||||
|
|
||||||
iteration++;
|
iteration++;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user