// VoxelRaycastOctree.compute
#pragma kernel CSMain
|
||
|
||
// One octree node as stored in the `nodes` buffer.
// The field order and sizes must match the C# struct layout exactly.
struct LinearNode
{
    // ---- Block 1 (16 bytes)
    float penetrationFactor;   // 4 bytes - not read by the kernel in this file
    float reflexionFactor;     // 4 bytes - not read by the kernel in this file
    uint  childMask;           // 4 bytes - bit i set => child octant i exists
    uint  childBase;           // 4 bytes - index of the first existing child; the others
                               //           follow it, ordered by octant bit position

    // ---- Block 2 (16 bytes)
    uint  isLeaf;              // 4 bytes - 1 when the node is a leaf
    uint  isOccupied;          // 4 bytes - 1 when a leaf counts as a hit
    uint  pad0;                // 4 bytes - padding
    uint  pad1;                // 4 bytes - padding
};
|
||
|
||
// Ray and hit definitions used in buffers.

// One entry of the `rays` buffer: a ray direction preceded by one padding float
// (4 + 12 = 16 bytes).
struct RayData
{
    float  pad;         // padding, never read by the kernel
    float3 direction;   // ray direction; presumably normalised since hit distances are
                        // compared against maxDistance - TODO confirm on the C# side
};
|
||
|
||
// Element type of both `batchDatas` (input) and `hits` (output), 12 + 4 = 16 bytes.
// As input: the origin shared by every ray of a batch and the maximum travel distance.
// As output: the point derived from the hit voxel and the distance at which it was hit.
struct BatchData
{
    float3 origin;        // ray origin (input) / reported hit point (output)
    float  maxDistance;   // distance limit (input) / hit distance (output)
};
|
||
|
||
// Extended hit record laid out as four 16-byte rows.
// NOTE(review): no buffer or function in the visible part of this file uses this struct
// (the `hits` buffer stores BatchData) - confirm whether it is still needed.
struct HitData
{
    // Row 1
    float  penetrationFactor;
    float  reflexionFactor;
    float  lastDistance;
    float  pad0;

    // Row 2
    float3 origin;     // float3 + 1 padding float
    float  pad1;

    // Row 3
    float3 position;   // float3 + 1 padding float
    float  pad2;

    // Row 4
    float  distance;
    uint   hit;
    float2 pad3;
};
|
||
|
||
// A pending node on the per-thread traversal stack. The cube of a node is not stored in
// LinearNode, so it is carried along here and derived from the parent while descending.
struct StackEntry
{
    int    nodeIndex;   // index into `nodes`
    float3 center;      // centre of the node's cube
    float  halfSize;    // half of the cube's edge length
};
|
||
|
||
// ---- Buffers ----
StructuredBuffer<LinearNode>      nodes;        // linearised octree
StructuredBuffer<RayData>         rays;         // ray directions, indexed by thread id.x
StructuredBuffer<BatchData>       batchDatas;   // per-batch origin + max distance, indexed by id.y + startIndexY
AppendStructuredBuffer<BatchData> hits;         // one record appended per ray that hits an occupied leaf

RWStructuredBuffer<uint> hitCount;              // declared but not written by the kernel in this file

// ---- Scalar parameters ----
int nodeCount;          // number of valid entries in `nodes`; used to bounds-check node indices

int    raysPerBatch;    // not read by the kernel in this file
float3 rootCenter;      // centre of the root cube
float  rootHalfSize;    // half edge length of the root cube
int    rootIndex;       // index of the root node in `nodes`

int startIndexY;        // offset added to id.y to obtain the batch index
|
||
|
||
// Snaps a point to a corner of an axis-aligned cube.
//
// NOTE: despite its name, this function does not return the closest point on the box
// surface (nor the closest edge): every axis is snapped to its nearer face, so the
// result is always one of the eight corners of the cube.
//
// hitPos    : point to snap (clamped into the cube first).
// boxCenter : centre of the cube.
// halfSize  : half of the cube's edge length.
// returns   : the corner nearest to the clamped point; on an exact tie along an axis
//             the max-side face is chosen.
float3 ClosestPointOnAABB(float3 hitPos, float3 boxCenter, float halfSize)
{
    float3 minB = boxCenter - halfSize;
    float3 maxB = boxCenter + halfSize;

    // Clamp into the cube so both face distances below are measured from inside it.
    float3 q = clamp(hitPos, minB, maxB);

    // Distance from the clamped point to the min and max face on each axis.
    float3 distToMin = abs(q - minB);
    float3 distToMax = abs(maxB - q);

    // Per axis, keep the coordinate of the nearer face.
    float3 result;
    result.x = (distToMin.x < distToMax.x) ? minB.x : maxB.x;
    result.y = (distToMin.y < distToMax.y) ? minB.y : maxB.y;
    result.z = (distToMin.z < distToMax.z) ? minB.z : maxB.z;

    return result;
}
|
||
|
||
// Slab test between a ray and an axis-aligned cube.
//
// center, halfSize : the cube.
// origin           : ray origin.
// dir              : ray direction (not used here; only invDir enters the computation).
// invDir           : precomputed 1.0 / dir, with the caller guarding against division by 0.
// tEntry (out)     : ray parameter at which the ray enters the cube (may be negative).
// tExit  (out)     : ray parameter at which the ray leaves the cube.
// returns          : true when the cube is crossed at or in front of the origin.
inline bool IntersectAABB_fast(
    float3 center,
    float halfSize,
    float3 origin,
    float3 dir,
    float3 invDir,
    out float tEntry,
    out float tExit)
{
    // Ray parameters at the low and high slab planes of each axis.
    float3 tLow  = ((center - halfSize) - origin) * invDir;
    float3 tHigh = ((center + halfSize) - origin) * invDir;

    // Order them per axis: the sign of invDir decides which plane is met first.
    float3 tNear = min(tLow, tHigh);
    float3 tFar  = max(tLow, tHigh);

    // The ray is inside the cube between the latest entry and the earliest exit.
    tEntry = max(max(tNear.x, tNear.y), tNear.z);
    tExit  = min(min(tFar.x, tFar.y), tFar.z);

    // Reject cubes entirely behind the origin as well as empty intervals.
    float firstValidT = max(tEntry, 0.0);
    return tExit >= firstValidT;
}
|
||
|
||
// Capacity of the per-thread traversal stack (number of pending nodes).
#define STACK_SIZE 64

// Kernel entry point: casts one ray through the octree and appends the nearest hit.
//
// Thread mapping: id.x selects the ray direction in `rays`, id.y + startIndexY selects
// the batch (origin + max distance) in `batchDatas`.
//
// Traversal is an iterative depth-first walk with children pushed so that the nearest
// one is popped first. The best hit distance found so far is kept in outHit.maxDistance
// and used to prune farther nodes.
//
// Output: when an occupied leaf is hit at a distance > 0, one BatchData is appended to
// `hits` with maxDistance = hit distance and origin = 2 * corner - leafCentre, where
// corner is the leaf corner returned by ClosestPointOnAABB for the hit position.
// A hit at distance 0 (origin already inside an occupied leaf) is not appended.
[numthreads(8,8,1)] // keep or change to [numthreads(64,1,1)] and 1D dispatch
void CSMain(uint3 id : SV_DispatchThreadID)
{
    uint rayIndex = id.x;
    uint batchIndex = id.y + startIndexY;
    if (rayIndex >= rays.Length || batchIndex >= batchDatas.Length) return;

    RayData r = rays[rayIndex];
    BatchData b = batchDatas[batchIndex];

    // outHit.maxDistance doubles as "best distance so far"; it starts at the batch limit.
    BatchData outHit;
    outHit.origin = b.origin;
    outHit.maxDistance = b.maxDistance;

    // Safe inverse direction: near-zero components get a large finite value with the
    // matching sign instead of an infinity.
    float eps = 1e-6;
    float3 invDir;
    invDir.x = (abs(r.direction.x) < eps) ? (r.direction.x >= 0 ? 1e8 : -1e8) : 1.0 / r.direction.x;
    invDir.y = (abs(r.direction.y) < eps) ? (r.direction.y >= 0 ? 1e8 : -1e8) : 1.0 / r.direction.y;
    invDir.z = (abs(r.direction.z) < eps) ? (r.direction.z >= 0 ? 1e8 : -1e8) : 1.0 / r.direction.z;

    // Small per-thread stack, seeded with the root node.
    StackEntry stack[STACK_SIZE];
    int sp = 0;

    StackEntry root;
    root.nodeIndex = rootIndex;
    root.center = rootCenter;
    root.halfSize = rootHalfSize;
    stack[sp++] = root;

    bool hasHit = false;
    // Initialised so it is never read undefined; only meaningful once hasHit is true.
    StackEntry bestEntry = root;

    // Traversal
    while (sp > 0)
    {
        StackEntry e = stack[--sp];
        if (e.nodeIndex < 0 || e.nodeIndex >= nodeCount) continue;

        float tEntry, tExit;
        if (!IntersectAABB_fast(e.center, e.halfSize, b.origin, r.direction, invDir, tEntry, tExit)) continue;

        // Prune with the current best: the best hit may have improved since this
        // entry was pushed.
        if (tEntry >= outHit.maxDistance) continue;

        // Fetch the node only after the cheap rejections above.
        LinearNode n = nodes[e.nodeIndex];

        if (n.isLeaf == 1u)
        {
            if (n.isOccupied == 1u)
            {
                // An origin inside the leaf yields a negative tEntry; clamp to 0.
                float tLeaf = max(tEntry, 0.0);
                if (tLeaf < outHit.maxDistance)
                {
                    // Found a closer hit - commit minimal info, defer heavy ops.
                    hasHit = true;
                    outHit.maxDistance = tLeaf;
                    bestEntry = e;
                }
            }
            continue;
        }

        // Non-leaf: gather the children the ray intersects, with their entry distance.
        uint childMask = n.childMask;
        float childT[8];
        int childIdx[8];
        float3 childCenter[8];

        int childCount = 0;

        float childHalf = e.halfSize * 0.5;
        for (uint i = 0; i < 8; ++i)
        {
            if (((childMask >> i) & 1u) == 0u) continue;

            // Existing children are stored contiguously from childBase; the slot of
            // octant i is the number of existing octants below it.
            uint offset = countbits(childMask & ((1u << i) - 1u));
            int cIndex = int(n.childBase + offset);

            // Child centre: bit 2 selects +x, bit 1 selects +y, bit 0 selects +z.
            float3 offsetVec = childHalf * float3(
                (i & 4u) ? 1.0 : -1.0,
                (i & 2u) ? 1.0 : -1.0,
                (i & 1u) ? 1.0 : -1.0
            );
            float3 cCenter = e.center + offsetVec;

            // Pre-test the child cube to get its entry distance.
            float ctEntry, ctExit;
            if (!IntersectAABB_fast(cCenter, childHalf, b.origin, r.direction, invDir, ctEntry, ctExit)) continue;
            if (ctEntry >= outHit.maxDistance) continue; // already farther than the best hit

            // Store for the near-first push below.
            childT[childCount] = ctEntry;
            childIdx[childCount] = cIndex;
            childCenter[childCount] = cCenter;
            childCount++;
        }

        // Sort children by entry distance, ascending (insertion sort, at most 8 elements).
        for (int a = 1; a < childCount; ++a)
        {
            float keyT = childT[a];
            int keyIdx = childIdx[a];
            float3 keyCenter = childCenter[a];
            int j = a - 1;
            while (j >= 0 && childT[j] > keyT)
            {
                childT[j + 1] = childT[j];
                childIdx[j + 1] = childIdx[j];
                childCenter[j + 1] = childCenter[j];
                j--;
            }
            childT[j + 1] = keyT;
            childIdx[j + 1] = keyIdx;
            childCenter[j + 1] = keyCenter;
        }

        // Never write past the end of the stack. When it is nearly full only the
        // nearest children are kept; the farthest ones are dropped, which may miss a
        // hit in a very deep tree but cannot corrupt memory.
        int room = STACK_SIZE - sp;
        int pushCount = min(childCount, room);

        // Push in reverse order so that the nearest child is popped first.
        StackEntry childEntry;
        for (int c = pushCount - 1; c >= 0; --c)
        {
            childEntry.nodeIndex = childIdx[c];
            childEntry.center = childCenter[c];
            childEntry.halfSize = childHalf;
            stack[sp++] = childEntry;
        }
    }

    if (hasHit)
    {
        // Commit heavy ops only now.
        float tHit = outHit.maxDistance;

        // Hits at distance 0 (origin inside an occupied leaf) are not reported.
        if (tHit > 0)
        {
            float3 hitPos = b.origin + r.direction * tHit;

            // Corner of the hit leaf nearest to the hit position ...
            outHit.origin = ClosestPointOnAABB(hitPos, bestEntry.center, bestEntry.halfSize);

            // ... mirrored away from the leaf centre: origin = 2 * corner - centre.
            outHit.origin += outHit.origin - bestEntry.center;

            // Append final
            hits.Append(outHit);
        }
    }
}
|