// (c) MX^Add

#include "MetaBalls/MetaBalls.h"
#include "MetaBalls/MetaBallsLockup.h"
#include "RendererTypes/Vector.h"
#include "Renderer/FrameBuffer.h"
#include "Renderer/SoftwareRasterizer.h"
#include "Renderer/Rasterizers.h"
#include "BaseTypes/PicoIntrinsics.h"

#include "Textures/Texture_MetaGradient.h"

#ifdef PI_PICO_TARGET
#include <hardware/interp.h> // Interpolators
#include <pico/time.h>
#undef MIN
#undef MAX
#endif

#define DO_SMOOTH_SHADING 1

// Edge length, in cells, of the cubic scalar-field grid.
// NOTE: The triangle extraction pass addresses the grid as X | (Y << 5) | (Z << 10), i.e. it assumes
//       exactly 32 cells per axis. Changing this value requires changing that indexing as well;
//       the static_assert below turns a silent mismatch into a compile error.
constexpr sint32 MetaGridSize = 32;
// Half of the grid edge; the field is sampled at integer coordinates in [-MetaGridHalf, MetaGridHalf).
constexpr sint32 MetaGridHalf = MetaGridSize/2;
// Field value (on the 0..255 scale stored in the grid) at which the iso-surface is extracted.
constexpr Scalar IsoLevel     = Scalar(32.0f);

static_assert(MetaGridSize == 32, "MetaGridSize is hard-coded as 32 by the grid indexing (<< 5 / << 10)");
// One byte is stored per grid cell, so the whole grid has to fit into the scratch pad.
static_assert(MetaGridSize*MetaGridSize*MetaGridSize <= ScratchPadSize, "Too big!");

struct FEdgeConnection
{
	uint8 x, y;

			  FEdgeConnection()								     {}
	constexpr FEdgeConnection(uint8 _x, uint8 _y) : x(_x), y(_y) {}
};

// Corner pairs (indices into VertexOffsets) for each of the 12 cube edges.
static const FEdgeConnection EdgeConnections[12] =
{
	// Edges 0..3 : around the Z = 0 face
	FEdgeConnection(0, 1),
	FEdgeConnection(1, 2),
	FEdgeConnection(2, 3),
	FEdgeConnection(3, 0),

	// Edges 4..7 : around the Z = 1 face
	FEdgeConnection(4, 5),
	FEdgeConnection(5, 6),
	FEdgeConnection(6, 7),
	FEdgeConnection(7, 4),

	// Edges 8..11 : joining the Z = 0 face to the Z = 1 face
	FEdgeConnection(0, 4),
	FEdgeConnection(1, 5),
	FEdgeConnection(2, 6),
	FEdgeConnection(3, 7)
};

// Unit direction of each cube edge, pointing from its start corner to its end corner
// (same edge order as EdgeConnections).
static const FVector3D EdgeDirections[12] =
{
	FVector3D(1.0f, 0.0f, 0.0f),	// edge  0 : +X
	FVector3D(0.0f, 1.0f, 0.0f),	// edge  1 : +Y
	FVector3D(-1.0f, 0.0f, 0.0f),	// edge  2 : -X
	FVector3D(0.0f, -1.0f, 0.0f),	// edge  3 : -Y

	FVector3D(1.0f, 0.0f, 0.0f),	// edge  4 : +X
	FVector3D(0.0f, 1.0f, 0.0f),	// edge  5 : +Y
	FVector3D(-1.0f, 0.0f, 0.0f),	// edge  6 : -X
	FVector3D(0.0f, -1.0f, 0.0f),	// edge  7 : -Y

	FVector3D(0.0f, 0.0f, 1.0f),	// edge  8 : +Z
	FVector3D(0.0f, 0.0f, 1.0f),	// edge  9 : +Z
	FVector3D(0.0f, 0.0f, 1.0f),	// edge 10 : +Z
	FVector3D(0.0f, 0.0f, 1.0f)		// edge 11 : +Z
};

// Position of each of the 8 cube corners relative to the cell origin (unit cube).
static const FVector3D VertexOffsets[8] =
{
	// Z = 0 face
	FVector3D(0.0f, 0.0f, 0.0f),	// corner 0
	FVector3D(1.0f, 0.0f, 0.0f),	// corner 1
	FVector3D(1.0f, 1.0f, 0.0f),	// corner 2
	FVector3D(0.0f, 1.0f, 0.0f),	// corner 3

	// Z = 1 face
	FVector3D(0.0f, 0.0f, 1.0f),	// corner 4
	FVector3D(1.0f, 0.0f, 1.0f),	// corner 5
	FVector3D(1.0f, 1.0f, 1.0f),	// corner 6
	FVector3D(0.0f, 1.0f, 1.0f)		// corner 7
};

#ifdef PI_PICO_TARGET
__attribute__((noinline))
#endif
// Fills the Z slices [ZStart, ZEnd) of the scalar field grid.
//
// For every integer sample position p = (X, Y, Z), with X and Y in [-MetaGridHalf, MetaGridHalf),
// the stored byte is
//     min(255, 255 * (r0 / |p + v0|^2 + r1 / |p + v1|^2 + r2 / |p + v2|^2))
// converted to uint8.
//
// v0, v1, v2  - per-ball offsets added to the sample position.
// r0, r1, r2  - per-ball numerators of the inverse-square terms.
// ZStart/ZEnd - half-open range of Z sample coordinates to evaluate.
// GridOffset  - offset into RasterizerScratchPad at which the first evaluated cell is written;
//               cells are written consecutively, X fastest, then Y, then Z.
static void __not_in_flash_func(CalculateField)(const FVector3D& v0, const FVector3D& v1, const FVector3D& v2, Scalar r0, Scalar r1, Scalar r2, sint32 ZStart, sint32 ZEnd, uint32 GridOffset)
{
	__restrict uint8 *Cell = RasterizerScratchPad + GridOffset;

	for (sint32 Z = ZStart; Z < ZEnd; Z++)
	{
		for (sint32 Y = -MetaGridHalf; Y < MetaGridHalf; Y++)
		{
			for (sint32 X = -MetaGridHalf; X < MetaGridHalf; X++)
			{
				const FVector3D Pos = FVector3D(Scalar(X), Scalar(Y), Scalar(Z));

				// Sample position relative to each ball
				const FVector3D d0 = Pos + v0;
				const FVector3D d1 = Pos + v1;
				const FVector3D d2 = Pos + v2;

				// Accumulate the three inverse-square contributions
				Scalar Field = r0 / (d0.x*d0.x + d0.y*d0.y + d0.z*d0.z);
				Field += r1 / (d1.x*d1.x + d1.y*d1.y + d1.z*d1.z);
				Field += r2 / (d2.x*d2.x + d2.y*d2.y + d2.z*d2.z);

				// Scale to the byte range, clamp at 255, store and move on to the next cell
				*Cell++ = uint8(MIN(Field * 255.0f, 255.0f));
			}
		}
	}
}

// Computes the animated parameters of the three balls for the given time.
//
// Time       - animation time (callers in this file pass Frame / 30).
// v0, v1, v2 - [out] per-ball offset vectors used by the field evaluation.
// r0, r1, r2 - [out] per-ball numerators of the inverse-square field terms.
static inline void MetaCoefs(Scalar Time, FVector3D& v0, FVector3D& v1, FVector3D& v2, Scalar& r0, Scalar& r1, Scalar &r2)
{
	// Offsets : each component is an independent sine/cosine oscillation
	v0 = FVector3D(
		FScalar::Sin(Time * 0.9f) * 7.0f,
		FScalar::Cos(Time * 1.2f) * 3.0f,
		FScalar::Sin(Time * 0.2f)  * 3.0f);

	v1 = FVector3D(
		FScalar::Sin(Time * 1.1f) * 2.0f,
		FScalar::Cos(Time * 0.6f) * 2.0f,
		Scalar(9.0f) - FScalar::Abs(FScalar::Sin(Time * 2.0f)) * 3.0f);

	v2 = FVector3D(
		FScalar::Sin(Time * 0.5f) * 5.0f,
		FScalar::Cos(Time * 1.2f) * 2.0f,
		FScalar::Cos(Time * 0.7f)  * 3.0f - 6.0f);

	// Base strengths
	r0 = FixedTwo + FScalar::Sin(Time * 0.75f) * 1.3f;
	r1 = FixedTwo + FScalar::Cos(Time * 3.31f) * 1.2f;
	r2 = FixedOne + FScalar::Abs(FScalar::Sin(Time * 0.92f));

	// Periodic pulse : |sin|, cubed to sharpen the peak
	Scalar Beat = FScalar::Abs(FScalar::Sin(1.50f + Time * 5.0f));
	Beat *= Beat * Beat;

	// The same boost is applied to all three balls
	const Scalar Boost = Beat * 0.66f;

	r0 += Boost;
	r1 += Boost;
	r2 += Boost;
}

// Evaluates the scalar field for the given frame into RasterizerScratchPad.
//
// The grid is split in two halves along Z: the lower half (Z in [-MetaGridHalf, 0)) is queued
// through PushSecondCoreWork, the upper half (Z in [0, MetaGridHalf)) is computed by the caller.
// NOTE(review): this function does not wait for the queued work itself - presumably the caller
//               synchronises before the grid is read; confirm against PushSecondCoreWork.
void PrepareMetaGrid(uint16 Frame)
{
	// Ball parameters bundled so they can be handed to the queued work item
	struct FBallParams
	{
		FVector3D v0, v1, v2;
		Scalar    r0, r1, r2;
	};

	const Scalar Time = Scalar(sint32(Frame)) / 30;

	FBallParams Params;
	MetaCoefs(Time, Params.v0, Params.v1, Params.v2, Params.r0, Params.r1, Params.r2);

	// Copy of the parameters placed in the rasterizer input space for the queued work item
	// NOTE(review): assumes PushSecondCoreWork passes this input space as 'Arg' - confirm.
	FBallParams *Shared = (FBallParams *)RasterizerGetInSpace();
	*Shared = Params;

	// Lower half of the grid, written from the start of the scratch pad
	PushSecondCoreWork([](const void* Arg)
	{
		const FBallParams *Work = static_cast<const FBallParams *>(Arg);
		CalculateField(Work->v0, Work->v1, Work->v2, Work->r0, Work->r1, Work->r2, -MetaGridHalf, 0, 0);
	});

	// Upper half of the grid, written right after the lower half
	CalculateField(Params.v0, Params.v1, Params.v2, Params.r0, Params.r1, Params.r2, 0, MetaGridHalf, MetaGridSize * MetaGridSize * MetaGridHalf);
}

#if DO_SMOOTH_SHADING
#ifdef PI_PICO_TARGET
__attribute__((always_inline))
#endif
// Evaluates the (unclamped) field value at an arbitrary position 'v':
//     255 * (r0 / |v + v0|^2 + r1 / |v + v1|^2 + r2 / |v + v2|^2)
// Same formula as the grid fill, but without the clamp to 255 and the conversion to a byte.
static inline Scalar GetVolumeFactorS(const FVector3D &v, const FVector3D &v0, const FVector3D &v1, const FVector3D &v2, Scalar r0, Scalar r1, Scalar r2)
{
	// Position relative to each ball
	const FVector3D d0 = v + v0;
	const FVector3D d1 = v + v1;
	const FVector3D d2 = v + v2;

	Scalar Field = r0 / (d0.x*d0.x + d0.y*d0.y + d0.z*d0.z);
	Field += r1 / (d1.x*d1.x + d1.y*d1.y + d1.z*d1.z);
	Field += r2 / (d2.x*d2.x + d2.y*d2.y + d2.z*d2.z);

	return Field * 255.0f;
}

#ifdef PI_PICO_TARGET
__attribute__((always_inline))
#endif
// Samples the field at 'v' and at three points displaced by +0.5 along X, Y and Z.
// Returns (field at v + 0.5 X, field at v + 0.5 Y, field at v + 0.5 Z, field at v) as (x, y, z, w).
static inline FVector4D GetVolumeFactor(const FVector3D &v, const FVector3D &v0, const FVector3D &v1, const FVector3D &v2, Scalar r0, Scalar r1, Scalar r2)
{
	const Scalar AlongX = GetVolumeFactorS(FVector3D(v.x + 0.5f, v.y, v.z), v0, v1, v2, r0, r1, r2);
	const Scalar AlongY = GetVolumeFactorS(FVector3D(v.x, v.y + 0.5f, v.z), v0, v1, v2, r0, r1, r2);
	const Scalar AlongZ = GetVolumeFactorS(FVector3D(v.x, v.y, v.z + 0.5f), v0, v1, v2, r0, r1, r2);
	const Scalar Centre = GetVolumeFactorS(v, v0, v1, v2, r0, r1, r2);

	return FVector4D(AlongX, AlongY, AlongZ, Centre);
}
#endif

// Walks the scalar field grid stored in RasterizerScratchPad, extracts the iso-surface at IsoLevel
// cell by cell (marching cubes) and submits every resulting triangle to the rasterizer.
//
// Frame      - frame number; with smooth shading it is used to recompute the ball parameters
//              (Time = Frame / 30) so the field can be re-evaluated at the triangle vertices.
// CamProjMat - matrix applied to every vertex through TransformPositionFull.
// LightDir   - light direction the shading normal is dotted with.
// LightAmb   - ambient term added to the clamped diffuse term.
void ExtractMetaTriangles(uint16 Frame, const FMatrix &CamProjMat, const FVector3D &LightDir, Scalar LightAmb)
{
	// NOTE(review): the rasterizer instance is fetched here but not referenced again in this function.
	FSoftwareRasterizer *Rasterizer = FSoftwareRasterizer::Get();

	__restrict const uint8 *Field = RasterizerScratchPad;

	// Triangle vertices are written straight into the rasterizer input space
	#if DO_SMOOTH_SHADING
	FVertexXYXWF *TriVerts = (FVertexXYXWF *)RasterizerGetInSpace();
	#else
	FVertexXYXW  *TriVerts = (FVertexXYXW *)RasterizerGetInSpace();
	#endif

	Scalar        Corner[8];	// Field values at the 8 corners of the current cell
	FVector3D     TriPos[3];	// Untransformed positions of the triangle being assembled

	const uint16 *Gradient = GetData_MetaGradient();

	#if DO_SMOOTH_SHADING
	// Same ball parameters as used for the grid fill of this frame
	const Scalar Time = Scalar(sint32(Frame)) / 30;

	FVector3D bv0, bv1, bv2;
	Scalar    br0, br1, br2;
	MetaCoefs(Time, bv0, bv1, bv2, br0, br1, br2);
	#endif

	// Grid layout : index = X | (Y << 5) | (Z << 10)
	constexpr sint32 RowStride   = 1 << 5;
	constexpr sint32 SliceStride = 1 << 10;

	for (sint32 Z = 0; Z < MetaGridSize-1; Z++)
	{
		for (sint32 Y = 0; Y < MetaGridSize-1; Y++)
		{
			for (sint32 X = 0; X < MetaGridSize-1; X++)
			{
				// X and Y never exceed 30 here, so stepping to a neighbouring corner is a plain add
				const sint32 Base = X | (Y << 5) | (Z << 10);

				Corner[0] = Field[Base];
				Corner[1] = Field[Base + 1];
				Corner[2] = Field[Base + 1 + RowStride];
				Corner[3] = Field[Base + RowStride];

				Corner[4] = Field[Base + SliceStride];
				Corner[5] = Field[Base + 1 + SliceStride];
				Corner[6] = Field[Base + 1 + RowStride + SliceStride];
				Corner[7] = Field[Base + RowStride + SliceStride];

				// Bit i is set when corner i is at or below the iso level
				uint8 CaseIndex = 0;

				for (uint32 i = 0; i < 8; i++)
				{
					if (Corner[i] <= IsoLevel)
						CaseIndex |= uint8(1u << i);
				}

				if (CaseIndex == 0x00 || CaseIndex == 0xFF)
					continue; // All corners on the same side - no surface in this cell

				const uint8 *EdgeList = TriangleConnectionTable[CaseIndex];

				// Position of corner 0 of this cell in field coordinates
				const FVector3D CellOrigin = FVector3D(Scalar(X - MetaGridHalf), Scalar(Y - MetaGridHalf), Scalar(Z - MetaGridHalf));

				uint8 Filled = 0;

				for (uint32 Slot = 0; Slot < 16; Slot++)
				{
					const uint8 Edge = EdgeList[Slot];
					if (Edge == 0xFF)
						break; // End of the edge list

					// Interpolate along the edge to the point where the field crosses the iso level
					const FEdgeConnection Ends = EdgeConnections[Edge];
					const Scalar          ValA = Corner[Ends.x];
					const Scalar          ValB = Corner[Ends.y];
					const FVector3D       Pos  = VertexOffsets[Ends.x] + EdgeDirections[Edge] * ((IsoLevel - ValA) / (ValB - ValA + 0.00001f)) + CellOrigin;

					TriPos[Filled]     = Pos;
					TriVerts[Filled].v = CamProjMat.TransformPositionFull(Pos);
					Filled++;

					if (Filled != 3)
						continue; // Triangle not complete yet

					Filled = 0;

					#if DO_SMOOTH_SHADING

					// Perspective divide; only the sign of the 2D area is needed, so no viewport mapping is applied
					const Scalar InvW0 = 1.0f / TriVerts[0].v.w;
					const Scalar InvW1 = 1.0f / TriVerts[1].v.w;
					const Scalar InvW2 = 1.0f / TriVerts[2].v.w;

					const FVector2D p0(TriVerts[0].v.x * InvW0, TriVerts[0].v.y * InvW0);
					const FVector2D p1(TriVerts[1].v.x * InvW1, TriVerts[1].v.y * InvW1);
					const FVector2D p2(TriVerts[2].v.x * InvW2, TriVerts[2].v.y * InvW2);

					const Scalar Area = (p2.x - p0.x) * (p1.y - p0.y) - (p2.y - p0.y) * (p1.x - p0.x);

					// Only triangles with a negative signed area are shaded and submitted
					if (!(Area < 0.0f))
						continue;

					for (uint32 i = 0; i < 3; i++)
					{
						// Normal from forward differences of the field : points towards decreasing field value
						const FVector4D s = GetVolumeFactor(TriPos[i], bv0, bv1, bv2, br0, br1, br2);

						FVector3D Normal = FVector3D(s.w - s.x, s.w - s.y, s.w - s.z);
						Normal.Normalize();

						// Ambient + clamped diffuse, scaled to the gradient range and capped at 31.5
						TriVerts[i].f = MIN(31.5f, (LightAmb + MAX(0.0f, Normal.Dot(LightDir))) * 31.5f);
					}

					RasterizeTriangle3DVertexFactors(Gradient);

					#else

					// Flat shading : a single face normal from two triangle edges
					FVector3D e0 = TriPos[1] - TriPos[0];
					FVector3D e1 = TriPos[2] - TriPos[1];
					FVector3D Normal = e0.Cross(e1);

					Normal.Normalize();

					uint8 Shade = MIN(0x1F, sint32((LightAmb + MAX(0.0f, Normal.Dot(LightDir))) * 32.0f)); // NOTE::We are using special lockup

					RasterizeTriangle3DConstColor(Gradient[Shade]);

					#endif
				}
			}
		}
	}
}
