// (c) MX^Add

#include "Renderer/SoftwareRasterizer.h"
#include "Renderer/FrameBuffer.h"
#include "RendererTypes/Scene.h"

// Brightness multipliers, one per 5-bit light level (index 0..31).
// Users in this file apply an entry as (factor * channel) >> 16 (see PrepareLockup) or hand it to
// FColor16::ScaledValueToWhite (see RenderMesh), so 0xFFFF is the largest factor.
// The values rise monotonically and saturate at 0xFFFF from index 26 onwards.
static const sint32 ColorFactors[32] = { 0x0, 0x00001999, 0x000019ED, 0x00001AE9, 0x00001C8D, 0x00001ED7, 0x000021C5, 0x00002557, 0x00002989, 0x00002E5B, 0x000033CA, 0x000039D4, 0x0000407A, 0x000047BF, 0x00004FA6, 0x00005839, 0x00006185, 0x00006B9E, 0x000076A0, 0x000082B1, 0x00009000, 0x00009EC9, 0x0000AF53, 0x0000C1F6, 0x0000D715, 0x0000EF25, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF };

#ifdef DEMO_EDITOR
// Editor-only counter defined in another translation unit.
// RenderMesh and RenderMeshAdditive add Mesh->NumFaces / 3 to it for every mesh they draw.
extern uint32 DebugTrissPass;
#endif

// Returns the single shared rasterizer instance.
// The instance is a function-local static, so it is constructed on the first call.
FSoftwareRasterizer* FSoftwareRasterizer::Get()
{
	static FSoftwareRasterizer Instance;

	FSoftwareRasterizer* const Shared = &Instance;
	return Shared;
}

// Builds the colour lookup table for colour 0.
FSoftwareRasterizer::FSoftwareRasterizer()
{
	// 0xFFFFFFFF can never equal a 16-bit colour, so the PrepareLockup() call below
	// cannot take its "already prepared" early-out.
	LockupColor = 0xFFFFFFFF;

	PrepareLockup(0);
}

// Nothing to release explicitly.
FSoftwareRasterizer::~FSoftwareRasterizer() = default;

// Fallback texture source used by PrepareTexture() when no texture callback is installed.
// Fills the supplied TextureCacheSize x TextureCacheSize buffer with an XOR pattern:
// each texel stores (x ^ y) OR-ed into the word at bit offsets 0, 6 and 11.
// MaterialID is ignored. Always returns nullptr; PrepareTexture() then uses the cache buffer itself.
const uint16 *FSoftwareRasterizer::EmptyTextureCallback(uint8 MaterialID, uint16 *TextureCache) const
{
	uint16 *RowStart = TextureCache;

	for (sint32 Row = 0; Row < TextureCacheSize; Row++, RowStart += TextureCacheSize)
	{
		for (sint32 Col = 0; Col < TextureCacheSize; Col++)
		{
			const uint16 Pattern = Col ^ Row;

			RowStart[Col] = Pattern | (Pattern << 6) | (Pattern << 11);
		}
	}

	return nullptr;
}

// Rebuilds the 64-entry shading table in TextureCache for a 16-bit material colour.
//   Color - colour split below into 5/6/5-bit channels (bits 11..15, 5..10, 0..4).
//   Flush - when true, pending work items are flushed before the table is overwritten.
// Does nothing if the table is already built for this colour.
// Entries [0..31] are built from the colour itself, entries [32..63] from a slightly darker
// variant; within each bank the index is the light level.
void FSoftwareRasterizer::PrepareLockup(uint16 Color, bool Flush) const
{
	if (LockupColor == Color)
		return;

	if (Flush)
		FlushWorkItems();

	LockupColor = Color;

	// Channels of the requested colour.
	const uint32 BaseR = Color >> 11;
	const uint32 BaseG = (Color >> 5) & 0x3F;
	const uint32 BaseB = Color & 0x1F;

	// Darker variant: one step less on 5-bit channels, two on the 6-bit one, unless that would go too low.
	const uint32 DimR = BaseR > 1 ? BaseR - 1 : BaseR;
	const uint32 DimG = BaseG > 2 ? BaseG - 2 : BaseG;
	const uint32 DimB = BaseB > 1 ? BaseB - 1 : BaseB;

	for (uint32 Level = 0; Level < 32; Level++)
	{
		const uint32 Factor = ColorFactors[Level];

		// Higher light levels also push the channels up (clamped to the channel maximum below).
		const uint32 BoostRB = Level >> 2;
		const uint32 BoostG  = Level >> 1;

		const uint32 BrightR = (Factor * MIN(0x1Fu, BaseR + BoostRB)) >> 16;
		const uint32 BrightG = (Factor * MIN(0x3Fu, BaseG + BoostG )) >> 16;
		const uint32 BrightB = (Factor * MIN(0x1Fu, BaseB + BoostRB)) >> 16;

		const uint32 DarkR   = (Factor * MIN(0x1Fu, DimR + BoostRB)) >> 16;
		const uint32 DarkG   = (Factor * MIN(0x3Fu, DimG + BoostG )) >> 16;
		const uint32 DarkB   = (Factor * MIN(0x1Fu, DimB + BoostRB)) >> 16;

		TextureCache[Level     ] = (BrightR << 11) | (BrightG << 5) | BrightB;
		TextureCache[Level + 32] = (DarkR   << 11) | (DarkG   << 5) | DarkB;
	}
}

// Makes the texture with the given UID the current one (TextureCachePtr).
//   UID   - texture identifier handed to the texture callback.
//   Flush - when true, pending work items are flushed before the cache is touched.
// Does nothing if this texture is already current.
void FSoftwareRasterizer::PrepareTexture(uint8 UID, bool Flush) const
{
	// Tag stored in LockupColor; the upper bits keep it distinct from any 16-bit colour
	// stored there by PrepareLockup().
	const uint32 TextureTag = 0xFFFF1000 | uint32(UID);

	if (LockupColor == TextureTag)
		return;

	if (Flush)
		FlushWorkItems();

	const uint16 *Source = TextureCacheCallback ? TextureCacheCallback(CurrentFrame, UID, TextureCache)
	                                            : EmptyTextureCallback(UID, TextureCache);

	// A null result means the texels were written into TextureCache.
	TextureCachePtr = (Source != nullptr) ? Source : TextureCache;
	LockupColor     = TextureTag;
}

// Resets all render state to defaults: no callbacks, colour and depth clear enabled,
// filled and lit rendering, one directional light and no point light.
void FSoftwareRasterizer::SetDefaultState()
{
	// Callbacks
	CameraUpdateCallback       = nullptr;
	CameraUpdatePlanesCallback = nullptr;
	ColorClearCallback         = nullptr;
	TextureCacheCallback       = nullptr;
	BlendSlotRenderCallback    = nullptr;
	PostDrawCallback           = nullptr;

	// Texture state
	TextureCachePtr = nullptr;

	// Clearing and pass ordering
	ClearDepth            = true;
	ClearColor            = true;
	ClearColorVal         = 0;
	PostBeforeTranslucent = true;

	// Render mode
	Wireframe = false;
	UnlitMode = false;

	// Lighting
	LightAmbientFactor        = Scalar(0.125f);

	DirectionalLightEnabled   = true;
	DirectionalLightVector    = FVector3D(FixedZero, FixedOne, FixedZero);
	DirectionalLightIntensity = FixedOne;

	PointLightEnabled   = false;
	PointLightPosition  = FVector3D(FixedZero, FixedZero, FixedZero);
	PointLightRange     = FixedOne;
	PointLightInvRange  = FixedOne;
	PointLightIntensity = FixedOne;
}

// Installs (or clears, with nullptr) the camera override callback.
// EvaluateCamera() calls it after deriving position/direction/up from the scene camera and
// before building the view and projection matrices; the callback may modify every argument
// passed by reference.
void FSoftwareRasterizer::SetState_CameraCallback(void (*CamCall)(uint16 Frame, FVector3D& CamPos, FVector3D& CamDir, FVector3D& CamUp, Scalar& Aspect, uint8& FOV, uint16& Near, uint16& Far))
{
	CameraUpdateCallback = CamCall;
}

// Installs (or clears, with nullptr) the frustum plane override callback.
// EvaluateCamera() calls it with the six normalized frustum planes, which the callback may modify.
void FSoftwareRasterizer::SetState_CameraPlanesCallback(void (*CamPCall)(uint16 Frame, FVector4D* Planes, const FMatrix& CamMat, const FMatrix& ProjMat, const FMatrix& CamProjMat))
{
	CameraUpdatePlanesCallback = CamPCall;
}

// Installs (or clears, with nullptr) the texture provider.
// PrepareTexture() calls it with the current frame, the texture UID and the cache buffer;
// a nullptr return makes PrepareTexture() use the cache buffer as the texture.
void FSoftwareRasterizer::SetState_SetTextureCallback(const uint16 * (*TextureCallback)(uint16 Frame, uint8 MID, uint16 *Dst))
{
	TextureCacheCallback = TextureCallback;
}

// Installs (or clears, with nullptr) a custom renderer for queued additive meshes.
// When set, Render() calls it for each queued mesh instead of RenderMeshAdditive().
void FSoftwareRasterizer::SetState_SetBlendSlotRenderCallback(void (*BsCallback)(uint16 Frame, FVector3D& CamPos, FVector3D& CamDir, FVector3D& CamUp, const uint8* Base, const FMatrix& LocalToWorld, const FMesh* Mesh))
{
	BlendSlotRenderCallback = BsCallback;
}

// Configures the colour buffer clear.
//   _ClearColor     - whether the colour buffer is cleared each frame.
//   ClearColorValue - 16-bit fill value used when no callback is installed.
//   ClearCallback   - optional; when set, DoColorClear() calls it instead of filling the buffer.
void FSoftwareRasterizer::SetState_ClearColor(bool _ClearColor, uint16 ClearColorValue, void (*ClearCallback)(uint16 Frame, const FVector3D &CamDir, const FVector3D &CamUp, const FMatrix &CamMat, const FMatrix &ProjMat, const FMatrix &CamProjMat))
{
	ColorClearCallback = ClearCallback;
	ClearColorVal      = ClearColorValue;
	ClearColor         = _ClearColor;
}

// Configures the post-draw callback.
//   BeforeTrans  - true: Render() calls it before the queued additive meshes; false: after them.
//   PostCallback - the callback, or nullptr for none.
void FSoftwareRasterizer::SetState_PostRender(bool BeforeTrans, void (*PostCallback)(uint16 Frame, const FVector3D &CamDir, const FVector3D &CamUp, const FMatrix &CamMat, const FMatrix &ProjMat, const FMatrix &CamProjMat))
{
	PostDrawCallback      = PostCallback;
	PostBeforeTranslucent = BeforeTrans;
}

// Enables or disables the depth buffer clear performed at the start of Render().
void FSoftwareRasterizer::SetState_ClearDepth(bool _ClearDepth)
{
	ClearDepth = _ClearDepth;
}

// Selects the global render mode.
//   WireFrameMode - draw triangles as wireframe.
//   _UnlitMode    - skip lighting for every mesh, not only those flagged unlit.
void FSoftwareRasterizer::SetState_RenderMode(bool WireFrameMode, bool _UnlitMode)
{
	UnlitMode = _UnlitMode;
	Wireframe = WireFrameMode;
}

// Sets the ambient term that every lit vertex starts from before the lights are added.
void FSoftwareRasterizer::SetState_AmbientLight(Scalar Factor)
{
	LightAmbientFactor = Factor;
}

// Configures the directional light.
//   Enabled        - whether the light contributes to lit vertices.
//   LightDirection - vector dotted with the vertex normal; stored as given (not normalized here).
//   Intensity      - multiplier applied to the clamped dot product.
void FSoftwareRasterizer::SetState_DirectionalLight(bool Enabled, const FVector3D &LightDirection, Scalar Intensity)
{
	DirectionalLightIntensity = Intensity;
	DirectionalLightVector    = LightDirection;
	DirectionalLightEnabled   = Enabled;
}

// Configures the point light.
//   Enabled       - whether the light contributes to lit vertices.
//   LightPosition - light position, compared against world-space vertex positions.
//   Range         - distance at which the contribution falls to zero. It is used as a divisor
//                   below; a zero Range is not handled here.
//   Intensity     - multiplier applied to the attenuated contribution.
void FSoftwareRasterizer::SetState_PointLightLight(bool Enabled, const FVector3D &LightPosition, Scalar Range, Scalar Intensity)
{
	PointLightEnabled   = Enabled;
	PointLightPosition  = LightPosition;
	PointLightIntensity = Intensity;

	// The reciprocal is cached so per-vertex attenuation is a multiply instead of a divide.
	PointLightRange    = Range;
	PointLightInvRange = FixedOne / PointLightRange;
}

void FSoftwareRasterizer::DoColorClear() const
{
	if (ColorClearDone == 1)
		return;

	if (ColorClearDone == 0)
	{
		TriggerColorWriteBarrier();

		if (ColorClearCallback)
		{
			ColorClearCallback(CurrentFrame, CameraDirection, CameraUp, CameraMatrix, ProjectionMatrix, CameraProjectionMatrix);
		}
		else
		{
			const uint32 Value = uint32(ClearColorVal) | (uint32(ClearColorVal) << 16);
			__restrict uint32 *g = (uint32*)RasterizerFramebuffer;

			for (uint32 i = 0; i < (RasterizerSizeX * RasterizerSizeY) / 2; i++, g++)
				*g = Value;
		}

		
	}
	else
	{
		TriggerColorWriteBarrier();
	}

	ColorClearDone = 1;

	return;
}

// Depth-first search of the scene graph for the camera node whose ID equals CameraID.
//   Parent - parent of Node (nullptr for the root).
//   Node   - node to examine together with its subtree.
// Returns false once the camera has been found (the search stops), true if this subtree does
// not contain it.
//
// On success CameraPtr points at the camera and CameraWorldMatrix holds its world transform.
// The matrix starts as the camera's local transform; while the recursion unwinds, every
// ancestor multiplies in its own local transform. CameraBck always names the next ancestor
// whose transform is still missing.
bool FSoftwareRasterizer::ItetateSceneForCamera(const FGizmo *Parent, const FGizmo *Node)
{
	if (CameraPtr)
		return false; // Camera is found, do not continue...

	const uint8 *Base = ((uint8 *)ScenePtr) + sizeof(FScene);

	if (Node->ID == CameraID && Node->Type == FGizmo::ENodeType::Camera)
	{
		CameraPtr		  = (const FCamera *)Node;
		CameraWorldMatrix = Node->GetLocalTransform(Base, CurrentFrame);
		CameraBck		  = Parent;

		return false; // Camera is found, do not continue...
	}

	if (Node->Childs == 0)
		return true;

	// Child table layout as read below: count byte at offset 0, 16-bit child offsets from offset 2.
	const uint8   ChildsCount = Base[uint32(Node->Childs) << 3];
	const uint16 *ChildsOffs  = (const uint16 *)&Base[(uint32(Node->Childs) << 3)+2];

	for (uint32 i = 0; i < ChildsCount; i++)
	{
		const FGizmo *Child = (const FGizmo *)(Base + (uint32(ChildsOffs[i]) << 3));

		if (ItetateSceneForCamera(Node, Child))
			continue;

		// The camera lives in this child's subtree. If this node is the next pending ancestor,
		// apply its transform and pass the baton to its real parent. The Parent argument must
		// stay untouched for this, otherwise the chain would stop at the camera's direct parent.
		if (Node == CameraBck)
		{
			CameraBck		  = Parent;
			CameraWorldMatrix = CameraWorldMatrix * Node->GetLocalTransform(Base, CurrentFrame); // World = Local * Parent
		}

		return false;
	}

	return true;
}

void FSoftwareRasterizer::ItetateSceneForRendering(const FGizmo* Parent, FMatrix ToWorld, const FGizmo* Node)
{
	if (Node->Childs == 0 && Node->Type != FGizmo::ENodeType::Mesh)
		return;

	const uint8 *Base = ((uint8 *)ScenePtr) + sizeof(FScene);

	ToWorld = Node->GetLocalTransform(Base, CurrentFrame) * ToWorld;

	if (Node->Type == FGizmo::ENodeType::Mesh)
	{
		const FMesh *Mesh = (const FMesh *)Node;
		RenderMesh(Base, ToWorld, Mesh);
	}

	if (Node->Childs != 0)
	{
		Parent = Node;

		const uint8   ChildsCount = Base[uint32(Parent->Childs) << 3];
		const uint16 *ChildsOffs  = (const uint16 *)&Base[(uint32(Parent->Childs) << 3) + 2];
		for (uint32 i = 0; i < ChildsCount; i++)
		{
			const FGizmo *Child = (const FGizmo *)(Base + (uint32(ChildsOffs[i]) << 3));
			if (Child->Childs == 0 && Child->Type != FGizmo::ENodeType::Mesh)
				continue;
			ItetateSceneForRendering(Parent, ToWorld, Child);
		}
	}

	return;
}

bool FSoftwareRasterizer::EvaluateCamera()
{
	CameraBck = nullptr;

	ItetateSceneForCamera(nullptr, ScenePtr->GetRoot());
	
	CameraBck = nullptr;

	if (CameraPtr)
	{
		// CameraWorldMatrix should be OK

		Scalar AspectRatio = Scalar(RasterizerSizeX) / Scalar(RasterizerSizeY);
		uint8  FOV		   = CameraPtr->FOV;
		uint16 Near		   = CameraPtr->Near;
		uint16 Far         = CameraPtr->Far;

		CameraPosition  = FVector3D(CameraWorldMatrix.m[3][0], CameraWorldMatrix.m[3][1], CameraWorldMatrix.m[3][2]);
		CameraDirection = CameraWorldMatrix.TransformNormal(FVector3D(1, 0, 0));
		CameraUp        = CameraWorldMatrix.TransformNormal(FVector3D(0, 1, 0));

		if (CameraUpdateCallback)
			CameraUpdateCallback(CurrentFrame, CameraPosition, CameraDirection, CameraUp, AspectRatio, FOV, Near, Far);

		CameraMatrix    = FMatrix::MakeLookAt(CameraPosition, CameraPosition + CameraDirection, CameraUp);
		ProjectionMatrix= FMatrix::MakePerspectiveProjection(FOV, Near, Far, AspectRatio);

		CameraProjectionMatrix = CameraMatrix * ProjectionMatrix;

		FrustumPlanes[0] = CameraProjectionMatrix.Col(3) + CameraProjectionMatrix.Col(0); // Left
		FrustumPlanes[1] = CameraProjectionMatrix.Col(3) - CameraProjectionMatrix.Col(0); // Right
		FrustumPlanes[2] = CameraProjectionMatrix.Col(3) + CameraProjectionMatrix.Col(1); // Bottom
		FrustumPlanes[3] = CameraProjectionMatrix.Col(3) - CameraProjectionMatrix.Col(1); // Top
		FrustumPlanes[4] = CameraProjectionMatrix.Col(3) + CameraProjectionMatrix.Col(2); // Near
		FrustumPlanes[5] = CameraProjectionMatrix.Col(3) - CameraProjectionMatrix.Col(2); // Far

		for (uint8 i = 0; i < 6; i++)
			FrustumPlanes[i].NormalizePlane();

		if (CameraUpdatePlanesCallback)
			CameraUpdatePlanesCallback(CurrentFrame, FrustumPlanes, CameraMatrix, ProjectionMatrix, CameraProjectionMatrix);

		return true;
	}

	return false;
}

// Renders one frame of a scene.
//   _Scene    - scene to render; only referenced for the duration of this call.
//   Frame     - frame number passed to transform evaluation and to all callbacks.
//   _CameraID - ID of the camera node to render from.
//
// Order of work: reset per-frame state, optionally clear depth, evaluate the camera, render
// the scene graph (additive meshes are queued by RenderMesh), make sure the colour clear has
// happened, then run the post-draw callback and the queued additive meshes in the configured
// order. If the camera cannot be found, only the colour clear is performed.
void FSoftwareRasterizer::Render(const FScene *_Scene, uint16 Frame, uint16 _CameraID)
{
	CurrentFrame	    = Frame;
	CameraID		    = _CameraID;
	ColorClearDone	    = ClearColor ? 0 : 2; // 0 = clear pending, 2 = no clear wanted (see DoColorClear)
	ScenePtr		    = _Scene;
	CameraPtr		    = nullptr;
	LockupColor		    = 0xFFFFFFFF;         // Matches neither a colour nor a texture tag
	TextureCachePtr		= nullptr;
	NumOfAdditiveMeshes = 0;

	//
	// Clear depth at start
	//
	if (ClearDepth)
		memset(RasterizerDepthbuffer, -1, RasterizerSizeX*RasterizerSizeY*sizeof(uint16));

	//
	// Find and evaluate camera
	//
	if (!EvaluateCamera())
	{
		CameraWorldMatrix.Identity();
		CameraMatrix.Identity();
		ProjectionMatrix.Identity();
		CameraProjectionMatrix.Identity();

		CameraPosition  = FVector3D(FixedZero, FixedZero, FixedZero);
		CameraDirection = FVector3D(FixedOne, FixedZero, FixedZero);
		CameraUp		= FVector3D(FixedZero, FixedOne, FixedZero);

		DoColorClear();

		// Drop the scene reference here as well, exactly like the normal exit below, so no
		// pointer to the caller's scene outlives this call.
		ScenePtr  = nullptr;
		CameraPtr = nullptr;

		return;
	}

	//
	// Evaluate and render whole scene
	//
	FMatrix SceneMatrix; SceneMatrix.Identity();
	#if ((defined VISIBILITY_CHECKER) && VISIBILITY_CHECKER)
	extern bool __PauseVisibilityReporting;
	__PauseVisibilityReporting = true;
	#endif
	ItetateSceneForRendering(nullptr, SceneMatrix, ScenePtr->GetRoot());
	FlushWorkItems();
	#if ((defined VISIBILITY_CHECKER) && VISIBILITY_CHECKER)
	// Second pass with reporting enabled.
	__PauseVisibilityReporting = false;
	ItetateSceneForRendering(nullptr, SceneMatrix, ScenePtr->GetRoot());
	FlushWorkItems();
	#endif

	// Covers the case where nothing was drawn and the deferred clear never ran.
	DoColorClear();

	if (PostBeforeTranslucent && PostDrawCallback)
	{
		PostDrawCallback(CurrentFrame, CameraDirection, CameraUp, CameraMatrix, ProjectionMatrix, CameraProjectionMatrix);
		FlushWorkItems();
	}

	//
	// Additive meshes queued by RenderMesh
	//
	if (NumOfAdditiveMeshes)
	{
		const uint8 *SceneBase = ((const uint8 *)ScenePtr) + sizeof(FScene);

		for (uint32 i = 0; i < NumOfAdditiveMeshes; i++)
		{
			if (BlendSlotRenderCallback)
				BlendSlotRenderCallback(CurrentFrame, CameraPosition, CameraDirection, CameraUp, SceneBase, AdditiveBlendTransforms[i], AdditiveBlendMeshes[i]);
			else
				RenderMeshAdditive(SceneBase, AdditiveBlendTransforms[i], AdditiveBlendMeshes[i]);
		}
		FlushWorkItems();
	}

	if (!PostBeforeTranslucent && PostDrawCallback)
	{
		PostDrawCallback(CurrentFrame, CameraDirection, CameraUp, CameraMatrix, ProjectionMatrix, CameraProjectionMatrix);
		FlushWorkItems();
	}

	ScenePtr  = nullptr;
	CameraPtr = nullptr;
}

void FSoftwareRasterizer::RenderMesh(const uint8 *Base, const FMatrix& LocalToWorld, const FMesh* Mesh) const
{
	// Check of bounding sphere is in frustum

	const Scalar MatrixScaleX = FScalar::Abs(LocalToWorld.GetScaleX());
	const Scalar MatrixScaleY = FScalar::Abs(LocalToWorld.GetScaleY());
	const Scalar MatrixScaleZ = FScalar::Abs(LocalToWorld.GetScaleZ());

	const FVector3D BBCenter = LocalToWorld.TransformPosition(FVector3D(Mesh->BoundingSphere.x, Mesh->BoundingSphere.y, Mesh->BoundingSphere.z));
	const Scalar    BBRadius = MAX(MatrixScaleX, MatrixScaleY, MatrixScaleZ) * Mesh->BoundingSphere.w;

	if (!FVector4D::SphereInFrustum(BBCenter, BBRadius, FrustumPlanes))
	{
		#if SW_DEBUG_DRAW_BOUNDS
		DoColorClear();
		DrawDebugSphere(BBCenter, BBRadius, FColor16(255, 0, 0).toColorNoClip());
		#endif
		return;
	}

	#if SW_DEBUG_DRAW_BOUNDS
	DoColorClear();
	DrawDebugSphere(BBCenter, BBRadius, FColor16(255, 255, 255).toColorNoClip());
	#endif

	if (Mesh->MaterialID & MaterialFlagAdd)
	{
		if (NumOfAdditiveMeshes < AdditiveBlendSlots)
		{
			AdditiveBlendTransforms[NumOfAdditiveMeshes] = LocalToWorld;
			AdditiveBlendMeshes    [NumOfAdditiveMeshes] = Mesh;

			NumOfAdditiveMeshes++;
		}
		else
		{
			assert(false); // To many additive meshes for number of slots!
		}

		return;
	}

	#ifdef DEMO_EDITOR
	DebugTrissPass += Mesh->NumFaces/3;
	#endif
	
	// Render mesh

	const uint8  * __restrict VerticesPtr =                  Base + (uint32(Mesh->VerticesOffset) << 3);
	const uint16 * __restrict FacesPtr    = (const uint16 *)(Base + (uint32(Mesh->FacesOffset)    << 3));

	const bool LocalUnlit = UnlitMode || (Mesh->MaterialID & MaterialFlagUnlit) != 0;

	bool ContainsScale = false;

	if (!ContainsScale && FScalar::Abs(MatrixScaleX-FixedOne) > FixedEpsilon)
		ContainsScale = true;

	if (!ContainsScale && FScalar::Abs(MatrixScaleY-FixedOne) > FixedEpsilon)
		ContainsScale = true;

	if (!ContainsScale && FScalar::Abs(MatrixScaleZ-FixedOne) > FixedEpsilon)
		ContainsScale = true;

	FMatrix TotalMatrix = LocalToWorld * CameraProjectionMatrix;

	bool PointLightEnabledLocally = !LocalUnlit && PointLightEnabled;

	if (PointLightEnabledLocally)
	{
		// Light clip

		Scalar Dist = (BBCenter - PointLightPosition).Length();
		if (Dist > BBRadius + PointLightRange)
			PointLightEnabledLocally = false;
	}

	DBGCLRP();

	switch (Mesh->VertexFormat)
	{
		case FMesh::EVertexFormat::Simple:
			{
				const FMesh::FVertex *Vtxs = (const FMesh::FVertex *)VerticesPtr;

				DoColorClear();

				void * __restrict InSpacePtr = RasterizerGetInSpace();

				if (Wireframe || LocalUnlit || Mesh->MaterialColor == 0)
				{
					FVertexXYXW * __restrict Input = (FVertexXYXW *)InSpacePtr;

					for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
					{
						#ifdef PI_PICO_TARGET
						#pragma GCC unroll 3
						#endif
						for (uint32 k = 0; k < 3; k++)
						{
							Input[k].v = TotalMatrix.TransformPositionFull(Vtxs[FacesPtr[i+k]].Pos);
						}

						if (Wireframe)
							RasterizeTriangle3DWireframeConstColor(Mesh->MaterialColor);
						else
							RasterizeTriangle3DConstColor(Mesh->MaterialColor);

						DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
					}
				}
				else
				{
					FVertexXYXWF * __restrict Input = (FVertexXYXWF *)InSpacePtr;

					InvalidateVertexCache();
					FVertexXYXWF * __restrict Cache = (FVertexXYXWF *)VertexCacheSpace;

					PrepareLockup(Mesh->MaterialColor);

					for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
					{
						#ifdef PI_PICO_TARGET
						#pragma GCC unroll 3
						#endif
						for (uint32 k = 0; k < 3; k++)
						{
							const uint16 Index  = FacesPtr[i+k];
							const uint16 CEntry = Index & (VertexCacheSize-1);

							if (VertexCacheInx[CEntry] == Index)
							{ 
								Input[k] = Cache[CEntry];
								continue;
							}

							FMesh::FVertex V = Vtxs[Index];

							Input[k].v  = TotalMatrix.TransformPositionFull(V.Pos);
							FVector3D n = LocalToWorld.TransformNormal(FMesh::DecodeNormal(V.Nrm));

							if (ContainsScale)
								n.Normalize();

							Scalar lf = LightAmbientFactor;

							if (DirectionalLightEnabled)
								lf += FScalar::Max(FixedZero, n.Dot(DirectionalLightVector)) * DirectionalLightIntensity;

							if (PointLightEnabledLocally)
							{
								FVector3D vw = LocalToWorld.TransformPosition(V.Pos);
								Scalar l;

								vw  = PointLightPosition - vw; 
								l   = vw.Length();
								if (l > 0) 
									vw /= l;

								l = FixedOne - (l * PointLightInvRange);
								if (l > 0)
									lf += FScalar::Max(FixedZero, n.Dot(vw)) * l * PointLightIntensity;
							}

							Input[k].f = FScalar::Clamp01(lf) * 31.5f; // Round

							VertexCacheInx[CEntry] = Index;
							Cache[CEntry]          = Input[k];
						}
						
						uint16 fa = TextureCache[uint32(Input[0].f)];
						uint16 fb = TextureCache[uint32(Input[1].f)];
						uint16 fc = TextureCache[uint32(Input[2].f)];

						if (fa == fb && fa == fc)
						{
							FVector4D b = Input[1].v;
							FVector4D c = Input[2].v;

							FVertexXYXW *Flats      = (FVertexXYXW *)InSpacePtr;
										 Flats[1].v = b;
										 Flats[2].v = c;

							RasterizeTriangle3DConstColor(fa);
						}
						else
						{
							RasterizeTriangle3DVertexFactors(TextureCache);
						}

						DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
					}
				}
			}
		break;

		case FMesh::EVertexFormat::Color:
			{
				const FMesh::FVertexColor *Vtxs = (const FMesh::FVertexColor *)VerticesPtr;

				DoColorClear();

				void * __restrict InSpacePtr = RasterizerGetInSpace();

				if (Wireframe || LocalUnlit)
				{
					FVertexXYXWC * __restrict Input = (FVertexXYXWC *)InSpacePtr;

					for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
					{
						#ifdef PI_PICO_TARGET
						#pragma GCC unroll 3
						#endif
						for (uint32 k = 0; k < 3; k++)
						{
							FMesh::FVertexColor V = Vtxs[FacesPtr[i+k]];

							Input[k].v = TotalMatrix.TransformPositionFull(V.Pos);
							Input[k].c = V.Clr;
						}

						if (LocalUnlit)
							RasterizeTriangle3DVertexColors(false);
						else
							RasterizeTriangle3DWireframeVertexColors();

						DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
					}
				}
				else
				{
					FVertexXYXWC * __restrict Input = (FVertexXYXWC *)InSpacePtr;

					InvalidateVertexCache();
					FVertexXYXWC * __restrict Cache = (FVertexXYXWC *)VertexCacheSpace;

					for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
					{
						#ifdef PI_PICO_TARGET
						#pragma GCC unroll 3
						#endif
						for (uint32 k = 0; k < 3; k++)
						{
							const uint16 Index  = FacesPtr[i+k];
							const uint16 CEntry = Index & (VertexCacheSize-1);

							if (VertexCacheInx[CEntry] == Index)
							{ 
								Input[k] = Cache[CEntry];
								continue;
							}

							FMesh::FVertexColor V = Vtxs[Index];
	
							Input[k].v  = TotalMatrix.TransformPositionFull(V.Pos);
							Input[k].c  = V.Clr;
							FVector3D n = LocalToWorld.TransformNormal(FMesh::DecodeNormal(V.Nrm));

							if (ContainsScale)
								n.Normalize();

							Scalar lf = LightAmbientFactor;

							if (DirectionalLightEnabled)
								lf += FScalar::Max(FixedZero, n.Dot(DirectionalLightVector)) * DirectionalLightIntensity;

							if (PointLightEnabledLocally)
							{
								FVector3D vw = LocalToWorld.TransformPosition(V.Pos);
								Scalar l;

								vw  = PointLightPosition - vw; 
								l   = vw.Length();
								if (l > 0) 
									vw /= l;

								l = FixedOne - (l * PointLightInvRange);
								if (l > 0)
									lf += FScalar::Max(FixedZero, n.Dot(vw)) * l * PointLightIntensity;
							}

							Input[k].c = Input[k].c.ScaledValueToWhite(ColorFactors[sint32(FScalar::Clamp01(lf) * 31.5f)]);

							VertexCacheInx[CEntry] = Index;
							Cache[CEntry]          = Input[k];
						}

						RasterizeTriangle3DVertexColors(false);

						DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
					}
				}
			}
		break;

		case FMesh::EVertexFormat::UV:
			{
				const FMesh::FVertexUV *Vtxs = (const FMesh::FVertexUV *)VerticesPtr;

				DoColorClear();

				void * __restrict InSpacePtr = RasterizerGetInSpace();

				PrepareTexture(Mesh->MaterialID & MaterialUIDMask);

				if (Wireframe)
				{
					FVertexXYXW * __restrict Input = (FVertexXYXW *)InSpacePtr;

					for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
					{
						#ifdef PI_PICO_TARGET
						#pragma GCC unroll 3
						#endif
						for (uint32 k = 0; k < 3; k++)
						{
							Input[k].v = TotalMatrix.TransformPositionFull(Vtxs[FacesPtr[i+k]].Pos);
						}
		
						RasterizeTriangle3DWireframeConstColor(Mesh->MaterialColor);

						DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
					}
				}
				else
				if (LocalUnlit)
				{
					FVertexXYXWUV * __restrict Input = (FVertexXYXWUV *)InSpacePtr;

					for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
					{
						#ifdef PI_PICO_TARGET
						#pragma GCC unroll 3
						#endif
						for (uint32 k = 0; k < 3; k++)
						{
							FMesh::FVertexUV V = Vtxs[FacesPtr[i+k]];
	
							Input[k].v  = TotalMatrix.TransformPositionFull(V.Pos);
							Input[k].uv = FVector2D(Scalar(sint32(V.UV & 0xFF)), Scalar(sint32(V.UV >> 8)));;
						}

						RasterizeTriangle3DTexturedUnlit(TextureCachePtr);

						DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
					}
				}
				else
				{
					FVertexXYXWUVF * __restrict Input = (FVertexXYXWUVF *)InSpacePtr;

					InvalidateVertexCache();
					FVertexXYXWUVF * __restrict Cache = (FVertexXYXWUVF *)VertexCacheSpace;

					for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
					{
						#ifdef PI_PICO_TARGET
						#pragma GCC unroll 3
						#endif
						for (uint32 k = 0; k < 3; k++)
						{
							const uint16 Index  = FacesPtr[i+k];
							const uint16 CEntry = Index & (VertexCacheSize-1);

							if (VertexCacheInx[CEntry] == Index)
							{ 
								Input[k] = Cache[CEntry];
								continue;
							}

							FMesh::FVertexUV V = Vtxs[Index];
	
							Input[k].v  = TotalMatrix.TransformPositionFull(V.Pos);
							Input[k].uv = FVector2D(Scalar(sint32(V.UV & 0xFF)), Scalar(sint32(V.UV >> 8)));;
							FVector3D n = LocalToWorld.TransformNormal(FMesh::DecodeNormal(V.Nrm));

							if (ContainsScale)
								n.Normalize();

							Scalar lf = LightAmbientFactor;

							if (DirectionalLightEnabled)
								lf += FScalar::Max(FixedZero, n.Dot(DirectionalLightVector)) * DirectionalLightIntensity;

							if (PointLightEnabledLocally)
							{
								FVector3D vw = LocalToWorld.TransformPosition(V.Pos);
								Scalar l;

								vw  = PointLightPosition - vw; 
								l   = vw.Length();
								if (l > 0) 
									vw /= l;

								l = FixedOne - (l * PointLightInvRange);
								if (l > 0)
									lf += FScalar::Max(FixedZero, n.Dot(vw)) * l * PointLightIntensity;
							}
				
							Input[k].f = FScalar::Clamp01(lf);

							VertexCacheInx[CEntry] = Index;
							Cache[CEntry]          = Input[k];
						}

						RasterizeTriangle3DTexturedLit(TextureCachePtr);

						DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
					}
				}
			}
		break;
	}

	return;
}

void FSoftwareRasterizer::RenderMeshAdditive(const uint8* Base, const FMatrix& LocalToWorld, const FMesh* Mesh) const
{
	// Render mesh

	const uint8  * __restrict VerticesPtr =                  Base + (uint32(Mesh->VerticesOffset) << 3);
	const uint16 * __restrict FacesPtr    = (const uint16 *)(Base + (uint32(Mesh->FacesOffset)    << 3));

	FMatrix TotalMatrix = LocalToWorld * CameraProjectionMatrix;

	#ifdef DEMO_EDITOR
	DebugTrissPass += Mesh->NumFaces/3;
	#endif

	DBGCLRP();

	switch (Mesh->VertexFormat)
	{
		case FMesh::EVertexFormat::Simple:
			{
				const FMesh::FVertex * __restrict Vtxs = (const FMesh::FVertex *)VerticesPtr;

				DoColorClear();

				void * __restrict InSpacePtr = RasterizerGetInSpace();

				FColor16 Color = Mesh->MaterialColor;

				FVertexXYXWC * __restrict Input = (FVertexXYXWC *)InSpacePtr;

				for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
				{
					#ifdef PI_PICO_TARGET
					#pragma GCC unroll 3
					#endif
					for (uint32 k = 0; k < 3; k++)
					{
						Input[k].v = TotalMatrix.TransformPositionFull(Vtxs[FacesPtr[i+k]].Pos);
						Input[k].c = Color;
					}

					RasterizeTriangle3DVertexColors(true);

					DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
				}
			}
		break;

		case FMesh::EVertexFormat::Color:
			{
				const FMesh::FVertexColor * __restrict Vtxs = (const FMesh::FVertexColor *)VerticesPtr;

				DoColorClear();

				void * __restrict InSpacePtr = RasterizerGetInSpace();

				FVertexXYXWC * __restrict Input = (FVertexXYXWC *)InSpacePtr;

				for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
				{
					#ifdef PI_PICO_TARGET
					#pragma GCC unroll 3
					#endif
					for (uint32 k = 0; k < 3; k++)
					{
						FMesh::FVertexColor V = Vtxs[FacesPtr[i+k]];
						Input[k].v = TotalMatrix.TransformPositionFull(V.Pos);
						Input[k].c = V.Clr;
					}
			
					RasterizeTriangle3DVertexColors(true);

					DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
				}
			}
		break;

		case FMesh::EVertexFormat::UV:
			{
				const FMesh::FVertexUV * __restrict Vtxs = (const FMesh::FVertexUV *)VerticesPtr;

				DoColorClear();

				void * __restrict InSpacePtr = RasterizerGetInSpace();

				FColor16 Color = Mesh->MaterialColor;

				FVertexXYXWC * __restrict Input = (FVertexXYXWC *)InSpacePtr;

				for (uint32 i = 0; i < Mesh->NumFaces; i+=3)
				{
					#ifdef PI_PICO_TARGET
					#pragma GCC unroll 3
					#endif
					for (uint32 k = 0; k < 3; k++)
					{
						Input[k].v = TotalMatrix.TransformPositionFull(Vtxs[FacesPtr[i+k]].Pos);
						Input[k].c = Color;
					}

					RasterizeTriangle3DVertexColors(true);

					DBGF(Mesh, VerticesPtr, FacesPtr, i/3, __VisPixelsRendered); DBGCLRP();
				}
			}
		break;
	}

	return;
}

#if SW_DEBUG_DRAW_BOUNDS
// Debug helper: draws a sphere as three axis-aligned circles (XY, XZ and YZ planes),
// each approximated by 16 line segments.
//   Center - sphere centre, transformed below by CameraProjectionMatrix.
//   Radius - sphere radius.
//   Color  - 16-bit line colour.
void FSoftwareRasterizer::DrawDebugSphere(const FVector3D& Center, Scalar Radius, uint16 Color) const
{
	constexpr sint32 NumSegments = 16;
	constexpr Scalar  AngleStep   = FixedTwoPi / Scalar(NumSegments);

	for (sint32 Segment = 0; Segment < NumSegments; Segment++)
	{
		const Scalar AngleFrom = Scalar(Segment)     * AngleStep;
		const Scalar AngleTo   = Scalar(Segment + 1) * AngleStep;

		const Scalar SinFrom = FScalar::Sin(AngleFrom);
		const Scalar CosFrom = FScalar::Cos(AngleFrom);
		const Scalar SinTo   = FScalar::Sin(AngleTo);
		const Scalar CosTo   = FScalar::Cos(AngleTo);

		// Unit offsets of the segment end points, one pair per circle: XY, XZ, YZ
		const FVector3D UnitFrom[3] =
		{
			FVector3D(CosFrom,   SinFrom,   FixedZero),
			FVector3D(CosFrom,   FixedZero, SinFrom),
			FVector3D(FixedZero, CosFrom,   SinFrom)
		};
		const FVector3D UnitTo[3] =
		{
			FVector3D(CosTo,     SinTo,     FixedZero),
			FVector3D(CosTo,     FixedZero, SinTo),
			FVector3D(FixedZero, CosTo,     SinTo)
		};

		for (sint32 Circle = 0; Circle < 3; Circle++)
		{
			const FVector3D WorldFrom = Center + UnitFrom[Circle] * Radius;
			const FVector3D WorldTo   = Center + UnitTo[Circle]   * Radius;

			const FVector4D ClipFrom = CameraProjectionMatrix.TransformPositionFull(WorldFrom);
			const FVector4D ClipTo   = CameraProjectionMatrix.TransformPositionFull(WorldTo);

			RasterizeLine3D(ClipFrom, ClipTo, Color, true);
		}
	}
}
#endif
