// (c) MX^Add
#pragma once

#include "BaseTypes/BaseTypes.h"

#ifdef PI_PICO_TARGET
#include <cmsis_gcc.h> // SIMD
#else
#define WIN32_LEAN_AND_MEAN
#define NOGDI
#define NOMINMAX
#include <windows.h>
//
// Special macros
//
// Each macro expands to its argument unchanged, so a name wrapped in it is left as-is.
// NOTE(review): presumably no-op stand-ins for the Pico SDK function-placement macros,
// letting shared code compile in this non-PI_PICO_TARGET branch - confirm.
#define __time_critical_func(x) x
#define __not_in_flash_func(x) x

//
// Standard intrinsics
//
// Compare-and-swap built on InterlockedCompareExchange:
// stores newval into *ptr only if *ptr currently holds oldval.
// Returns true when the value found in *ptr was equal to oldval.
__forceinline bool __sync_bool_compare_and_swap(uint32 *ptr, uint32 oldval, uint32 newval)
{
	// InterlockedCompareExchange returns the value *ptr held before the call
	const uint32 previous = InterlockedCompareExchange(ptr, newval, oldval);
	return previous == oldval;
}

//
// Emulation of PICO SIMD (only those used in code)
//
// Swaps the two bytes inside each 16-bit half of v (0xAABBCCDD -> 0xBBAADDCC).
__forceinline uint32 __REV16(uint32 v)
{
	// Low byte of each halfword moves up, high byte of each halfword moves down
	const uint32 movedUp = (v << 8) & 0xFF00FF00;
	const uint32 movedDown = (v >> 8) & 0x00FF00FF;
	return movedUp | movedDown;
}

// Adds the low 16-bit halves and the high 16-bit halves of v0 and v1 independently.
// Each lane wraps modulo 2^16; no carry crosses from the low lane into the high lane.
// Returns the two 16-bit sums packed as (high << 16) | low.
__forceinline uint32 __SADD16(uint32 v0, uint32 v1)
{
	// Two's complement addition yields the same 16 result bits for signed and unsigned
	// operands, so the lanes are computed in unsigned arithmetic. This avoids shifting
	// a signed value into the sign bit when packing the high lane.
	const uint32 lo = (v0 + v1) & 0xFFFF;
	const uint32 hi = ((v0 >> 16) + (v1 >> 16)) & 0xFFFF;

	return (hi << 16) | lo;
}

// Subtracts the 16-bit halves of v1 from the matching 16-bit halves of v0 independently.
// Each lane wraps modulo 2^16; no borrow crosses from the low lane into the high lane.
// Returns the two 16-bit differences packed as (high << 16) | low.
__forceinline uint32 __SSUB16(uint32 v0, uint32 v1)
{
	// Two's complement subtraction yields the same 16 result bits for signed and unsigned
	// operands, so the lanes are computed in unsigned arithmetic. This avoids shifting
	// a signed value into the sign bit when packing the high lane.
	const uint32 lo = (v0 - v1) & 0xFFFF;
	const uint32 hi = ((v0 >> 16) - (v1 >> 16)) & 0xFFFF;

	return (hi << 16) | lo;
}

// Dual signed 16-bit multiply with accumulate:
//   result = lo(v0) * lo(v1) + hi(v0) * hi(v1) + v3
// where lo()/hi() are the 16-bit halves interpreted as signed values.
// The final sum wraps modulo 2^32.
__forceinline uint32 __SMLAD(uint32 v0, uint32 v1, uint32 v3)
{
	sint16 v0lo = sint16(v0 & 0xFFFF);
	sint16 v0hi = sint16((v0 >> 16) & 0xFFFF);

	sint16 v1lo = sint16(v1 & 0xFFFF);
	sint16 v1hi = sint16((v1 >> 16) & 0xFFFF);

	// Each 16x16 product fits in 32 signed bits (magnitude at most 2^30)
	const sint32 prodLo = sint32(v0lo) * sint32(v1lo);
	const sint32 prodHi = sint32(v0hi) * sint32(v1hi);

	// Accumulate in unsigned arithmetic: the sum of the two products can reach 2^31
	// (both lanes -32768 * -32768), which would overflow a signed 32-bit addition.
	return uint32(prodLo) + uint32(prodHi) + v3;
}

// Unsigned halving add on each 16-bit lane: (a + b) >> 1 for the low halves and
// for the high halves of v0 and v1, packed back as (high << 16) | low.
__forceinline uint32 __UHADD16(uint32 v0, uint32 v1)
{
	// The 17-bit intermediate sums are kept in 32-bit variables, so the carry survives the halving
	const uint32 lowSum = (v0 & 0xFFFF) + (v1 & 0xFFFF);
	const uint32 highSum = (v0 >> 16) + (v1 >> 16);

	const uint32 lowHalf = lowSum >> 1;
	const uint32 highHalf = highSum >> 1;
	return (highHalf << 16) | lowHalf;
}
#endif