/* SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only */
/* Copyright (c) 2022-2025 Brett Sheffield <bacs@librecast.net> */

#include "cpu.h"

#ifdef USE_SIMD_x86
/*
 * Execute XGETBV for the extended control register selected by ctr and
 * return the result as one 64-bit value (edx supplies the upper 32 bits,
 * eax the lower 32 bits).
 *
 * The caller in this file only invokes this after the CPUID OSXSAVE bit has
 * been checked.
 */
static inline uint64_t xgetbv (int ctr)
{
	uint32_t lo, hi;

	__asm__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(ctr));
	return ((uint64_t)hi << 32) | lo;
}

/*
 * Execute CPUID.
 *
 * On entry e_x[0], e_x[1] and e_x[2] are loaded into eax, ebx and ecx
 * (eax selects the leaf, ecx the subleaf).  On return e_x[0..3] hold the
 * resulting eax, ebx, ecx and edx.
 */
static inline void cpuid(uint32_t e_x[4])
{
	uint32_t a = e_x[0];
	uint32_t b = e_x[1];
	uint32_t c = e_x[2];
	uint32_t d;

	__asm__ __volatile__("cpuid" : "+a"(a), "+b"(b), "+c"(c), "=d"(d));

	e_x[0] = a;
	e_x[1] = b;
	e_x[2] = c;
	e_x[3] = d;
}

/*
 * Shorthand for the four CPUID registers.  Each name expands to an element
 * of a local array that must be called e_x (order: eax, ebx, ecx, edx).
 */
#define eax e_x[0]
#define ebx e_x[1]
#define ecx e_x[2]
#define edx e_x[3]

/*
 * Probe the CPU with CPUID (and XCR0 via XGETBV) and return a bitmask of the
 * instruction set flags (MMX, SSE, ... AVX512DQ) that are usable.
 *
 * Detection is cumulative: the features are tested in a fixed order and the
 * function returns as soon as one is missing, so a flag is only ever set when
 * every feature tested before it is also present.
 */
static int detect_instruction_sets(void)
{
	uint32_t e_x[4] = {0}; /* registers eax, ebx, ecx, edx */
	int isets = 0;

	/* leaf 0: eax reports the highest basic leaf this CPU implements */
	cpuid(e_x);
	if (!eax) return isets;
	const uint32_t max_leaf = eax;

	/* leaf 1: feature bits in edx and ecx */
	eax = 1, cpuid(e_x);

	/* check edx register */
	if (!(edx & (1U << 0))) return isets;  /* floating point */
	if (!(edx & (1U << 23))) return isets; /* MMX */
	isets |= MMX;
	if (!(edx & (1U << 15))) return isets; /* conditional move */
	if (!(edx & (1U << 24))) return isets; /* FXSAVE */
	if (!(edx & (1U << 25))) return isets; /* SSE */
	isets |= SSE;
	if (!(edx & (1U << 26))) return isets; /* SSE2 */
	isets |= SSE2;

	/* check ecx register */
	if (!(ecx & (1U << 0))) return isets;  /* SSE3 */
	isets |= SSE3;
	if (!(ecx & (1U << 9))) return isets;  /* SSSE3 */
	isets |= SSSE3;
	if (!(ecx & (1U << 19))) return isets; /* SSE4.1 */
	isets |= SSE4_1;
	if (!(ecx & (1U << 23))) return isets; /* POPCNT */
	isets |= POPCNT;
	if (!(ecx & (1U << 20))) return isets; /* SSE4.2 */
	isets |= SSE4_2;

	/* OSXSAVE: only once this bit is confirmed do we read XCR0 */
	if (!(ecx & (1U << 27))) return isets;
	const uint64_t xcr0 = xgetbv(0);
	/* XCR0 bits 1 and 2 must both be set before AVX is reported */
	if ((xcr0 & 6) != 6) return isets;
	if (!(ecx & (1U << 28))) return isets; /* AVX */
	isets |= AVX;

	/*
	 * check ebx register of leaf 7, subleaf 0.  A leaf above max_leaf does
	 * not return feature flags, so never interpret it as such.
	 */
	if (max_leaf < 7) return isets;
	eax = 7, ecx = 0, cpuid(e_x);
	const uint32_t leaf7_ebx = ebx;
	if (!(leaf7_ebx & (1U << 5))) return isets; /* AVX2 */
	isets |= AVX2;

	/* AVX512 */

	/* If the O/S has enabled AVX512 support, bits 5,6,7 of XCR0 are set */
	const uint64_t avx512_bits = (uint64_t)7 << 5;
	if ((xcr0 & avx512_bits) != avx512_bits) return isets;

	if (!(leaf7_ebx & (1U << 16))) return isets; /* AVX512F */

	/* leaf 0xd, subleaf 0: bits 5 and 6 of eax must both be set */
	if (max_leaf < 0xd) return isets;
	eax = 0xd, ecx = 0, cpuid(e_x);
	if ((eax & 0x60) != 0x60) return isets;
	isets |= AVX512F;

	/* 1U, not 1: shifting a signed 1 into bit 31 is undefined behaviour */
	if (!(leaf7_ebx & (1U << 31))) return isets; /* AVX512VL */
	isets |= AVX512VL;

	/* bits 17 (AVX512DQ) and 30 (AVX512BW) are required together */
	if ((leaf7_ebx & 0x40020000) != 0x40020000) return isets;
	isets |= (AVX512BW | AVX512DQ);

	return isets;
}

/*
 * Return the bitmask of instruction sets supported by the running CPU.
 *
 * Detection runs on the first call and the result is cached in a static
 * variable for later calls.  The cache is written only after detection has
 * finished, so it never holds a partially built mask.
 * NOTE: the cache is a plain static int with no synchronisation.
 */
int cpu_instruction_set(void)
{
	static int isets = -1;

	if (isets < 0) isets = detect_instruction_sets();
	return isets;
}
#else
/* Fallback when USE_SIMD_x86 is not defined: report no instruction sets. */
int cpu_instruction_set(void)
{
	return 0;
}
#endif /* USE_SIMD_x86 */
