use core::mem::MaybeUninit;
use core::ptr;

use simd_abstraction::SIMD256;

use crate::polyfill::SIMD256Ext;
use crate::{Error, ERROR};

/// Generates the five public entry points of an instruction-set-specific module.
///
/// * `$feat` is forwarded to `#[target_feature(enable = ...)]` on every function.
/// * `$isa` is a type implementing `simd_abstraction::InstructionSet`; each function
///   builds its token with `new_unchecked()` and hands it to the routine of the
///   same name in `crate::generic`.
///
/// Every generated function is `unsafe`: it is compiled with `$feat` enabled, so it
/// must only be called on a CPU that supports that feature.
macro_rules! specialize_for {
    ($feat:literal, $isa:ty) => {
        /// Forwards to `crate::generic::parse`.
        ///
        /// # Safety
        /// The running CPU must support the target feature enabled on this function.
        #[inline]
        #[target_feature(enable = $feat)]
        pub unsafe fn parse(s: &[u8]) -> Result<[u8; 16], $crate::Error> {
            crate::generic::parse(
                <$isa as simd_abstraction::InstructionSet>::new_unchecked(),
                s,
            )
        }

        /// Forwards to `crate::generic::parse_simple`.
        ///
        /// # Safety
        /// The running CPU must support the target feature enabled on this function.
        #[inline]
        #[target_feature(enable = $feat)]
        pub unsafe fn parse_simple(s: &[u8]) -> Result<[u8; 16], $crate::Error> {
            crate::generic::parse_simple(
                <$isa as simd_abstraction::InstructionSet>::new_unchecked(),
                s,
            )
        }

        /// Forwards to `crate::generic::parse_hyphenated`.
        ///
        /// # Safety
        /// The running CPU must support the target feature enabled on this function.
        #[inline]
        #[target_feature(enable = $feat)]
        pub unsafe fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], $crate::Error> {
            crate::generic::parse_hyphenated(
                <$isa as simd_abstraction::InstructionSet>::new_unchecked(),
                s,
            )
        }

        /// Forwards to `crate::generic::format_simple`.
        ///
        /// # Safety
        /// The running CPU must support the target feature enabled on this function.
        #[inline]
        #[target_feature(enable = $feat)]
        pub unsafe fn format_simple(src: &[u8; 16], upper: bool) -> [u8; 32] {
            crate::generic::format_simple(
                <$isa as simd_abstraction::InstructionSet>::new_unchecked(),
                src,
                upper,
            )
        }

        /// Forwards to `crate::generic::format_hyphenated`.
        ///
        /// # Safety
        /// The running CPU must support the target feature enabled on this function.
        #[inline]
        #[target_feature(enable = $feat)]
        pub unsafe fn format_hyphenated(src: &[u8; 16], upper: bool) -> [u8; 36] {
            crate::generic::format_hyphenated(
                <$isa as simd_abstraction::InstructionSet>::new_unchecked(),
                src,
                upper,
            )
        }
    };
}

/// A 32-byte table stored on a 32-byte boundary.
#[repr(C, align(32))]
struct Bytes32(pub [u8; 32]);

impl Bytes32 {
    /// Reads the whole table into a 256-bit vector through `v256_load`.
    fn load<S: SIMD256>(s: S, this: &Self) -> S::V256 {
        let Self(table) = this;
        let addr: *const u8 = table.as_ptr();
        // SAFETY: `addr` points at 32 readable bytes, and `#[repr(align(32))]` places
        // them on a 32-byte boundary (presumably the requirement of `v256_load`, as
        // opposed to the `v256_loadu` used for unaligned input elsewhere).
        unsafe { s.v256_load(addr) }
    }
}

#[inline]
pub(crate) fn parse<S: SIMD256Ext>(s: S, mut input: &[u8]) -> Result<[u8; 16], Error> {
    #[inline]
    fn judge_other(input: &[u8]) -> Result<&[u8], Error> {
        match input.len() {
            // Microsoft GUID
            38 => {
                if input[0] == b'{' && input[37] == b'}' {
                    Ok(&input[1..37])
                } else {
                    Err(ERROR)
                }
            }
            // URN prefixed UUID
            45 => match input.strip_prefix(b"urn:uuid:") {
                Some(s) => Ok(s),
                None => Err(ERROR),
            },
            _ => Err(ERROR),
        }
    }

    let n = input.len();
    if n == 32 {
        return parse_simple(s, input);
    }
    if n != 36 {
        input = judge_other(input)?;
    }
    parse_hyphenated(s, input)
}

#[inline]
pub(crate) fn parse_simple<S: SIMD256>(s: S, input: &[u8]) -> Result<[u8; 16], Error> {
    if input.len() != 32 {
        return Err(ERROR);
    }
    let a = unsafe { s.v256_loadu(input.as_ptr()) };
    hex_decode(s, a)
}

#[inline]
pub(crate) fn parse_hyphenated<S: SIMD256Ext>(s: S, input: &[u8]) -> Result<[u8; 16], Error> {
    if input.len() != 36 {
        return Err(ERROR);
    }

    let x = unsafe {
        [
            *input.get_unchecked(8),
            *input.get_unchecked(13),
            *input.get_unchecked(18),
            *input.get_unchecked(23),
        ]
    };
    if !matches!(x, [b'-', b'-', b'-', b'-']) {
        return Err(ERROR);
    }

    let shuffle = Bytes32::load(
        s,
        &Bytes32([
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, //
            0x09, 0x0a, 0x0b, 0x0c, 0x0e, 0x0f, 0x80, 0x80, //
            0x03, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a, 0x0b, //
            0x0c, 0x0d, 0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, //
        ]),
    );

    let base: *const u8 = input.as_ptr();
    unsafe {
        let a0 = s.v256_loadu(base.cast());
        let a1 = s.i8x32_shuffle(a0, shuffle);
        let a2 = s.i16x16_write_index7(a1, base.add(16).cast::<i16>().read_unaligned());
        let a3 = s.i32x8_write_index7(a2, base.add(32).cast::<i32>().read_unaligned());
        hex_decode(s, a3)
    }
}

/// Formats 16 bytes as 32 hex characters without separators.
///
/// `upper` is passed through to `hex_encode`, which picks the upper- or
/// lower-case digit table.
#[inline]
pub(crate) fn format_simple<S: SIMD256>(s: S, src: &[u8; 16], upper: bool) -> [u8; 32] {
    let raw = src.as_ptr();
    // SAFETY: `raw` comes from a reference to 16 bytes, so the unaligned 16-byte
    // load reads only memory owned by `src`.
    unsafe {
        let digits = hex_encode(s, s.v128_loadu(raw.cast()), upper);
        s.v256_to_bytes(digits)
    }
}

/// Formats 16 bytes as the 36-character `8-4-4-4-12` hyphenated form.
///
/// The 32 hex digits come from `hex_encode`. A byte shuffle spreads them out to
/// leave gaps at output offsets 8, 13, 18 and 23, and OR-ing with a table holding
/// `-` (0x2d) at exactly those offsets fills the gaps. That vector covers output
/// bytes 0..32, but output bytes 16..18 and 32..36 need digits the shuffle does not
/// supply (assuming `i8x32_shuffle` selects only within each 16-byte half, TODO
/// confirm). They are taken from the last 16-bit element of the low half and the
/// last 32-bit element of the high half of the digit vector and written on top.
#[inline]
pub(crate) fn format_hyphenated<S: SIMD256>(s: S, src: &[u8; 16], upper: bool) -> [u8; 36] {
    const SPREAD: &Bytes32 = &Bytes32([
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, //
        0x80, 0x08, 0x09, 0x0a, 0x0b, 0x80, 0x0c, 0x0d, //
        0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x80, //
        0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, //
    ]);

    const HYPHENS: &Bytes32 = &Bytes32([
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
        0x2d, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, //
        0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x00, 0x2d, //
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
    ]);

    // SAFETY: `src` is a reference to 16 bytes, so the unaligned 16-byte load reads
    // only memory owned by `src`.
    let raw = unsafe { s.v128_loadu(src.as_ptr().cast()) };
    let digits = hex_encode(s, raw, upper);

    // Digits that belong at output offsets 16..18 and 32..36.
    let pair = s.i16x8_extract::<7>(s.v128_from_low_v256(digits)) as u16;
    let quad = s.i32x4_extract::<3>(s.v128_from_high_v256(digits)) as u32;

    // First 32 output bytes; offsets 16 and 17 are placeholders overwritten below.
    let head = s.v256_or(
        s.i8x32_shuffle(digits, Bytes32::load(s, SPREAD)),
        Bytes32::load(s, HYPHENS),
    );

    let mut out = MaybeUninit::<[u8; 36]>::uninit();
    let cursor: *mut u8 = out.as_mut_ptr().cast();

    // SAFETY: `cursor` addresses the 36 bytes of `out`. The 32-byte store covers
    // 0..32, the 2-byte write covers 16..18 and the 4-byte write covers 32..36, so
    // every write is in bounds and all 36 bytes are initialised before
    // `assume_init`.
    unsafe {
        s.v256_storeu(cursor, head);
        ptr::write_unaligned(cursor.add(16).cast::<u16>(), pair);
        ptr::write_unaligned(cursor.add(32).cast::<u32>(), quad);
        out.assume_init()
    }
}

#[inline]
fn hex_check<S: SIMD256>(s: S, hi: S::V256, lo: S::V256) -> bool {
    let hi_lut = Bytes32::load(
        s,
        &Bytes32([
            0x00, 0x00, 0x00, 0x0f, 0xf0, 0x00, 0xf0, 0x00, //
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
            0x00, 0x00, 0x00, 0x0f, 0xf0, 0x00, 0xf0, 0x00, //
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
        ]),
    );

    let lo_lut = Bytes32::load(
        s,
        &Bytes32([
            0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, //
            0x0f, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
            0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, //
            0x0f, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
        ]),
    );

    let hi_check = s.i8x32_shuffle(hi_lut, hi);
    let lo_check = s.i8x32_shuffle(lo_lut, lo);
    let check = s.v256_and(hi_check, lo_check);

    s.i8x32_all_non_zero(check)
}

#[inline]
fn hex_decode<S: SIMD256>(s: S, a: S::V256) -> Result<[u8; 16], Error> {
    let hi = s.i16x16_srl::<4>(s.v256_and(a, s.u8x32_splat(0xf0)));
    let lo = s.v256_and(a, s.u8x32_splat(0x0f));

    if !hex_check(s, hi, lo) {
        return Err(ERROR);
    }

    let offset_lut = Bytes32::load(
        s,
        &Bytes32([
            0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x09, 0x00, //
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
            0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x09, 0x00, //
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
        ]),
    );

    let shuffle = Bytes32::load(
        s,
        &Bytes32([
            0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, //
            0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, //
            0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, //
            0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, //
        ]),
    );

    let offset = s.i8x32_shuffle(offset_lut, hi);

    let a1 = s.i8x32_add(lo, offset);
    let a2 = s.i16x16_sll::<4>(a1);
    let a3 = s.i16x16_srl::<12>(a2);
    let a4 = s.v256_or(a2, a3);
    let a5 = s.i8x32_shuffle(a4, shuffle);
    let a6 = s.u64x2_from_low_u128x2(a5);

    Ok(s.v128_to_bytes(a6))
}

/// Encodes the 16 bytes of `a` as 32 ASCII hex characters.
///
/// Each input byte is widened to a 16-bit element, then rearranged so one byte of
/// the element holds its high nibble and the other its low nibble; those nibbles
/// index a 16-character alphabet (duplicated in both halves of the table) through
/// `i8x32_shuffle`. `upper` selects `A`..`F` over `a`..`f`.
///
/// This assumes `i16x16_from_u8x16` zero-extends and that the low byte of each
/// element comes first in the output (TODO confirm against `simd_abstraction`).
#[inline]
fn hex_encode<S: SIMD256>(s: S, a: S::V128, upper: bool) -> S::V256 {
    const UPPER_DIGITS: &Bytes32 = &Bytes32(*b"0123456789ABCDEF0123456789ABCDEF");
    const LOWER_DIGITS: &Bytes32 = &Bytes32(*b"0123456789abcdef0123456789abcdef");

    let alphabet = Bytes32::load(
        s,
        match upper {
            true => UPPER_DIGITS,
            false => LOWER_DIGITS,
        },
    );

    let wide = s.i16x16_from_u8x16(a);
    // One byte of each element gets `byte >> 4`, the other gets `byte`; the mask
    // then trims both down to a single nibble.
    let spread = s.v256_or(s.i16x16_sll::<8>(wide), s.i16x16_srl::<4>(wide));
    let nibbles = s.v256_and(spread, s.u8x32_splat(0x0f));

    s.i8x32_shuffle(alphabet, nibbles)
}
