#![allow(dead_code)]
/// Decodes the bytes of a PDF text string (7.9.2.2) into a `String`.
///
/// See [7.9.2 String Object Types](https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#page=93)
///
/// The encoding is chosen from the leading bytes:
///
/// * `FE FF`: the remainder is decoded as UTF-16BE.
/// * `EF BB BF`: the remainder is decoded as UTF-8 (PDF 2.0 only).
/// * anything else: every byte is decoded as PDFDocEncoding.
///
/// The byte-order mark is not part of the returned text. Decoding is lossy:
/// malformed or undefined input yields `U+FFFD REPLACEMENT CHARACTER` instead
/// of an error.
pub fn decode_text_string(bytes: &[u8]) -> String {
    if bytes.starts_with(&[0xFE, 0xFF]) {
        decode_utf16_be_lossy(&bytes[2..])
    } else if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
        // UTF-8, PDF 2.0 only.
        // TODO: Only parse as UTF-8 if reading a PDF 2.0 file?
        String::from_utf8_lossy(&bytes[3..]).into_owned()
    } else {
        bytes
            .iter()
            .map(|&byte| decode_pdf_doc_encoding_byte(byte))
            .collect()
    }
}

/// Lossily decodes big-endian UTF-16 bytes (without a byte-order mark).
///
/// Unpaired surrogates become `U+FFFD`. If the input has an odd length, the
/// dangling final byte cannot form a code unit and also becomes one `U+FFFD`
/// rather than being dropped silently.
fn decode_utf16_be_lossy(bytes: &[u8]) -> String {
    let pairs = bytes.chunks_exact(2);
    // Must be queried before `pairs` is consumed by `map` below.
    let has_dangling_byte = !pairs.remainder().is_empty();
    let code_units = pairs.map(|pair| u16::from_be_bytes([pair[0], pair[1]]));

    let mut text: String = std::char::decode_utf16(code_units)
        .map(|decoded| decoded.unwrap_or(std::char::REPLACEMENT_CHARACTER))
        .collect();
    if has_dangling_byte {
        text.push(std::char::REPLACEMENT_CHARACTER);
    }
    text
}

/// Maps a single PDFDocEncoding byte to its Unicode character.
///
/// Codes that PDFDocEncoding leaves undefined map to `U+FFFD`.
fn decode_pdf_doc_encoding_byte(byte: u8) -> char {
    match byte {
        // Tab, line feed and carriage return are defined and identical to
        // ASCII; the other codes in 0..=23 are undefined.
        9 | 10 | 13 => char::from(byte),
        24 => '\u{02D8}',
        25 => '\u{02C7}',
        26 => '\u{02C6}',
        27 => '\u{02D9}',
        28 => '\u{02DD}',
        29 => '\u{02DB}',
        30 => '\u{02DA}',
        31 => '\u{02DC}',
        // These ranges coincide with Unicode's first 256 code points.
        32..=126 | 161..=172 | 174..=255 => char::from(byte),
        // 127 is undefined.
        128 => '\u{2022}',
        129 => '\u{2020}',
        130 => '\u{2021}',
        131 => '\u{2026}',
        132 => '\u{2014}',
        133 => '\u{2013}',
        134 => '\u{0192}',
        135 => '\u{2044}',
        136 => '\u{2039}',
        137 => '\u{203A}',
        138 => '\u{2212}',
        139 => '\u{2030}',
        140 => '\u{201E}',
        141 => '\u{201C}',
        142 => '\u{201D}',
        143 => '\u{2018}',
        144 => '\u{2019}',
        145 => '\u{201A}',
        146 => '\u{2122}',
        147 => '\u{FB01}',
        148 => '\u{FB02}',
        149 => '\u{0141}',
        150 => '\u{0152}',
        151 => '\u{0160}',
        152 => '\u{0178}',
        153 => '\u{017D}',
        154 => '\u{0131}',
        155 => '\u{0142}',
        156 => '\u{0153}',
        157 => '\u{0161}',
        158 => '\u{017E}',
        // 159 is undefined.
        160 => '\u{20AC}',
        // 173 is undefined.
        _ => std::char::REPLACEMENT_CHARACTER,
    }
}

/// Checks `decode_text_string` against known inputs for each of the three
/// supported encodings.
#[test]
fn test_decode_text_string() {
    // Each entry pairs the raw text-string bytes with the expected decoding.
    let cases: &[(&[u8], &str)] = &[
        // PDFDocEncoding: no byte-order mark.
        (&[0], "\u{FFFD}"),
        (&[23], "\u{FFFD}"),
        (&[24], "\u{02D8}"),
        (&[b' '], " "),
        (&[b'a'], "a"),
        (&[b'z'], "z"),
        (&[b'A'], "A"),
        (&[b'Z'], "Z"),
        (&[126], "~"),
        (&[139], "‰"),
        (&[160], "€"),
        (&[255], "ÿ"),
        // UTF-16BE: a BMP-only string, then a surrogate pair.
        // TODO: Test error handling.
        (
            &[0xFE, 0xFF, 0x04, 0x42, 0x04, 0x35, 0x04, 0x41, 0x04, 0x42],
            "тест",
        ),
        (&[0xFE, 0xFF, 0xd8, 0x01, 0xdc, 0x00], "\u{10400}"),
        // UTF-8: a three-byte sequence, then a four-byte sequence.
        (&[239, 187, 191, 0xe4, 0xb8, 0xb2], "\u{4E32}"),
        (&[239, 187, 191, 0xF0, 0x90, 0x90, 0x80], "\u{10400}"),
    ];

    for &(input, expected) in cases {
        // The message identifies the failing row, since every case shares
        // this one assertion site.
        assert_eq!(
            decode_text_string(input),
            expected,
            "input bytes: {:?}",
            input
        );
    }
}
