mod interactive_forms;
mod parse;
mod text_string;

use crate::parse::PdfReaderPosition;
use parse::PdfParser;
use std::collections::HashMap;

/// A PDF version.
///
/// Specified on the first line of a PDF file in the format "%PDF-x.y". To support incremental
/// updates (new content being appended to the end of the file instead of modifying previous
/// bytes), a Version entry in the document's catalog dictionary takes precedence if present.
///
/// NOTE(review): the parsing code in this file only derives the version from the header line;
/// the catalog `Version` override described above does not appear to be applied here.
#[derive(Eq, PartialEq, Debug)]
pub enum PdfVersion {
    /// Version 1.0 (header `%PDF-1.0`).
    Version10,
    /// Version 1.1 (header `%PDF-1.1`).
    Version11,
    /// Version 1.2 (header `%PDF-1.2`).
    Version12,
    /// Version 1.3 (header `%PDF-1.3`).
    Version13,
    /// Version 1.4 (header `%PDF-1.4`).
    Version14,
    /// Version 1.5 (header `%PDF-1.5`).
    Version15,
    /// Version 1.6 (header `%PDF-1.6`).
    Version16,
    /// Version 1.7 (header `%PDF-1.7`).
    Version17,
    /// Version 2.0 (header `%PDF-2.0`).
    Version20,
}

/// A document in the Portable Document Format (PDF).
///
/// This is the typed view of a document; see [`PdfDocumentData`] for the raw objects it is built
/// from. Use [`PdfDocument::parse`] to read an existing file.
pub struct PdfDocument {
    /// The PDF version of the document, as reported by [`PdfDocumentData::version`].
    pub version: PdfVersion,
    /// The typed document catalog.
    pub catalog: PdfCatalog,
}

impl PdfDocument {
    /// Parses a PDF document from the complete contents of a file.
    ///
    /// The raw structure is read with [`PdfDocumentData::parse`]; the interactive form (if any)
    /// is then extracted from it and exposed through [`PdfDocument::catalog`].
    ///
    /// # Errors
    ///
    /// Returns the error message produced by [`PdfDocumentData::parse`] when the raw structure
    /// of the file cannot be read.
    pub fn parse(bytes: &[u8]) -> Result<Self, String> {
        let document_data = PdfDocumentData::parse(bytes)?;

        // The typed data is extracted while the raw data is still borrowed; only the version is
        // moved out of the raw data afterwards.
        let interactive_form = interactive_forms::extract_interactive_forms(&document_data);

        Ok(Self {
            version: document_data.version,
            catalog: PdfCatalog { interactive_form },
        })
    }
}

/// A document in the Portable Document Format (PDF).
///
/// This is the raw data - see [`PdfDocument`] for typed data.
///
/// To read an existing file, use [`PdfDocumentData::parse`].
pub struct PdfDocumentData<'a> {
    /// The version taken from the `%PDF-x.y` header line.
    pub version: PdfVersion,
    /// Every indirect object found by the parser, keyed by its object identifier.
    pub objects: HashMap<PdfObjectIdentifier, PdfObject<'a>>,
    /// The dictionary that follows the `trailer` keyword.
    pub trailer: PdfUntypedDictionary<'a>,
}

/// A name object specifying how the document shall be displayed when opened.
///
/// This is the value of the `PageMode` entry of the document catalog; each variant is named
/// after the corresponding PDF name. Per the specification, `UseNone` is the default when the
/// entry is absent.
///
/// Specified in [7.7.2 Document catalog dictionary](https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#page=82).
pub enum PdfPageMode {
    /// Neither document outline nor thumbnail images visible.
    UseNone,
    /// Document outline visible.
    UseOutlines,
    /// Thumbnail images visible.
    UseThumbs,
    /// Full-screen mode, with no menu bar, window controls, or any other window visible.
    FullScreen,
    /// Optional content group panel visible.
    UseOC,
    /// Attachments panel visible.
    UseAttachments,
}

impl<'a> PdfDocumentData<'a> {
    /// Parses the raw structure of a PDF file from `bytes`.
    ///
    /// The header line is validated and turned into a [`PdfVersion`], every indirect object the
    /// parser yields is collected into [`PdfDocumentData::objects`], and the dictionary following
    /// the `trailer` keyword becomes [`PdfDocumentData::trailer`].
    ///
    /// # Errors
    ///
    /// Returns a description of the problem if `bytes` does not start with `%PDF-`, if the first
    /// line is not of the form `%PDF-x.y` with a known version, or if no trailer dictionary is
    /// found.
    pub fn parse(bytes: &'a [u8]) -> Result<PdfDocumentData<'a>, String> {
        if !bytes.starts_with(b"%PDF-") {
            return Err("File not starting with '%PDF-'".to_string());
        }

        let parser = PdfParser::new(bytes);
        let mut position = PdfReaderPosition::new();
        let comment = parser.read_line(&mut position);
        // The header must be at least as long as "%PDF-x.y" and must separate the major and
        // minor digits with a '.'; without this check a line such as "%PDF-1x4" would be
        // accepted as version 1.4.
        if comment.len() < 8 || comment[6] != b'.' {
            return Err("Invalid first line".to_string());
        }

        // Bytes 5 and 7 are the major and minor version digits; unknown combinations are
        // rejected by the fallback arm.
        let pdf_version = match (comment[5], comment[7]) {
            (b'1', b'0') => PdfVersion::Version10,
            (b'1', b'1') => PdfVersion::Version11,
            (b'1', b'2') => PdfVersion::Version12,
            (b'1', b'3') => PdfVersion::Version13,
            (b'1', b'4') => PdfVersion::Version14,
            (b'1', b'5') => PdfVersion::Version15,
            (b'1', b'6') => PdfVersion::Version16,
            (b'1', b'7') => PdfVersion::Version17,
            (b'2', b'0') => PdfVersion::Version20,
            _ => {
                return Err("Invalid first line".to_string());
            }
        };

        // Collect indirect objects for as long as the parser keeps producing them.
        let mut objects = HashMap::new();
        while let Some((identifier, object)) = parser.next_indirect_object(&mut position) {
            objects.insert(identifier, object);
        }

        // Skip forward word by word until the `trailer` keyword; the object right after it must
        // be a dictionary. Running out of words, or finding anything else after the keyword, is
        // an error.
        let trailer = loop {
            if let Some(next_word) = parser.next_word(&mut position) {
                if next_word == b"trailer" {
                    if let Some(PdfObject::Dictionary(trailer)) = parser.next_object(&mut position)
                    {
                        break trailer;
                    }
                } else {
                    continue;
                }
            }
            return Err("No trailer dictionary".to_string());
        };

        Ok(PdfDocumentData {
            version: pdf_version,
            objects,
            trailer,
        })
    }

    /// Resolves `object` to the object it denotes.
    ///
    /// A [`PdfObject::Reference`] is looked up in [`PdfDocumentData::objects`]; any other object
    /// is returned unchanged.
    fn lookup(&self, object: &'a PdfObject) -> &'a PdfObject {
        if let PdfObject::Reference(identifier) = object {
            // Section "7.3.10 Indirect Objects" says that an indirect reference to an undefined
            // object should not be treated as an error and should instead be handled as a
            // reference to the null object.
            return self.objects.get(identifier).unwrap_or(&PdfObject::Null);
        }
        object
    }
}

/// From section 7.3.1 of the PDF specification:
/// "PDF includes eight basic types of objects: Boolean values, Integer and Real numbers, Strings,
/// Names, Arrays, Dictionaries, Streams, and the null object"
///
/// In addition to those basic types, this enum has a [`PdfObject::Reference`] variant for
/// indirect references.
#[derive(PartialEq)]
pub enum PdfObject<'a> {
    /// Section 7.3.2: "Boolean objects represent the logical values of true and false. They appear
    /// in PDF files using the keywords true and false."
    Boolean(bool),
    /// Section 7.3.3: "Integer objects represent mathematical integers. Real objects represent
    /// mathematical real numbers. The range and precision of numbers may be limited by the internal
    /// representations used in the computer on which the conforming reader is running; Annex C
    /// gives these limits for typical implementations."
    ///
    /// Section C.2 specifies this as 32-bit signed integer.
    Integer(i32),
    /// Section 7.3.3: "Real objects represent mathematical real numbers. The range and precision of
    /// numbers may be limited by the internal representations used in the computer on which the
    /// conforming reader is running; Annex C gives these limits for typical implementations."
    ///
    /// Section C.2 specifies this as 32-bit floating point number.
    Real(f32),
    /// Section 7.3.4: "A string object shall consist of a series of zero or more bytes. String
    /// objects are not integer objects, but are stored in a more compact format. The length of a
    /// string may be subject to implementation limits; see Annex C."
    ///
    /// Annex C specifies the maximum string length as 32767.
    String(PdfString<'a>),
    /// Section 7.3.5: "[..] a name object is an atomic symbol uniquely defined by a sequence of any
    /// characters (8-bit values) except null (character code 0). Uniquely defined means that any
    /// two name objects made up of the same sequence of characters denote the same object. Atomic
    /// means that a name has no internal structure; although it is defined by a sequence of
    /// characters, those characters are not considered elements of the name."
    Name(&'a [u8]),
    /// Section 7.3.6: "An array object is a one-dimensional collection of objects arranged
    /// sequentially. Unlike arrays in many other computer languages, PDF arrays may be
    /// heterogeneous; that is, an array’s elements may be any combination of numbers, strings,
    /// dictionaries, or any other objects, including other arrays. An array may have zero elements."
    Array(Vec<PdfObject<'a>>),

    /// Section 7.3.7: a dictionary object, an associative table whose entries map name keys to
    /// objects. The entries are held in a [`PdfUntypedDictionary`].
    Dictionary(PdfUntypedDictionary<'a>),
    /// Section 7.3.8: "A stream object, like a string object, is a sequence of bytes. Furthermore,
    /// a stream may be of unlimited length, whereas a string shall be subject to an implementation
    /// limit. For this reason, objects with potentially large amounts of data, such as images and
    /// page descriptions, shall be represented as streams."
    Stream(PdfStreamObject<'a>),
    /// A reference to an indirect object, represented as "<obj> <gen> R".
    Reference(PdfObjectIdentifier),
    /// Section 7.3.9: the null object. It is also what a reference to an undefined indirect
    /// object is resolved to (section 7.3.10).
    Null,
}

/// Compact, PDF-flavoured debug output: `PdfObject` followed by the value in parentheses.
///
/// Strings and names are shown as text when they are valid UTF-8 (a fixed marker otherwise),
/// references are shown in the `<obj> <gen> R` notation, and container variants delegate to the
/// `Debug` output of their payload.
impl std::fmt::Debug for PdfObject<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "PdfObject")?;
        // Every variant is listed explicitly so that adding a new variant is a compile error
        // here instead of silently printing a placeholder.
        match self {
            PdfObject::Boolean(value) => write!(f, "({})", value),
            PdfObject::Integer(value) => write!(f, "({})", value),
            PdfObject::Real(value) => write!(f, "({})", value),
            PdfObject::Array(value) => write!(f, "({:?})", value),
            PdfObject::Dictionary(value) => write!(f, "({:?})", value),
            PdfObject::Stream(value) => write!(f, "({:?})", value),
            PdfObject::String(value) => write!(
                f,
                "(\"{}\")",
                std::str::from_utf8(value.data).unwrap_or("**Invalid utf-8**")
            ),
            PdfObject::Name(value) => write!(
                f,
                "(/{})",
                std::str::from_utf8(value).unwrap_or("**Invalid utf-8**")
            ),
            PdfObject::Reference(value) => {
                write!(f, "({} {} R)", value.object_number, value.generation_number)
            }
            PdfObject::Null => write!(f, "(null)"),
        }
    }
}

/// A dictionary object whose entries have not been interpreted: raw name keys mapped to raw
/// [`PdfObject`] values.
#[derive(Debug, PartialEq)]
pub struct PdfUntypedDictionary<'a> {
    /// The entries of the dictionary, keyed by the bytes of the entry name.
    pub map: HashMap<&'a [u8], PdfObject<'a>>,
}

impl<'a> PdfUntypedDictionary<'a> {
    /// Fetches the value stored under `name_key`, following one level of indirection.
    ///
    /// * If the dictionary has no entry for `name_key`, `None` is returned.
    /// * If the entry is a [`PdfObject::Reference`], the referenced object is fetched from
    ///   `pdf.objects`; `None` is returned when `pdf` has no such object.
    /// * Any other entry is returned as is.
    #[must_use]
    pub fn lookup(&self, name_key: &[u8], pdf: &'a PdfDocumentData) -> Option<&'a PdfObject> {
        let entry = self.map.get(name_key)?;
        match entry {
            PdfObject::Reference(target) => pdf.objects.get(target),
            direct => Some(direct),
        }
    }
}

/// A stream object: a dictionary together with a sequence of bytes.
#[derive(Debug, PartialEq)]
pub struct PdfStreamObject<'a> {
    /// The dictionary that accompanies the stream data.
    pub dictionary: PdfUntypedDictionary<'a>,
    /// The bytes of the stream.
    ///
    /// NOTE(review): presumably the raw, still-encoded bytes as they appear in the file - confirm
    /// against the parser.
    pub bytes: &'a [u8],
}

impl<'a> PdfUntypedDictionary<'a> {
    #[must_use]
    pub const fn new(map: HashMap<&'a [u8], PdfObject<'a>>) -> Self {
        Self { map }
    }
}

/// A [`PdfObject`] may be labelled as an "indirect object", in which case it is identified by this
/// object identifier.
///
/// Use the [`PdfDocumentData::objects`] map to look up the object referenced by an identifier.
///
/// TODO: Is u16 a good type here? Why?
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub struct PdfObjectIdentifier {
    /// 7.3.10 Indirect Objects: "A positive integer object number. Indirect objects may be numbered
    /// sequentially within a PDF file, but this is not required; object numbers may be assigned in
    /// any arbitrary order."
    ///
    /// Annex C Implementation Limits, table C.1, states that 8388607 is the maximum number of
    /// indirect objects in a PDF file. Note that this exceeds `u16::MAX` (65535), so object
    /// numbers above 65535 cannot be represented by this field.
    pub object_number: u16,
    /// 7.3.10 Indirect Objects: "A non-negative integer generation number. In a newly created file,
    /// all indirect objects shall have generation numbers of 0. Nonzero generation numbers may be
    /// introduced when the file is later updated; see sub-clauses 7.5.4, "Cross-Reference Table"
    /// and 7.5.6, "Incremental Updates.""
    pub generation_number: u16,
}

impl PdfObjectIdentifier {
    /// Construct a new object identifier from an object and and a generation number.
    const fn new(object_number: u16, generation_number: u16) -> Self {
        Self {
            object_number,
            generation_number,
        }
    }
}

/// The syntax a [`PdfString`] was written with, which determines how its raw bytes are decoded.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum PdfStringType {
    /// A literal string; its bytes are used as they are by `PdfString::decode_byte_string`.
    Literal,
    /// A hexadecimal string; its bytes are ASCII hexadecimal digits, two per decoded byte.
    Hexadecimal,
}

/// A string object, stored as the raw bytes it was created from together with the syntax
/// (literal or hexadecimal) needed to decode them.
#[derive(Debug, Eq, PartialEq)]
pub struct PdfString<'a> {
    // TODO: hide this, as this is raw bytes and needs to be decoded? or rename to something like
    // TODO: raw_bytes/unencoded_bytes?
    /// The raw, not yet decoded bytes of the string.
    pub data: &'a [u8],
    /// Whether `data` is the content of a literal or of a hexadecimal string.
    string_type: PdfStringType,
}

impl<'a> PdfString<'a> {
    /// Creates a string from the raw bytes of a literal string.
    #[must_use]
    pub const fn new_literal(data: &'a [u8]) -> Self {
        PdfString {
            data,
            string_type: PdfStringType::Literal,
        }
    }

    /// Creates a string from the raw bytes of a hexadecimal string, i.e. ASCII hexadecimal
    /// digits, optionally interleaved with white-space.
    #[must_use]
    pub const fn new_hexadecimal(data: &'a [u8]) -> Self {
        PdfString {
            data,
            string_type: PdfStringType::Hexadecimal,
        }
    }

    /// Returns the bytes of this string for use as a text string.
    ///
    /// NOTE: no decoding is implemented yet - the raw bytes are returned unchanged, whatever the
    /// string type.
    #[must_use]
    pub const fn decode_text_string(&self) -> &[u8] {
        self.data
    }

    /// Decodes this string into the bytes it represents.
    ///
    /// A literal string yields a copy of its raw bytes. A hexadecimal string has its white-space
    /// removed and every pair of hexadecimal digits combined into one byte; bytes that are not
    /// hexadecimal digits count as the digit 0.
    #[must_use]
    pub fn decode_byte_string(&self) -> Vec<u8> {
        // Numeric value of an ASCII hexadecimal digit; any other byte is treated as 0.
        const fn ascii_nibble_value(ascii_nibble: u8) -> u8 {
            match ascii_nibble {
                b'0'..=b'9' => ascii_nibble - b'0',
                b'a'..=b'f' => ascii_nibble - b'a' + 0xA,
                b'A'..=b'F' => ascii_nibble - b'A' + 0xA,
                _ => 0,
            }
        }

        match self.string_type {
            PdfStringType::Literal => Vec::from(self.data),
            PdfStringType::Hexadecimal => {
                // Stream the digit values straight into the output instead of first collecting
                // the filtered digits into a temporary vector.
                let mut nibbles = self
                    .data
                    .iter()
                    .copied()
                    .filter(|&byte| !PdfParser::is_whitespace(byte))
                    .map(ascii_nibble_value);

                // At most one output byte per two input bytes, rounded up.
                let mut decoded = Vec::with_capacity((self.data.len() + 1) / 2);
                while let Some(high_nibble) = nibbles.next() {
                    // 7.3.4.3 Hexadecimal strings (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#page=24):
                    // "If the final digit of a hexadecimal string is missing — that is, if there is
                    // an odd number of digits — the final digit shall be assumed to be 0."
                    let low_nibble = nibbles.next().unwrap_or(0);
                    decoded.push((high_nibble << 4) | low_nibble);
                }
                decoded
            }
        }
    }
}

/// Information about interactive form functionality in a PDF document.
///
/// Located at [`PdfCatalog::interactive_form`] in a PDF document catalog.
///
/// Specified in [12.7.2 Interactive Form Dictionary](https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#page=439).
pub struct PdfInteractiveForm {
    /// The form fields of the document.
    pub fields: Vec<PdfFormField>,
}

/// The various possible field types of a [`PdfFormField`].
///
/// Only signature fields currently carry data; the other variants merely record the field type.
///
/// Specified in [12.7.3 Field Dictionaries](https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#page=440).
#[derive(Debug, Eq, PartialEq)]
pub enum PdfFormField {
    /// A button field (field type `Btn` in the specification).
    Button,
    /// A text field (field type `Tx` in the specification).
    Text,
    /// A choice field (field type `Ch` in the specification).
    Choice,
    /// A signature field (field type `Sig` in the specification).
    Signature(PdfSignatureField),
}

/// A signature form field, as carried by [`PdfFormField::Signature`].
#[derive(Debug, Eq, PartialEq)]
pub struct PdfSignatureField {
    /// The signature held by the field, or `None` when there is none.
    ///
    /// NOTE(review): presumably `None` means the field has not been signed - confirm against the
    /// extraction code in `interactive_forms`.
    pub signature: Option<PdfSignature>,
}

/// A PDF signature.
///
/// Can be obtained through [`PdfSignatureField::signature`].
///
/// Specified by [Table 252 - Entries in a signature dictionary](https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#page=475).
#[derive(Debug, Eq, PartialEq)]
pub struct PdfSignature {
    /// The signature value. When ByteRange is present, the value shall be a hexadecimal string
    /// representing the value of the byte range digest.
    ///
    /// For public-key signatures, Contents should be either a DER-encoded PKCS#1 binary data
    /// object, a DER-encoded CMS binary data object or a DER-encoded CMS SignedData binary data
    /// object.
    ///
    /// For document timestamp signatures, Contents shall be the TimeStampToken as specified in RFC
    /// 3161 as updated by RFC 5816. The value of the messageImprint field within the TimeStampToken
    /// shall be a hash of the bytes of the document indicated by the ByteRange and the ByteRange
    /// shall specify the complete PDF file contents (excepting the Contents value).
    ///
    /// Specified by [the row with Key=Contents of Table 252 - Entries in a signature dictionary](https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#page=475).
    pub contents: Vec<u8>,
}

/// The root of a PDF document's object hierarchy, exposed as [`PdfDocument::catalog`].
///
/// In the file itself, the catalog dictionary is located by means of the `Root` entry of the
/// trailer dictionary.
///
/// See section 7.7.2 of the PDF 1.7 specification:
/// "The catalog contains references to other objects defining the document’s contents, outline,
/// article threads, named destinations, and other attributes. In addition, it contains information
/// about how the document shall be displayed on the screen, such as whether its outline and
/// thumbnail page images shall be displayed automatically and whether some location other than the
/// first page shall be shown when the document is opened."
pub struct PdfCatalog {
    /// The interactive form of the document, if one was extracted.
    pub interactive_form: Option<PdfInteractiveForm>,
}

/// Unit tests for the decoding methods of [`PdfString`].
#[cfg(test)]
mod tests {
    use crate::PdfString;

    /// Hexadecimal strings decode to one byte per pair of digits.
    #[test]
    fn decode_hex_byte_string() {
        // Decodes `string` as a hexadecimal string and compares the result with `expected`.
        fn assert_decoding(string: &[u8], expected: &[u8]) {
            let s = PdfString::new_hexadecimal(string);
            assert_eq!(s.decode_byte_string(), expected);
        }
        assert_decoding(b"ff00f0ab", &[0xff, 0x00, 0xf0, 0xab]);
        // An odd number of digits: the missing final digit is assumed to be 0.
        assert_decoding(b"12f", &[0x12, 0xf0]);
        assert_decoding(b"12", &[0x12]);

        // "Each pair of hexadecimal digits defines one byte of the string. White-space characters
        // (see Table 1) shall be ignored":
        assert_decoding(b"1 2", &[0x12]);
    }

    /// A backslash followed by an end-of-line marker continues a literal string on the next line
    /// and is not part of the decoded text.
    ///
    /// Ignored because `PdfString::decode_text_string` currently returns the raw bytes unchanged,
    /// so these expectations cannot pass yet.
    ///
    /// NOTE(review): every input below ends with a `)` that the expected output lacks - confirm
    /// whether the closing delimiter is really meant to be part of the raw string data.
    #[test]
    #[ignore]
    fn decode_text_string() {
        // Decodes `string` as a literal text string and compares the result with `expected`.
        fn assert_decoding(string: &[u8], expected: &[u8]) {
            let s = PdfString::new_literal(string);
            assert_eq!(s.decode_text_string(), expected);
        }

        // Line continuation with a line feed.
        assert_decoding(
            b"These \\\ntwo strings \\\nare the same.)",
            b"These two strings are the same.",
        );
        // Line continuation with a carriage return.
        assert_decoding(
            b"These \\\rtwo strings \\\rare the same.)",
            b"These two strings are the same.",
        );
        // Line continuation with a carriage return followed by a line feed.
        assert_decoding(
            b"These \\\r\ntwo strings \\\r\nare the same.)",
            b"These two strings are the same.",
        );
    }
}
