/*
 * Copyright (c) 2021-2021 Thomas Kramer.
 *
 * This file is part of LibrEDA 
 * (see https://codeberg.org/libreda/liberty-io).
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

//! Functions for parsing ASCII-based formats from iterators over bytes.

use itertools::{Itertools, PeekingNext};
use std::iter::Peekable;
use std::str::FromStr;
use std::fmt;
use std::num::ParseIntError;

/// Error while parsing Liberty.
/// TODO: Separate lexer errors from Liberty specific errors.
#[derive(Clone, Debug)]
pub enum ParserError {
    /// An invalid character was encountered.
    InvalidCharacter,
    /// The input ended while more tokens were required.
    UnexpectedEndOfFile,
    /// Expected and actual token (in this order).
    UnexpectedToken(String, String),
    /// A token that is not known; carries the token text.
    UnknownToken(String),
    /// A literal that could not be parsed; carries the offending text.
    InvalidLiteral(String),
    /// A feature that is not implemented; carries a short description.
    NotImplemented(&'static str),
    /// An integer could not be parsed; wraps the standard library error.
    ParseIntError(ParseIntError),
    /// Any other error, described by a static message.
    Other(&'static str),
}

impl fmt::Display for ParserError {
    /// Write a human readable, single-line description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ParserError::InvalidCharacter => write!(f, "Invalid character."),
            ParserError::UnexpectedEndOfFile => write!(f, "Unexpected end of file."),
            // The fields are stored as (expected, actual); the message names the
            // token that was actually found first.
            ParserError::UnexpectedToken(expected, actual) =>
                write!(f, "Unexpected token. '{}' instead of '{}'", actual, expected),
            ParserError::UnknownToken(t) => write!(f, "Unknown token: '{}'.", t),
            ParserError::InvalidLiteral(n) => write!(f, "Invalid literal: '{}'.", n),
            ParserError::NotImplemented(n) => write!(f, "Not implemented: '{}'.", n),
            ParserError::Other(msg) => write!(f, "'{}'.", msg),
            ParserError::ParseIntError(e) => write!(f, "Illegal integer: '{}'", e)
        }
    }
}

impl From<ParseIntError> for ParserError {
    fn from(e: ParseIntError) -> Self {
        Self::ParseIntError(e)
    }
}

/// Tell whether `c` is one of the punctuation characters
/// `{ } ( ) [ ] ; : , / *`, each of which forms a token on its own.
fn is_terminal_char(c: char) -> bool {
    matches!(c, '{' | '}' | '(' | ')' | '[' | ']' | ';' | ':' | ',' | '/' | '*')
}

/// Read the next token from `iter` into `buffer`.
///
/// `buffer` is cleared first. The token is returned as a slice of `buffer`;
/// `None` is returned once the input is exhausted.
///
/// Rules applied, in the order they are tried on the next character:
///
/// * Whitespace other than `'\n'` separates tokens and is skipped.
/// * `# ...` and `// ...` comments (to the end of the line) and
///   `/* ... */` comments are skipped.
/// * A `/` that does not start a comment is returned as the token `"/"`.
/// * A backslash directly followed by a line break (`\n`, `\r\n` or `\r`)
///   masks the line break: both are dropped.
/// * A string quoted with `"` or `'` is returned as one token including its
///   quotes and may contain white space. Within the string a backslash
///   escapes the following character.
/// * A `'\n'` is returned as the token `"\n"`. All whitespace directly
///   following it, including further newlines, is consumed with it.
/// * Any of `{ } ( ) [ ] ; : , *` is returned as a single-character token.
/// * Everything else is a normal token which extends up to the next
///   whitespace or terminal character.
pub(crate) fn read_token<'a, I>(iter: &mut I, buffer: &'a mut String) -> Option<&'a str>
    where I: Iterator<Item=char> + PeekingNext {
    buffer.clear();

    let iter = iter.by_ref();

    loop {
        // Skip whitespace but not newlines: a newline is a token of its own.
        iter.peeking_take_while(|c| c.is_whitespace() && *c != '\n').count();

        // Take the next character unconditionally. `None` means end of input.
        let c = iter.peeking_next(|_| true)?;
        debug_assert!(!c.is_whitespace() || c == '\n');

        match c {
            '#' => {
                // Skip the comment up to (not including) the line break.
                iter.peeking_take_while(|&c| c != '\n' && c != '\r').count();
            }
            '/' => {
                if iter.peeking_next(|&c| c == '*').is_some() {
                    // Block comment: search for the closing `*/`.
                    loop {
                        // Consume until next '*'.
                        iter.peeking_take_while(|&c| c != '*').count();
                        if iter.next() != Some('*') {
                            break; // End of file.
                        }

                        if iter.peeking_next(|&c| c == '/').is_some() {
                            // End of comment.
                            break;
                        }
                    }
                } else if iter.peeking_next(|&c| c == '/').is_some() {
                    // Line comment: consume until (not including) the next '\n'.
                    iter.peeking_take_while(|&c| c != '\n').count();
                } else {
                    // A lone '/' is a terminal character and therefore a
                    // token on its own. It must be returned here, otherwise
                    // it would be glued to the front of the following token.
                    buffer.push(c);
                    return Some(buffer.as_str());
                }
            }
            '\\' => {
                // Note: `peeking_next` consumes the character when it matches,
                // so no further `next()` call is needed to drop a line break.
                if iter.peeking_next(|&c| c == '\\').is_some() {
                    // Two backslashes: a single one is put into the buffer and
                    // whatever is read next is appended to it.
                    buffer.push('\\');
                } else if iter.peeking_next(|&c| c == '\n').is_some() {
                    // Masked newline: already consumed, nothing else to do.
                } else if iter.peeking_next(|&c| c == '\r').is_some() {
                    // Masked carriage return: also drop a directly following
                    // '\n' so that "\r\n" counts as one line break.
                    iter.peeking_next(|&c| c == '\n');
                } else {
                    buffer.push('\\');
                }
            }
            '"' | '\'' => {
                // Quoted string. The token includes the quote characters.
                // NOTE(review): backslashes are copied into the token verbatim,
                // also the one in front of a masked line break.
                let quote_char = c;
                buffer.push(quote_char);

                // `true` if the previous character was an unescaped backslash.
                // The flag is reset after the escaped character such that an
                // escaped backslash does not escape the character after it
                // (for example a closing quote).
                let mut escaped = false;
                while let Some(c) = iter.next() {
                    if escaped {
                        match c {
                            // Skip quoted newline.
                            '\n' => {}
                            // Skip quoted carriage return and a following \n, if any.
                            '\r' => {
                                iter.peeking_next(|&c| c == '\n');
                            }
                            _ => buffer.push(c),
                        }
                        escaped = false;
                    } else if c == quote_char {
                        // Closing quote ends the token.
                        buffer.push(quote_char);
                        break;
                    } else {
                        buffer.push(c);
                        escaped = c == '\\';
                    }
                }
                // Also reached at end of input with an unterminated string.
                return Some(buffer.as_str());
            }
            '\n' => {
                // Newline.
                debug_assert!(buffer.is_empty());
                buffer.push(c);

                // Consume all following whitespace, including other newlines.
                iter.peeking_take_while(|&c| c.is_whitespace()).count();

                return Some(buffer.as_str());
            }
            _ => {
                // Terminal character or normal token.
                buffer.push(c);

                if is_terminal_char(c) {
                    return Some(buffer.as_str());
                }

                // Terminal characters are never consumed here, they stay in
                // the iterator and start the next token.
                // NOTE(review): the whitespace character that ends the token
                // IS consumed, so a '\n' directly after a normal token does
                // not show up as a "\n" token. `test_read_token` relies on this.
                let mut prev = c;
                while let Some(c) = iter.peeking_next(|&c| !is_terminal_char(c)) {
                    if prev != '\\' && c.is_whitespace() {
                        // Abort on unmasked whitespace.
                        break;
                    }

                    buffer.push(c);
                    prev = c;
                }
                return Some(buffer.as_str());
            }
        }
    }
}

/// Check the exact token sequence produced for an input mixing comments,
/// newlines, punctuation, a quoted string and a masked newline.
#[test]
fn test_read_token() {
    let data = r#"
        # Comment 1
        # Comment 2
        /* multi
        line
        comment */
        token1
        # Comment 3
        token2(token3, "quoted token") {token4}
        /**/
        token5:token6;
    # Masked newline should be ignored.
    \
    token7
    "#;

    // Echo every consumed character to make failures easier to locate.
    let mut chars = data.chars()
        .inspect(|c| print!("{}", c))
        .peekable();

    let mut token = String::new();

    // Expected tokens, in the order they must be produced.
    let expected = [
        "\n",
        "\n",
        "\n",
        "\n",
        "token1",
        "\n",
        "token2",
        "(",
        "token3",
        ",",
        "\"quoted token\"",
        ")",
        "{",
        "token4",
        "}",
        "\n",
        "\n",
        "token5",
        ":",
        "token6",
        ";",
        "\n",
        "\n",
        "token7",
    ];

    for want in expected.iter() {
        assert_eq!(read_token(&mut chars, &mut token), Some(*want));
    }
}

/// Provide sequential access to tokens that are created on the fly by
/// splitting characters at whitespace.
///
/// The struct holds at most one "current" token at a time. A newly created
/// instance holds none; the first token is loaded by the first call to
/// `advance()` (or `next_str()` / `next_string()`).
pub struct Tokenized<I>
    where I: Iterator<Item=char> + PeekingNext {
    /// Character source the tokens are read from.
    iter: I,
    /// `true` if the last call to `advance()` produced a token.
    /// `false` before the first `advance()` and once the input is exhausted.
    has_current: bool,
    /// Buffer with the text of the current token. It is `None` only before
    /// the first `advance()`; afterwards the allocation is kept and reused,
    /// even when `has_current` is `false`.
    current_token: Option<String>,
}

impl<I> Tokenized<I>
    where I: Iterator<Item=char> + PeekingNext {
    /// Advance to the next token and return it as a string slice.
    /// Returns `None` when the input is exhausted.
    pub fn next_str(&mut self) -> Option<&str> {
        self.advance();
        self.current_token_str()
    }

    /// Advance to the next token and return an owned copy of it.
    /// Returns `None` when the input is exhausted.
    pub fn next_string(&mut self) -> Option<String> {
        self.advance();
        self.current_token()
    }

    /// Return an owned copy of the current token and advance to the next one.
    ///
    /// # Errors
    /// Returns `ParserError::UnexpectedEndOfFile` if there is no current token.
    /// The tokenizer is advanced in this case too.
    pub fn take(&mut self) -> Result<String, ParserError> {
        let token = self.current_token();
        self.advance();
        token.ok_or(ParserError::UnexpectedEndOfFile)
    }

    /// Parse the current token into an `F` and advance to the next token.
    /// The tokenizer is advanced whether or not parsing succeeds.
    ///
    /// # Errors
    /// * `ParserError::InvalidLiteral` with the token text if it cannot be parsed.
    /// * `ParserError::UnexpectedEndOfFile` if there is no current token.
    pub fn take_and_parse<F: FromStr>(&mut self) -> Result<F, ParserError> {
        let result = match self.current_token_str() {
            Some(s) => s.parse::<F>()
                .map_err(|_| ParserError::InvalidLiteral(s.to_string())),
            None => Err(ParserError::UnexpectedEndOfFile),
        };

        self.advance();

        result
    }

    /// Advance to the next token.
    pub fn advance(&mut self) {
        // Reuse the allocation of the previous token, if there is one.
        let mut buffer = self.current_token.take().unwrap_or_default();

        self.has_current = read_token(&mut self.iter, &mut buffer).is_some();
        // The buffer is stored even if no token was read, to be reused later.
        self.current_token = Some(buffer);
    }

    /// Get the current token as a string slice, or `None` if there is none
    /// (nothing read yet, or the input is exhausted).
    pub fn current_token_str(&self) -> Option<&str> {
        if self.has_current {
            self.current_token.as_deref()
        } else {
            None
        }
    }

    /// Get an owned copy of the current token, or `None` if there is none.
    pub fn current_token(&self) -> Option<String> {
        self.current_token_str().map(|s| s.to_string())
    }

    /// Test if the current token equals to the expected token.
    /// Returns `Ok(())` if the token matches and advances the iterator.
    ///
    /// # Errors
    /// * `ParserError::UnexpectedToken(expected, actual)` if the current token
    ///   differs from `s`. The token is not consumed.
    /// * `ParserError::UnexpectedEndOfFile` if there is no current token,
    ///   either because nothing was read yet or because the input is exhausted.
    pub fn expect(&mut self, s: &str) -> Result<(), ParserError> {
        // The check is based on the token which is actually available rather
        // than on the reused buffer, which stays `Some` at the end of input.
        match self.current_token_str() {
            None => return Err(ParserError::UnexpectedEndOfFile),
            Some(actual) if actual != s => {
                return Err(ParserError::UnexpectedToken(
                    s.to_string(), actual.to_string(),
                ));
            }
            Some(_) => {}
        }

        self.advance();
        Ok(())
    }

    /// Test if the current token matches with the string.
    /// The token is consumed only if it matches.
    ///
    /// Errors are the same as for `peeking_test`.
    pub fn test(&mut self, s: &str) -> Result<bool, ParserError> {
        let result = self.peeking_test(s)?;
        if result {
            self.advance();
        }
        Ok(result)
    }

    /// Test if the current token matches with the string.
    /// The token is not consumed.
    ///
    /// # Errors
    /// Returns `ParserError::UnexpectedEndOfFile` if no token was read yet,
    /// i.e. `advance()` was never called.
    ///
    /// NOTE(review): once the input is exhausted this returns `Ok(false)`,
    /// not an error. This is kept as is because callers may depend on it.
    pub fn peeking_test(&mut self, s: &str) -> Result<bool, ParserError> {
        if self.current_token.is_none() {
            return Err(ParserError::UnexpectedEndOfFile);
        }

        Ok(self.current_token_str() == Some(s))
    }

    /// Consume all tokens until and including `s`.
    ///
    /// # Errors
    /// Returns `ParserError::UnexpectedEndOfFile` if the input ends before
    /// `s` is found.
    pub fn skip_until(&mut self, s: &str) -> Result<(), ParserError> {
        loop {
            // Without a current token there is nothing left to search:
            // stop instead of advancing forever at the end of the input.
            let found = match self.current_token_str() {
                Some(token) => token == s,
                None => return Err(ParserError::UnexpectedEndOfFile),
            };

            // The inspected token is consumed in both cases.
            self.advance();

            if found {
                return Ok(());
            }
        }
    }
}

/// Wrap a stream of characters into a `Tokenized` which splits it into
/// tokens separated by whitespace. Comments are ignored.
///
/// No token is loaded yet when this function returns.
pub fn tokenize<I>(iter: I) -> Tokenized<Peekable<I>>
    where I: Iterator<Item=char> {
    // Look-ahead of one character is needed by the tokenizer.
    let chars = iter.peekable();

    Tokenized {
        current_token: None,
        has_current: false,
        iter: chars,
    }
}
//
// #[test]
// fn test_tokenized() {
//     let data = r#"
//         # Comment 1
//
//         # Comment 2
//
//         /* multi
//         line
//         comment */
//
//         token1
//
//         # Comment 3
//
//         token2(token3, "quoted token") {token4}
//
//         /**/
//
//         token5:token6;
//
//     "#;
//
//
//     let mut tokens = tokenize(data.chars());
//
//     assert_eq!(tokens.next_str(), Some("token1"));
//     assert_eq!(tokens.next_str(), Some("token2"));
//     assert_eq!(tokens.next_str(), Some("("));
//     assert_eq!(tokens.next_str(), Some("token3"));
//     assert_eq!(tokens.next_str(), Some(","));
//     assert_eq!(tokens.next_str(), Some("\"quoted token\""));
//     assert_eq!(tokens.next_str(), Some(")"));
//     assert_eq!(tokens.next_str(), Some("{"));
//     assert_eq!(tokens.next_str(), Some("token4"));
// }
