//! Lexical analyzer of FjML

mod atoms;
mod util;

pub use self::atoms::*;

pub use super::common::keywords::*;

use self::util::*;

use regex::Regex;

use std::borrow::Borrow;
use std::iter::{Peekable, Enumerate};
use std::str::Chars;
use std::fmt::{self, Write, Display};
use std::ascii;



/// Splits FjML source text into a flat list of tokens.
///
/// Adds [TokenValue::EOL] to mark where statements end.
/// Every statement is guaranteed to end with EOL, including the last one.
///
/// Adds [TokenValue::EOF] to mark the end of file.
///
/// The result is guaranteed NOT to have tokens that represent empty lines
/// (sequences `[..., Indent/Dedent, EOL, ...]` and `[..., EOL, EOL ...]` are impossible).
///
/// All indents are guaranteed to have respective dedents.
///
/// Does not append indents or dedents inside `(){}[]`.
///
/// `-` and `+` are folded into a decimal literal when digits follow them
/// immediately; otherwise they are emitted as operators.
///
/// # Errors
///
/// Returns the first [TokenizerError] encountered: a closing brace without a
/// matching opening one, an illegal character, or a malformed string, color or
/// number literal.
///
/// # Limitations
///
/// Positions come from `chars().enumerate()` (character indices) but are also
/// used as byte offsets into `src` (error excerpts, regex start position).
/// The two only coincide for ASCII input.
pub fn tokenize(src: String) -> Result<Vec<Token>, TokenizerError> {

    let dec_num_re = Regex::new(r"(?xm) [-+]? \d+ (\. \d+)? ([eE] [+-]? \d+)?")
        .expect("hard-coded decimal number regex must compile");

    // Source
    let mut chars = src.chars().enumerate().peekable();

    // Destination
    let mut tokens = Vec::<Token>::new();

    // Current token, temporary result
    let mut token = Token::new();

    // Number of currently open `(`, `[` and `{`
    let mut unclosed_round = 0usize;
    let mut unclosed_square = 0usize;
    let mut unclosed_curly = 0usize;

    let mut last_indent_width = 0usize;

    // If the last line appears to be empty, last_indent_width is no longer legitimate
    // and is restored from this value
    let mut backup_indent_width = 0usize;


    // The main loop where everything is turned into tokens
    while let Some(&(pos, c)) = chars.peek() {

        // When reading tokens, only their start position is attached.
        // The following variables are applied to `token` at the end of this loop,
        // after `token` is pushed to `tokens`
        let mut scrolled_lines = 0;
        let mut scrolled_pos = 0;

        match c {

            ' ' | '\t' | '\r' => {
                chars.next();
                scrolled_pos += 1;
            }

            // Parse indent
            '\n' => {
                chars.next();

                token.incr_line();

                // Trailing indents/dedents mean the previous line was empty:
                // remove them and forget the width of that indentation
                while tokens.last().map_or(false, |t| t.is_indentation()) {
                    tokens.pop();
                    last_indent_width = backup_indent_width;
                }

                // If not inside braces
                if unclosed_round == 0 && unclosed_curly == 0 && unclosed_square == 0 {

                    // Terminate the statement unless it is already terminated
                    // (or there is no statement at all yet)
                    if tokens.last().map_or(false, |t| !t.is_eol()) {
                        token.set_value(TokenValue::EOL);
                        tokens.push(token.clone());
                    }

                    backup_indent_width = last_indent_width;

                    let (width, delta) = parse_indent(&mut chars);

                    // One Indent/Dedent token per unit of width difference
                    let indent_delta = (width as isize) - (last_indent_width as isize);
                    if indent_delta != 0 {
                        token.set_value(
                            if indent_delta > 0 { TokenValue::Indent }
                            else { TokenValue::Dedent }
                        );
                        for _ in 0..indent_delta.abs() {
                            tokens.push(token.clone());
                        }
                    }

                    last_indent_width = width;
                    scrolled_pos += delta;
                }

            },

            // Comment or operator
            '/' => {
                chars.next();
                scrolled_pos += 1;

                if let Some(&(_, '/')) = chars.peek() {
                    // Comment: eat characters until '\n' is found ('\n' itself is left)
                    while chars.peek().map_or(false, |&(_, ch)| ch != '\n') {
                        chars.next();
                        scrolled_pos += 1;
                    }
                }
                else {
                    // Operator: the value must be set BEFORE the token is pushed,
                    // otherwise a copy of the previous token is emitted instead
                    token.set_value(TokenValue::Op('/'));
                    tokens.push(token.clone());
                }
            },

            // Line continuation
            '\\' => {
                // Eat characters until '\n' ('\n' is eaten too)
                while let Some((_, eaten)) = chars.next() {
                    scrolled_pos += 1;
                    if eaten == '\n' {
                        scrolled_pos = 0;
                        scrolled_lines += 1;
                        break;
                    }
                }
            },

            // String
            '\'' | '"' | '`' => {
                let (result, lines, cols, len) = parse_string(&mut chars);

                let value = match result {
                    Ok(value) => value,
                    Err(msg) => {
                        return Err(TokenizerError::from(token, msg, &src[pos..pos+len]));
                    }
                };

                token.set_value(value);
                tokens.push(token.clone());
                scrolled_lines += lines;
                scrolled_pos += cols;
            },

            // Parse color
            '#' => {
                let (result, len) = parse_color(&mut chars);

                let value = match result {
                    Ok(value) => value,
                    Err(msg) => {
                        return Err(TokenizerError::from(token, msg, &src[pos..pos+len]));
                    }
                };

                token.set_value(value);
                tokens.push(token.clone());
                scrolled_pos += len;
            },

            // Operators
            // - and + will be parsed while trying to parse numbers
            '@'|'$'|'%'|'^'|'&'|'*'|';'|'<'|'>'|'.'|','|'?' => {
                chars.next();
                token.set_value(TokenValue::Op(c));
                tokens.push(token.clone());
                scrolled_pos += 1;
            },

            '!' => {
                let (parsed, len) = parse_double_op(&mut chars, '=', 'U');
                token.set_value(parsed);
                tokens.push(token.clone());
                scrolled_pos += len;
            },

            ':' => {
                let (parsed, len) = parse_double_op(&mut chars, ':', 'C');
                token.set_value(parsed);
                tokens.push(token.clone());
                scrolled_pos += len;
            },

            '=' => {
                let (parsed, len) = parse_double_op(&mut chars, '=', 'E');
                token.set_value(parsed);
                tokens.push(token.clone());
                scrolled_pos += len;
            },

            // Groupers
            '(' | ')' | '[' | ']' | '{' | '}' => {
                // Pick the counter that tracks this kind of brace
                let unclosed = match c {
                    '(' | ')' => &mut unclosed_round,
                    '[' | ']' => &mut unclosed_square,
                    _ => &mut unclosed_curly,
                };

                match c {
                    '(' | '[' | '{' => *unclosed += 1,
                    // A closing brace with nothing of its kind left to close
                    _ if *unclosed == 0 => {
                        return Err(TokenizerError::from(token, "extra brace", c));
                    }
                    _ => *unclosed -= 1,
                }

                chars.next();
                token.set_value(TokenValue::Gr(c));
                tokens.push(token.clone());
                scrolled_pos += 1;
            },

            // Parse identifier/keyword
            c if is_id_begin(c) => {
                let (id_value, len) = parse_id(&mut chars);
                token.set_value(id_value);
                tokens.push(token.clone());
                scrolled_pos += len;
            },


            // Parse a dec/hex/bin number
            c if c.is_ascii_digit() || c == '-' || c == '+' => {
                // Look one character ahead without consuming anything.
                // Cloning the iterator is cheap and avoids rescanning `src` from the start.
                let next_char = chars.clone().nth(1).map(|(_, next)| next);

                let prefix = match next_char {
                    Some(p) if c == '0' && is_num_prefix(p) => Some(p),
                    _ => None,
                };

                if let Some(prefix) = prefix {
                    chars.next(); // Eat 0
                    chars.next(); // Eat prefix
                    let (result, len) = parse_prefixed_num(prefix, &mut chars);
                    let len = len + 2; // + 2 chars representing 0 and prefix
                    let value = match result {
                        Ok(value) => value,
                        Err(msg) => {
                            return Err(TokenizerError::from(token, msg, &src[pos..pos+len]));
                        }
                    };
                    token.set_value(value);
                    tokens.push(token.clone());
                    scrolled_pos += len;
                }
                else {
                    // Try to parse decimal using the power of regex.
                    // `find_at` returns the first match at OR AFTER `pos`,
                    // so only a match starting exactly at `pos` counts.
                    match dec_num_re.find_at(&src, pos) {
                        Some(m) if m.start() == pos => {
                            let len = m.end() - m.start();

                            let source = &src[m.start()..m.end()];
                            let value = match source.parse::<f32>() {
                                Ok(value) => value,
                                Err(err) => {
                                    return Err(TokenizerError::from(token, err, source));
                                }
                            };

                            token.set_value(TokenValue::Float(value));
                            tokens.push(token.clone());
                            chars.nth(len-1); // -1 because the argument is index, not length
                            scrolled_pos += len;
                        }
                        // No number starts here (there may be none in the rest of
                        // the source at all): leave -/+ as operators
                        _ if c == '-' || c == '+' => {
                            chars.next();
                            token.set_value(TokenValue::Op(c));
                            tokens.push(token.clone());
                            scrolled_pos += 1;
                        }
                        // A digit that the regex refuses to match at its own position.
                        // Report it instead of looping forever on the same character.
                        _ => {
                            return Err(TokenizerError::from(
                                token, "cannot parse a number", c
                            ));
                        }
                    }
                }
            },

            c => {
                return Err(TokenizerError::from(token, "illegal character", c));
            }

        } // match c

        token.scroll_lines(scrolled_lines);
        token.scroll_pos(scrolled_pos);

    } // while let ... = chars.peek()

    // Indentation tokens left at the very end belong to an empty trailing line:
    // drop them exactly like the '\n' handler does, so that no
    // `Indent/Dedent, EOL` sequence can be produced below
    while tokens.last().map_or(false, |t| t.is_indentation()) {
        tokens.pop();
        last_indent_width = backup_indent_width;
    }

    // Append EOL if necessary
    let needs_eol = match tokens.last() {
        Some(last) => match last.value {
            TokenValue::EOL => false,
            _ => true,
        },
        None => false,
    };
    if needs_eol {
        token.set_value(TokenValue::EOL);
        tokens.push(token.clone());
        token.scroll_pos(1);
    }

    // Append missing dedents
    if last_indent_width > 0 {
        token.set_value(TokenValue::Dedent);
        for _ in 0..last_indent_width {
            tokens.push(token.clone());
        }
    }

    // Append EOF
    token.set_value(TokenValue::EOF);
    tokens.push(token.clone());

    Ok(tokens)
}