/*
 * DMNTK - Decision Model and Notation Toolkit
 *
 * FEEL parser.
 *
 * Copyright 2018-2021 Dariusz Depta Engos Software <dariusz.depta@engos.software>
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

//! Implementation of the lexer for `FEEL` grammar.

use self::errors::*;
use crate::lalr::TokenType;
use dmntk_common::Result;
use dmntk_feel::context::FeelContext;
use dmntk_feel::values::VALUE_NULL;
use dmntk_feel::{Name, Scope};

/// Definition of a single space character.
const WS: char = ' ';

/// Buffer size for lexer input.
const BUF_SIZE: usize = 12;

/// Definition of decimal separator.
const DECIMAL_SEPARATOR: char = '.';

/// Semantic value associated with token type.
///
/// Most variants carry no payload and simply mirror the keyword, operator
/// or punctuation mark recognized by the lexer. Only a few variants
/// (`Boolean`, `Name`, `NameDateTime`, `Numeric`, `String`, `YyState`) carry data.
#[derive(Debug, Clone)]
pub enum TokenValue {
  YyEmpty,
  /// Returned by the lexer when there is no more input to read.
  YyEof,
  /// Returned by the lexer when the input is not recognized as any token.
  YyUndef,
  // NOTE(review): not produced by the lexer in this file, presumably used by the parser — confirm.
  YyState(usize),
  And,
  At,
  Between,
  /// Keyword `and` returned in place of `And` when the lexer's `between_begin` flag is set.
  BetweenAnd,
  /// Boolean literal `true` or `false`.
  Boolean(bool),
  Colon,
  Comma,
  Context,
  Div,
  Dot,
  Ellipsis,
  Else,
  Eq,
  Every,
  Exp,
  External,
  For,
  Function,
  LeftBrace,
  LeftBracket,
  LeftParen,
  Le,
  Lt,
  Ge,
  Gt,
  If,
  In,
  Instance,
  List,
  Minus,
  Mul,
  Not,
  /// Name built from one or more name parts.
  Name(Name),
  /// One of the names: `date`, `date and time`, `time` or `duration`.
  NameDateTime(Name),
  Nq,
  Null,
  /// Numeric literal, stored as digits before and digits after the decimal separator.
  Numeric(String, String),
  Of,
  Or,
  Plus,
  Range,
  Return,
  RightArrow,
  RightBrace,
  RightBracket,
  RightParen,
  Satisfies,
  Some,
  /// Value of the starting token returned before the first token read from input.
  StartBoxedExpression,
  /// Value of the starting token returned before the first token read from input.
  StartTextualExpression,
  /// Value of the starting token returned before the first token read from input.
  StartTextualExpressions,
  /// Value of the starting token returned before the first token read from input.
  StartUnaryTests,
  /// Content of the string literal, with escape sequences already resolved.
  String(String),
  Then,
}

/// FEEL lexer.
///
/// Converts the input text into a sequence of tokens. Recognition of names
/// depends on the parsing scope and on the flags set by the user of the lexer.
pub struct Lexer<'lexer> {
  /// Parsing scope.
  scope: &'lexer Scope,
  /// Starting token type, returned before the first token.
  start_token_type: Option<TokenType>,
  /// Input characters.
  input: Vec<char>,
  /// Current cursor position in input vector.
  position: usize,
  /// Flag indicating if the unary tests rule is the starting point.
  /// This flag is used to identify `not` keyword,
  /// which otherwise would be recognized as a name.
  /// Token `not` is a keyword at the very beginning of the unary tests rule,
  /// in all other contexts it is just a name.
  unary_tests: bool,
  /// Flag indicating if the `between` keyword was encountered.
  /// When this flag is set, the next `and` token is returned as `band` keyword,
  /// otherwise it is returned as `and`. This allows to disambiguate the `and`
  /// operator used in between clause from conjunction.
  /// After consuming the first `and` as `band` this flag is reset.
  between_begin: bool,
  /// Flag indicating that names of built-in types are expected on input.
  /// When this flag is set, names like `date`, `time` or `date and time` are
  /// returned as plain names, otherwise they are returned as date and time names.
  type_name: bool,
}

/// FEEL lexer implementation.
impl<'lexer> Lexer<'lexer> {
  /// Creates a new lexer for specified input text.
  ///
  /// The `scope` is used for resolving names, the `start_token_type` is handed out
  /// before the first token read from `input`. The cursor starts at the beginning
  /// of the input and all flags are initially cleared.
  pub fn new(scope: &'lexer Scope, start_token_type: TokenType, input: &str) -> Self {
    Self {
      scope,
      start_token_type: Some(start_token_type),
      input: input.chars().collect(),
      position: 0,
      unary_tests: false,
      between_begin: false,
      type_name: false,
    }
  }

  /// Returns a reference to the parsing scope used by this lexer.
  pub fn scope(&self) -> &Scope {
    &self.scope
  }

  /// Sets the flag that allows recognizing `not` as a keyword in the next token.
  /// The flag is cleared after the next token is read from input.
  pub fn set_unary_tests(&mut self) {
    self.unary_tests = true;
  }

  /// Sets the flag indicating that the `between` keyword was encountered,
  /// so the next `and` keyword is returned as `BetweenAnd` token.
  pub fn set_between_begin(&mut self) {
    self.between_begin = true;
  }

  /// Sets the flag indicating that names of built-in types
  /// should be returned as plain names.
  pub fn set_type_name(&mut self) {
    self.type_name = true;
  }

  /// Pushes a default (empty) context to the parsing scope.
  pub fn push_to_scope(&mut self) {
    self.scope.push(FeelContext::default());
  }

  /// Pops a context from the parsing scope, the popped value is discarded.
  pub fn pop_from_scope(&mut self) {
    self.scope.pop();
  }

  /// Adds an entry with the specified name to the parsing scope.
  /// The entry is set with `VALUE_NULL` as its value.
  pub fn add_name_to_scope(&mut self, name: &Name) {
    self.scope.set_entry(name, VALUE_NULL);
  }

  /// Returns the next token.
  ///
  /// On the first call the starting token passed to the constructor is returned,
  /// provided it is one of the recognized starting token types. All other
  /// calls read the token from input text. The `unary_tests` flag is cleared
  /// every time a token is read from input.
  pub fn get_next_token(&mut self) -> Result<(TokenType, TokenValue)> {
    // the starting token type is handed out only once, so take it out of the lexer
    if let Some(token_type) = self.start_token_type.take() {
      let token_value = match token_type {
        // there is no separate value for the context start, the boxed expression value is used
        TokenType::StartBoxedExpression | TokenType::StartContext => Some(TokenValue::StartBoxedExpression),
        TokenType::StartTextualExpression => Some(TokenValue::StartTextualExpression),
        TokenType::StartTextualExpressions => Some(TokenValue::StartTextualExpressions),
        TokenType::StartUnaryTests => Some(TokenValue::StartUnaryTests),
        _ => None,
      };
      if let Some(token_value) = token_value {
        return Ok((token_type, token_value));
      }
    }
    let token = self.read_next_token();
    self.unary_tests = false;
    token
  }

  /// Reads the next token starting from current position.
  ///
  /// The input is first loaded into a fixed-size look-ahead buffer (whitespace
  /// and a comment preceding the token are skipped, missing characters are
  /// padded with spaces), and then the buffer is matched against token patterns.
  /// The order of match arms is significant: longer keywords and operators
  /// are placed before shorter ones, so the longest token wins.
  /// After recognizing a token, the cursor is advanced by the token length.
  ///
  /// Returns `YyEof` when the buffer contains only spaces,
  /// and `YyUndef` when no pattern matches.
  fn read_next_token(&mut self) -> Result<(TokenType, TokenValue)> {
    let chars = self.read_input();
    match chars {
      // keywords, recognized only when followed by an allowed separator
      ['s', 'a', 't', 'i', 's', 'f', 'i', 'e', 's', WS, _, _] => {
        self.position += 9;
        Ok((TokenType::Satisfies, TokenValue::Satisfies))
      }
      ['e', 'x', 't', 'e', 'r', 'n', 'a', 'l', WS, _, _, _] => {
        self.position += 8;
        Ok((TokenType::External, TokenValue::External))
      }
      ['f', 'u', 'n', 'c', 't', 'i', 'o', 'n', ch, _, _, _] if is_function_separator(ch) => {
        self.position += 8;
        Ok((TokenType::Function, TokenValue::Function))
      }
      ['i', 'n', 's', 't', 'a', 'n', 'c', 'e', WS, _, _, _] => {
        self.position += 8;
        Ok((TokenType::Instance, TokenValue::Instance))
      }
      ['b', 'e', 't', 'w', 'e', 'e', 'n', WS, _, _, _, _] => {
        self.position += 7;
        Ok((TokenType::Between, TokenValue::Between))
      }
      ['c', 'o', 'n', 't', 'e', 'x', 't', ch, _, _, _, _] if is_context_separator(ch) => {
        self.position += 7;
        Ok((TokenType::Context, TokenValue::Context))
      }
      ['r', 'e', 't', 'u', 'r', 'n', WS, _, _, _, _, _] => {
        self.position += 6;
        Ok((TokenType::Return, TokenValue::Return))
      }
      ['e', 'v', 'e', 'r', 'y', WS, _, _, _, _, _, _] => {
        self.position += 5;
        Ok((TokenType::Every, TokenValue::Every))
      }
      ['f', 'a', 'l', 's', 'e', ch, _, _, _, _, _, _] if is_separator(ch) => {
        self.position += 5;
        Ok((TokenType::Boolean, TokenValue::Boolean(false)))
      }
      ['r', 'a', 'n', 'g', 'e', ch, _, _, _, _, _, _] if is_range_separator(ch) => {
        self.position += 5;
        Ok((TokenType::Range, TokenValue::Range))
      }
      ['n', 'u', 'l', 'l', ch, _, _, _, _, _, _, _] if is_separator(ch) => {
        self.position += 4;
        Ok((TokenType::Null, TokenValue::Null))
      }
      ['e', 'l', 's', 'e', WS, _, _, _, _, _, _, _] => {
        self.position += 4;
        Ok((TokenType::Else, TokenValue::Else))
      }
      ['l', 'i', 's', 't', ch, _, _, _, _, _, _, _] if is_separator(ch) => {
        self.position += 4;
        Ok((TokenType::List, TokenValue::List))
      }
      ['s', 'o', 'm', 'e', WS, _, _, _, _, _, _, _] => {
        self.position += 4;
        Ok((TokenType::Some, TokenValue::Some))
      }
      ['t', 'h', 'e', 'n', WS, _, _, _, _, _, _, _] => {
        self.position += 4;
        Ok((TokenType::Then, TokenValue::Then))
      }
      ['t', 'r', 'u', 'e', ch, _, _, _, _, _, _, _] if is_separator(ch) => {
        self.position += 4;
        Ok((TokenType::Boolean, TokenValue::Boolean(true)))
      }
      // plain conjunction
      ['a', 'n', 'd', WS, _, _, _, _, _, _, _, _] if !self.between_begin => {
        self.position += 3;
        Ok((TokenType::And, TokenValue::And))
      }
      // `and` that belongs to the `between` clause, the flag is reset after the first such `and`
      ['a', 'n', 'd', WS, _, _, _, _, _, _, _, _] if self.between_begin => {
        self.between_begin = false;
        self.position += 3;
        Ok((TokenType::BetweenAnd, TokenValue::BetweenAnd))
      }
      ['f', 'o', 'r', WS, _, _, _, _, _, _, _, _] => {
        self.position += 3;
        Ok((TokenType::For, TokenValue::For))
      }
      // `not` is a keyword only when the unary tests flag is set, otherwise it is consumed as a name
      ['n', 'o', 't', ch, _, _, _, _, _, _, _, _] if self.unary_tests && is_keyword_not_separator(ch) => {
        self.position += 3;
        Ok((TokenType::Not, TokenValue::Not))
      }
      ['i', 'f', WS, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::If, TokenValue::If))
      }
      ['i', 'n', WS, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::In, TokenValue::In))
      }
      ['o', 'f', WS, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::Of, TokenValue::Of))
      }
      ['o', 'r', WS, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::Or, TokenValue::Or))
      }
      // two-character operators, placed before their one-character prefixes
      ['.', '.', _, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::Ellipsis, TokenValue::Ellipsis))
      }
      ['*', '*', _, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::Exp, TokenValue::Exp))
      }
      ['!', '=', _, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::Nq, TokenValue::Nq))
      }
      ['<', '=', _, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::Le, TokenValue::Le))
      }
      ['>', '=', _, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::Ge, TokenValue::Ge))
      }
      ['-', '>', _, _, _, _, _, _, _, _, _, _] => {
        self.position += 2;
        Ok((TokenType::RightArrow, TokenValue::RightArrow))
      }
      // numeric literal without digits before the decimal separator, like `.5`
      ['.', ch, _, _, _, _, _, _, _, _, _, _] if is_digit(ch) => {
        self.consume_character(DECIMAL_SEPARATOR)?;
        Ok((TokenType::Numeric, TokenValue::Numeric("0".to_string(), self.consume_digits())))
      }
      // one-character operators and punctuation
      ['.', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Dot, TokenValue::Dot))
      }
      [',', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Comma, TokenValue::Comma))
      }
      [':', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Colon, TokenValue::Colon))
      }
      ['"', _, _, _, _, _, _, _, _, _, _, _] => self.consume_string(),
      ['+', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Plus, TokenValue::Plus))
      }
      ['-', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Minus, TokenValue::Minus))
      }
      ['*', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Mul, TokenValue::Mul))
      }
      ['/', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Div, TokenValue::Div))
      }
      ['=', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Eq, TokenValue::Eq))
      }
      ['<', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Lt, TokenValue::Lt))
      }
      ['>', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::Gt, TokenValue::Gt))
      }
      ['(', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::LeftParen, TokenValue::LeftParen))
      }
      [')', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::RightParen, TokenValue::RightParen))
      }
      ['[', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::LeftBracket, TokenValue::LeftBracket))
      }
      [']', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::RightBracket, TokenValue::RightBracket))
      }
      ['{', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::LeftBrace, TokenValue::LeftBrace))
      }
      ['}', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::RightBrace, TokenValue::RightBrace))
      }
      ['@', _, _, _, _, _, _, _, _, _, _, _] => {
        self.position += 1;
        Ok((TokenType::At, TokenValue::At))
      }
      // numeric literal, the dot is a decimal separator only when followed by a digit
      [ch, _, _, _, _, _, _, _, _, _, _, _] if is_digit(ch) => {
        let mut digits_before = String::new();
        let mut digits_after = String::new();
        digits_before.push_str(&self.consume_digits());
        if self.is_char_at(0, DECIMAL_SEPARATOR) && self.is_digit_at(1) {
          self.consume_character(DECIMAL_SEPARATOR)?;
          digits_after.push_str(&self.consume_digits());
        }
        Ok((TokenType::Numeric, TokenValue::Numeric(digits_before, digits_after)))
      }
      [ch, _, _, _, _, _, _, _, _, _, _, _] if is_name_start_char(ch) => self.consume_name(),
      // the buffer is padded with spaces, so only spaces in the buffer mean the end of input
      [WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS] => Ok((TokenType::YyEof, TokenValue::YyEof)),
      _ => Ok((TokenType::YyUndef, TokenValue::YyUndef)),
    }
  }

  /// Reads characters from input into the look-ahead buffer.
  ///
  /// Before filling the buffer, all whitespace characters and all comments
  /// preceding the next token are consumed. Any number of consecutive comments
  /// (optionally separated with whitespace) is skipped, not only the first one.
  ///
  /// The returned buffer holds up to `BUF_SIZE` characters starting from
  /// the current position. Every whitespace character is normalized to a single
  /// space, and positions past the end of input are padded with spaces.
  /// Filling the buffer does not advance the current position.
  fn read_input(&mut self) -> [char; BUF_SIZE] {
    loop {
      self.consume_whitespace();
      let position_before_comment = self.position;
      self.consume_comment();
      // the position does not change when there was no comment to consume
      if self.position == position_before_comment {
        break;
      }
    }
    let mut buffer: [char; BUF_SIZE] = [WS; BUF_SIZE];
    for (offset, value) in buffer.iter_mut().enumerate() {
      if let Some(ch) = self.char_at(offset) {
        if !is_whitespace(ch) {
          *value = ch
        };
      }
    }
    buffer
  }

  /// Consumes all whitespace characters starting from the current position.
  /// After consuming a whitespace character the current position is advanced.
  fn consume_whitespace(&mut self) {
    while let Some(ch) = self.char_at(0) {
      if is_whitespace(ch) {
        self.position += 1;
      } else {
        break;
      }
    }
  }

  /// Consumes a single comment beginning at the current position, if any.
  ///
  /// A line comment (`//`) is consumed up to, but not including, the line feed
  /// character or up to the end of input. A block comment (`/*`) is consumed
  /// together with the closing `*/`; when the closing is missing, the rest
  /// of input is consumed. When there is no comment, the position is untouched.
  fn consume_comment(&mut self) {
    if self.char_at(0) != Some('/') {
      return;
    }
    match self.char_at(1) {
      Some('/') => {
        self.position += 2;
        // stop right before the line feed, it is handled as a whitespace later
        while !matches!(self.char_at(0), None | Some('\n')) {
          self.position += 1;
        }
      }
      Some('*') => {
        self.position += 2;
        while self.char_at(0).is_some() {
          if self.char_at(0) == Some('*') && self.char_at(1) == Some('/') {
            self.position += 2;
            return;
          }
          self.position += 1;
        }
      }
      _ => {}
    }
  }

  /// Consumes the string literal beginning at the current position.
  ///
  /// Escape sequences `\'`, `\"`, `\\`, `\n`, `\r`, `\t` and Unicode escapes
  /// (`\u`, `\U`) are replaced with the characters they denote. A backslash
  /// followed by any other character is taken literally.
  ///
  /// Returns `YyUndef` when a vertical space is encountered inside the literal,
  /// and `YyEof` when the input ends before the closing quotation mark.
  fn consume_string(&mut self) -> Result<(TokenType, TokenValue)> {
    let mut literal = String::new();
    self.consume_character('"')?;
    loop {
      let current = match self.char_at(0) {
        Some(ch) => ch,
        None => return Ok((TokenType::YyEof, TokenValue::YyEof)),
      };
      if current == '\\' {
        let following = self.char_at(1);
        // simple two-character escape sequences
        let unescaped = match following {
          Some('\'') => Some('\''),
          Some('"') => Some('"'),
          Some('\\') => Some('\\'),
          Some('n') => Some('\n'),
          Some('r') => Some('\r'),
          Some('t') => Some('\t'),
          _ => None,
        };
        if let Some(ch) = unescaped {
          self.position += 2;
          literal.push(ch);
          continue;
        }
        if matches!(following, Some('u') | Some('U')) {
          literal.push(self.consume_unicode()?);
          continue;
        }
        // not an escape sequence, the backslash is handled below as an ordinary character
      }
      if current == '"' {
        self.position += 1;
        break;
      }
      if is_vertical_space(current) {
        return Ok((TokenType::YyUndef, TokenValue::YyUndef));
      }
      self.position += 1;
      literal.push(current);
    }
    Ok((TokenType::String, TokenValue::String(literal)))
  }

  /// Consumes a run of digits beginning at the current position.
  /// The current position is advanced by one for every consumed digit.
  /// Returns the consumed digits as a string, which is empty
  /// when the character at the current position is not a digit.
  fn consume_digits(&mut self) -> String {
    let mut collected = String::new();
    loop {
      match self.char_at(0) {
        Some(ch) if is_digit(ch) => {
          collected.push(ch);
          self.position += 1;
        }
        _ => return collected,
      }
    }
  }

  /// Consumes a name.
  ///
  /// The name is recognized in two steps. First, the longest possible sequence
  /// of name parts is collected from input using a small state machine; name parts
  /// may be separated with whitespace, and every additional name symbol is
  /// a part of its own. For every collected part, the position of its last
  /// character is remembered. Second, the collected parts are resolved:
  ///
  /// - when the first part is `item`, only this part is returned as a name,
  /// - otherwise, the longest prefix of parts whose flattened form is a key
  ///   in the parsing scope is returned as a name, and the cursor is moved
  ///   right after this prefix,
  /// - otherwise, all collected parts are returned as a name; built-in date
  ///   and time names are returned as `NameDateTime`, unless the `type_name`
  ///   flag is set and the name is a name of a built-in type.
  ///
  /// During collecting, `self.position` points to the most recently consumed
  /// character, and all checks look at the character that follows it.
  fn consume_name(&mut self) -> Result<(TokenType, TokenValue)> {
    // collection of all name parts
    let mut parts = vec![];
    // currently parsed name part
    let mut current_part = "".to_string();
    // positions of consumed characters
    let mut consumed_positions = vec![];
    // the current character on input is already a name start character, so consume it
    let mut ch = self.peek_character()?;
    current_part.push(ch);
    // start parsing the rest of input using a state machine
    let mut state = 1;
    loop {
      match state {
        // state 1: collecting characters of the first name part
        1 => {
          if self.is_next_name_part_char() {
            self.position += 1;
            ch = self.peek_character()?;
            current_part.push(ch);
          } else {
            parts.push(current_part.clone());
            consumed_positions.push(self.position);
            current_part = String::new();
            state = 2;
          }
        }
        // state 2: a part is completed, decide what follows it
        2 => {
          if self.is_next_name_part_char() {
            state = 3;
          } else if self.is_next_additional_name_symbol() {
            state = 4;
          } else if self.is_next_whitespace() {
            state = 5;
          } else {
            // nothing more belongs to the name, move the cursor past the last consumed character
            self.position += 1;
            break;
          }
        }
        // state 3: collecting characters of the subsequent name part
        3 => {
          if self.is_next_name_part_char() {
            self.position += 1;
            ch = self.peek_character()?;
            current_part.push(ch);
          } else {
            parts.push(current_part.clone());
            consumed_positions.push(self.position);
            current_part = String::new();
            state = 2;
          }
        }
        // state 4: every additional name symbol becomes a separate part
        4 => {
          if self.is_next_additional_name_symbol() {
            self.position += 1;
            ch = self.peek_character()?;
            current_part.push(ch);
            consumed_positions.push(self.position);
            parts.push(current_part.clone());
            current_part = String::new();
          } else {
            state = 2;
          }
        }
        // state 5: skipping whitespace between name parts
        5 => {
          if self.is_next_whitespace() {
            self.position += 1;
            self.peek_character()?;
          } else {
            state = 2;
          }
        }
        // no other states are ever assigned above
        _ => {}
      }
    }
    // now the `parts` contains all parts of the longest possible name,
    // now decide what kind of name it is, by checking the parsing scope

    // ------------------------------------------------------------------------
    // tweak with name of the `item` in filter
    // ------------------------------------------------------------------------
    if let Some(part_name) = parts.get(0) {
      if part_name == "item" {
        // return to the input everything that follows the first part
        self.position = consumed_positions[0] + 1;
        return Ok((TokenType::Name, TokenValue::Name(Name::from("item"))));
      }
    }

    // begin with the longest name containing all parts
    let mut part_count = parts.len();
    let flattened_keys = self.scope.flatten_keys();
    while part_count > 0 {
      // take a sublist of the original part list until the list is empty
      let part_sublist = &parts[..part_count];
      // flatten the name parts to compare it with built-in names and keys in current context
      let name = flatten_name_parts(part_sublist);
      // check if the flattened name exists as a key in the current context
      if flattened_keys.contains(&name) {
        // return to the input all characters that do not belong to the name that was found
        self.position = consumed_positions[part_count - 1] + 1;
        // return the name that exists in the current context
        return Ok((TokenType::Name, TokenValue::Name(part_sublist.to_vec().into())));
      }
      part_count -= 1;
    }

    // build the name from name parts
    let name: Name = parts.to_vec().into();
    // ------------------------------------------------------------------------
    // tweak with names of built-in types
    // ------------------------------------------------------------------------
    if self.type_name
      && matches!(
        name.to_string().as_str(),
        "boolean" | "number" | "string" | "date" | "date and time" | "time" | "years and months duration" | "days and time duration"
      )
    {
      return Ok((TokenType::Name, TokenValue::Name(name)));
    }
    // ------------------------------------------------------------------------
    // tweak with date and time literals
    // ------------------------------------------------------------------------
    if matches!(name.to_string().as_str(), "date" | "date and time" | "time" | "duration") {
      return Ok((TokenType::NameDateTime, TokenValue::NameDateTime(name)));
    }
    // by default return the name as it appears in input
    Ok((TokenType::Name, TokenValue::Name(name)))
  }

  // NOTE: The function below is disabled and kept only for reference.
  // Takes a list of parts that make up the name and builds the name token.
  // If there is one or more dots '.' as parts in the name,
  // then this name is a [Token::QualifiedName].
  // If there are no dots as parts, then this is a [Token::Name].
  // fn prepare_name_token(&self, parts: &[String]) -> (TokenType, TokenValue) {
  //   let mut names: Vec<Name> = vec![];
  //   for parts in parts.split(|v| v == ".") {
  //     names.push(parts.to_vec().into());
  //   }
  //   if names.len() > 1 {
  //     (TokenType::QualifiedName, Some(TokenValue::QualifiedName(names)))
  //   } else {
  //     (TokenType::Name, Some(TokenValue::Name(parts.to_vec().into())))
  //   }
  //   (TokenType::Name, TokenValue::Name(parts.to_vec().into()))
  // }

  /// Consumes a single hexadecimal digit from input and returns its numeric value.
  /// Reports an error when the current character is not a hexadecimal digit
  /// or when there are no more characters on input.
  fn consume_hex_digit(&mut self) -> Result<u64> {
    match self.char_at(0) {
      Some(ch) if is_hex_digit(ch) => {
        self.position += 1;
        Ok(hex_to_decimal(ch))
      }
      Some(ch) => Err(expected_hex_digit(ch)),
      None => Err(unexpected_eof()),
    }
  }

  /// Peeks the current character from input or returns
  /// an error when there are no more characters available.
  fn peek_character(&self) -> Result<char> {
    match self.char_at(0) {
      Some(ch) => Ok(ch),
      None => Err(unexpected_eof()),
    }
  }

  /// Consumes the character at the current position when it is equal to the expected one.
  /// Reports an error when the character differs from expected
  /// or when the end of input was reached; in both cases nothing is consumed.
  fn consume_character(&mut self, expected: char) -> Result<char> {
    let actual = self.peek_character()?;
    if actual != expected {
      return Err(expected_character(expected, actual));
    }
    self.position += 1;
    Ok(actual)
  }

  /// Consumes the character at the current position when it is one of the expected
  /// characters. Reports an error when the character is not on the list
  /// or when the end of input was reached; in both cases nothing is consumed.
  fn consume_characters(&mut self, expected: &[char]) -> Result<char> {
    let actual = self.peek_character()?;
    if !expected.contains(&actual) {
      return Err(expected_characters(expected, actual));
    }
    self.position += 1;
    Ok(actual)
  }

  /// Consumes the Unicode escape sequence and returns its numeric value.
  /// Accepted forms are:
  /// - \u0000 ('\' + 'u' + four hexadecimal characters), or
  /// - \U000000 ('\' + 'U' + six hexadecimal characters).
  fn consume_unicode_literal(&mut self) -> Result<u64> {
    self.consume_character('\\')?;
    let marker = self.consume_characters(&['u', 'U'])?;
    let digit_count = if marker == 'U' { 6 } else { 4 };
    let mut value = 0_u64;
    // digits are given from the most significant one
    for _ in 0..digit_count {
      value = value * 16 + self.consume_hex_digit()?;
    }
    Ok(value)
  }

  /// Consumes the character given as a Unicode escape sequence
  /// in one of the following forms:
  /// - \u0000 ('\' + 'u' + four hexadecimal characters), or
  /// - \U000000 ('\' + 'U' + six hexadecimal characters).
  ///
  /// When the consumed value is a high surrogate of UTF-16 (0xD800..=0xDBFF),
  /// then the next escape sequence is consumed and it must be a low surrogate
  /// (0xDC00..=0xDFFF); both surrogates are combined into a single character.
  ///
  /// Errors are reported when:
  /// - the escape sequence is malformed or the input ends prematurely,
  /// - the high surrogate is not followed by a low surrogate,
  /// - the value is a lone low surrogate or is greater than 0x10FFFF,
  /// - the resulting code point can not be converted to a character.
  fn consume_unicode(&mut self) -> Result<char> {
    let value = self.consume_unicode_literal()?;
    let code_point = match value {
      // Unicode scalar values are converted directly
      0x0000..=0xD7FF | 0xE000..=0x10FFFF => value,
      // value is the high surrogate of UTF-16, the low surrogate must follow
      0xD800..=0xDBFF => {
        let low_surrogate = self.consume_unicode_literal()?;
        match low_surrogate {
          0xDC00..=0xDFFF => 0x10000 + ((value - 0xD800) * 0x400) + (low_surrogate - 0xDC00),
          _ => return Err(unicode_surrogate_out_of_range(value)),
        }
      }
      _ => return Err(unicode_value_out_of_range(value)),
    };
    // the code point never exceeds 0x10FFFF here, so the cast to `u32` is lossless
    std::char::from_u32(code_point as u32).ok_or_else(|| unicode_conversion_failed(value))
  }

  /// Consumes the sequence of expected characters, one after another.
  /// Returns the consumed characters as a string. Consuming stops at the first
  /// character that differs from expected, and then an error is reported.
  fn consume_chars(&mut self, expected: &[char]) -> Result<String> {
    expected.iter().map(|&ch| self.consume_character(ch)).collect()
  }

  /// Checks if the next value on input is whitespace character.
  fn is_next_whitespace(&self) -> bool {
    if let Some(ch) = self.char_at(1) {
      is_whitespace(ch)
    } else {
      false
    }
  }

  /// Checks if the next character is the name part character.
  fn is_next_name_part_char(&self) -> bool {
    if let Some(ch) = self.char_at(1) {
      is_name_part_char(ch)
    } else {
      false
    }
  }

  /// Returns **true* when the next character on input is the additional name symbol.
  fn is_next_additional_name_symbol(&self) -> bool {
    if let Some(ch) = self.char_at(1) {
      is_additional_name_symbol(ch)
    } else {
      false
    }
  }

  /// Returns the character placed `offset` characters after the current position,
  /// or `None` when this place is beyond the end of input.
  fn char_at(&self, offset: usize) -> Option<char> {
    self.input.get(self.position + offset).copied()
  }

  /// Returns `true` when the character at the current position advanced
  /// by the offset is equal to the expected one. Returns `false` also
  /// when there is no character at that position.
  fn is_char_at(&self, offset: usize, expected: char) -> bool {
    self.char_at(offset) == Some(expected)
  }

  /// Returns **true** when the character at the current
  /// position advanced by the offset is a digit.
  fn is_digit_at(&self, offset: usize) -> bool {
    if let Some(ch) = self.char_at(offset) {
      is_digit(ch)
    } else {
      false
    }
  }
}

/// Returns `true` when the specified character is one of ASCII digits from `0` to `9`.
fn is_digit(ch: char) -> bool {
  matches!(ch, '0'..='9')
}

/// Returns `true` when the specified character is a hexadecimal digit,
/// that is one of: `0`-`9`, `a`-`f` or `A`-`F`.
fn is_hex_digit(ch: char) -> bool {
  ch.is_ascii_hexdigit()
}

/// Returns `true` when the specified character is a separator (white space equivalent).
fn is_separator(ch: char) -> bool {
  matches!(ch, WS | '=' | '!' | '<' | '>' | '+' | '-' | '*' | '/' | '%' | '.' | ',' | ')' | '[' | ']' | '}')
}

/// Returns `true` when the specified character is a context keyword separator,
/// that is a character that is allowed after keyword `context`.
fn is_context_separator(ch: char) -> bool {
  matches!(ch, WS | '<')
}

/// Returns `true` when the specified character is a function keyword separator,
/// that is a character that is allowed after keyword `function`.
fn is_function_separator(ch: char) -> bool {
  matches!(ch, WS | '(' | '<')
}

/// Returns `true` when the specified character is a range keyword separator,
/// that is a character that is allowed after keyword `range`.
fn is_range_separator(ch: char) -> bool {
  matches!(ch, WS | '<')
}

/// Returns `true` when the specified character is a keyword `not` separator,
/// that is a character that is allowed after keyword `not` at the beginning of unary tests.
fn is_keyword_not_separator(ch: char) -> bool {
  matches!(ch, WS | '(')
}

/// Returns `true` when the specified character is one of the additional
/// name symbols: `.`, `/`, `-`, `'`, `+` or `*`.
/// Specification: 10.3.1.2 Grammar rule, p.120, grammar rule 30.
fn is_additional_name_symbol(ch: char) -> bool {
  ['.', '/', '-', '\'', '+', '*'].contains(&ch)
}

/// Returns `true` when the specified character may start a name.
/// Specification: 10.3.1.2 Grammar rules, p.120, grammar rule 28.
fn is_name_start_char(ch: char) -> bool {
  if ch.is_ascii() {
    // in ASCII range only letters, question mark and underscore start a name
    return ch == '?' || ch == '_' || ch.is_ascii_alphabetic();
  }
  matches!(
    ch,
    '\u{00C0}'..='\u{00D6}'
      | '\u{00D8}'..='\u{00F6}'
      | '\u{00F8}'..='\u{02FF}'
      | '\u{0370}'..='\u{037D}'
      | '\u{037F}'..='\u{1FFF}'
      | '\u{200C}'..='\u{200D}'
      | '\u{2070}'..='\u{218F}'
      | '\u{2C00}'..='\u{2FEF}'
      | '\u{3001}'..='\u{D7FF}'
      | '\u{F900}'..='\u{FDCF}'
      | '\u{FDF0}'..='\u{FFFD}'
      | '\u{10000}'..='\u{EFFFF}'
  )
}

/// Returns `true` when the specified character is a name part character, that is
/// a name start character, a digit, or one of the additional characters allowed inside a name.
/// Specification: 10.3.1.2 Grammar rules, p.120, grammar rule 29.
fn is_name_part_char(ch: char) -> bool {
  match ch {
    '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => true,
    _ => is_digit(ch) || is_name_start_char(ch),
  }
}

/// Returns `true` when the specified character is a whitespace,
/// that is a vertical space or one of the horizontal space characters.
fn is_whitespace(ch: char) -> bool {
  let horizontal_space = matches!(
    ch,
    '\u{0009}'
      | '\u{0020}'
      | '\u{0085}'
      | '\u{00A0}'
      | '\u{1680}'
      | '\u{180E}'
      | '\u{2000}'..='\u{200B}'
      | '\u{2028}'
      | '\u{2029}'
      | '\u{202F}'
      | '\u{205F}'
      | '\u{3000}'
      | '\u{FEFF}'
  );
  horizontal_space || is_vertical_space(ch)
}

/// Returns `true` when the specified character is a vertical space,
/// that is a character from the range U+000A to U+000D.
/// Specification: 10.3.1.2 Grammar rules, p.120, grammar rule 62.
fn is_vertical_space(ch: char) -> bool {
  ('\u{000A}'..='\u{000D}').contains(&ch)
}

// Disabled: returns `true` when the specified character is a whitespace character or left parenthesis.
// fn is_whitespace_or_left_paren(ch: char) -> bool {
//   is_whitespace(ch) || ch == '('
// }

/// Returns the numeric value (0..=15) of a hexadecimal digit, case insensitive.
///
/// Any character that is not a hexadecimal digit yields `0`.
fn hex_to_decimal(ch: char) -> u64 {
  ch.to_digit(16).map_or(0, u64::from)
}

/// Builds a single name string from its parts.
///
/// Each part is trimmed, the parts are joined with one space and the result
/// is trimmed again. Afterwards every additional name symbol
/// (`.`, `/`, `-`, `'`, `+`, `*`) that is surrounded by exactly one space on
/// each side has those spaces removed.
fn flatten_name_parts(parts: &[String]) -> String {
  //TODO remove this function, this should not be needed, converting Name to string should suffice

  // Replacements are applied one after another, in this order.
  const SYMBOL_REPLACEMENTS: [(&str, &str); 6] = [(" . ", "."), (" / ", "/"), (" - ", "-"), (" ' ", "'"), (" + ", "+"), (" * ", "*")];

  let trimmed_parts: Vec<&str> = parts.iter().map(|part| part.trim()).collect();
  let joined = trimmed_parts.join(" ").trim().to_string();
  SYMBOL_REPLACEMENTS
    .iter()
    .fold(joined, |name, &(padded, bare)| name.replace(padded, bare))
}

/// Definitions of errors raised by the lexer.
///
/// The error enumeration itself is private; callers obtain a `DmntkError`
/// through the public constructor functions defined in this module.
pub mod errors {
  use dmntk_common::DmntkError;

  /// Lexer errors.
  #[derive(Debug, PartialEq)]
  enum LexerError {
    // LexerNoMoreTokens,
    /// The input ended unexpectedly.
    UnexpectedEof,
    // LexerUnexpectedCharacterAtPosition(char, usize),
    /// A specific character was expected (first field), another one was encountered (second field).
    ExpectedCharacter(char, char),
    /// One of several characters was expected (first field), another one was encountered (second field).
    ExpectedCharacters(Vec<char>, char),
    /// A hexadecimal digit was expected, the given character was encountered.
    ExpectedHexDigit(char),
    // NotBuiltInFunctionName(String),
    // NotBuiltInDateAndTimeFunctionName(String),
    // NotBuiltInTypeName(String),
    // NameNotFoundInContext(String),
    /// The value lies outside of the allowed Unicode range.
    UnicodeValueOutOfRange(u64),
    /// The value lies outside of the allowed UTF-16 surrogate range.
    UnicodeSurrogateOutOfRange(u64),
    /// The value could not be converted to a Unicode character.
    UnicodeConversionFailed(u64),
  }

  impl From<LexerError> for DmntkError {
    /// Wraps the lexer error message in a `DmntkError` tagged with `LexerError`.
    fn from(error: LexerError) -> Self {
      DmntkError::new("LexerError", &error.to_string())
    }
  }

  impl std::fmt::Display for LexerError {
    /// Writes the human readable message of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
      match self {
        // LexerError::LexerNoMoreTokens => {
        //   write!(f, "no more tokens on input")
        // }
        Self::UnexpectedEof => write!(f, "unexpected end of file"),
        // LexerError::LexerUnexpectedCharacterAtPosition(ch, pos) => {
        //   write!(f, "unexpected character: '{}' at position {}", ch, pos)
        // }
        Self::ExpectedCharacter(expected, actual) => write!(f, "expected '{}' character but encountered '{}'", expected, actual),
        Self::ExpectedCharacters(expected, actual) => write!(f, "expected '{:?}' characters but encountered '{}'", expected, actual),
        Self::ExpectedHexDigit(actual) => write!(f, "expected hex digit but encountered '{}'", actual),
        // LexerError::NotBuiltInFunctionName(name) => {
        //   write!(f, "encountered name is not built-in function name: {}", name)
        // }
        // LexerError::NotBuiltInDateAndTimeFunctionName(name) => {
        //   write!(f, "encountered name is not built-in date and time function name: {}", name)
        // }
        // LexerError::NotBuiltInTypeName(name) => {
        //   write!(f, "encountered name is not built-in type name: {}", name)
        // }
        // LexerError::NameNotFoundInContext(name) => {
        //   write!(f, "encountered name was not found in context: {}", name)
        // }
        Self::UnicodeValueOutOfRange(value) => write!(f, "Unicode value is out of allowed range 0x0000..0x10FFFF : {:X}", value),
        Self::UnicodeSurrogateOutOfRange(value) => write!(f, "UTF-16 surrogate value is out of allowed range 0xD800..0xDFFF : {:X}", value),
        Self::UnicodeConversionFailed(value) => write!(f, "conversion of the value {:X} to Unicode character has failed.", value),
      }
    }
  }

  /// Creates an error reporting an unexpected end of file.
  pub fn unexpected_eof() -> DmntkError {
    DmntkError::from(LexerError::UnexpectedEof)
  }

  // pub fn no_more_tokens() -> DmntkError {
  //   LexerError::LexerNoMoreTokens.into()
  // }

  // pub fn name_not_found_in_context(name: &str) -> DmntkError {
  //   LexerError::NameNotFoundInContext(name.to_string()).into()
  // }

  // pub fn not_built_in_type_name(name: &str) -> DmntkError {
  //   LexerError::NotBuiltInTypeName(name.to_string()).into()
  // }

  // pub fn not_built_in_function_name(name: &str) -> DmntkError {
  //   LexerError::NotBuiltInFunctionName(name.to_string()).into()
  // }

  // pub fn not_built_in_date_time_function_name(name: &str) -> DmntkError {
  //   LexerError::NotBuiltInDateAndTimeFunctionName(name.to_string()).into()
  // }

  /// Creates an error reporting that `expected` was required but `actual` was encountered.
  pub fn expected_character(expected: char, actual: char) -> DmntkError {
    DmntkError::from(LexerError::ExpectedCharacter(expected, actual))
  }

  /// Creates an error reporting that one of the `expected` characters was required
  /// but `actual` was encountered.
  pub fn expected_characters(expected: &[char], actual: char) -> DmntkError {
    DmntkError::from(LexerError::ExpectedCharacters(Vec::from(expected), actual))
  }

  /// Creates an error reporting that a hexadecimal digit was required but `ch` was encountered.
  pub fn expected_hex_digit(ch: char) -> DmntkError {
    DmntkError::from(LexerError::ExpectedHexDigit(ch))
  }

  /// Creates an error reporting that `value` is outside of the allowed Unicode range.
  pub fn unicode_value_out_of_range(value: u64) -> DmntkError {
    DmntkError::from(LexerError::UnicodeValueOutOfRange(value))
  }

  /// Creates an error reporting that `value` is outside of the allowed UTF-16 surrogate range.
  pub fn unicode_surrogate_out_of_range(value: u64) -> DmntkError {
    DmntkError::from(LexerError::UnicodeSurrogateOutOfRange(value))
  }

  /// Creates an error reporting that `value` could not be converted to a Unicode character.
  pub fn unicode_conversion_failed(value: u64) -> DmntkError {
    DmntkError::from(LexerError::UnicodeConversionFailed(value))
  }
}

#[cfg(test)]
mod tests {
  use super::{flatten_name_parts, is_additional_name_symbol, is_separator};

  /// Converts string literals into the owned parts expected by `flatten_name_parts`.
  fn owned(parts: &[&str]) -> Vec<String> {
    parts.iter().map(|part| String::from(*part)).collect()
  }

  /// Verifies which characters are, and which are not, reported as separators.
  #[test]
  fn test_is_separator() {
    for ch in " =!<>+-*/%.,)[]}".chars() {
      assert!(is_separator(ch), "expected {:?} to be a separator", ch);
    }
    for ch in "({?".chars() {
      assert!(!is_separator(ch), "expected {:?} not to be a separator", ch);
    }
  }

  /// Verifies which characters are, and which are not, reported as additional name symbols.
  #[test]
  fn test_is_additional_name_symbol() {
    for ch in "./-'+*".chars() {
      assert!(is_additional_name_symbol(ch), "expected {:?} to be an additional name symbol", ch);
    }
    for ch in " $:()[]{}?".chars() {
      assert!(!is_additional_name_symbol(ch), "expected {:?} not to be an additional name symbol", ch);
    }
  }

  /// Verifies joining, trimming and symbol compaction of name parts.
  #[test]
  fn test_flatten_name_parts() {
    let cases: Vec<(&str, Vec<&str>)> = vec![
      ("left middle right", vec!["left", "middle", "right"]),
      ("a.b", vec!["   a  ", " . ", "  b "]),
      ("a/b", vec!["   a  ", " / ", "  b "]),
      ("a-b", vec!["   a  ", " - ", "  b "]),
      ("a'b", vec!["   a  ", " ' ", "  b "]),
      ("a+b", vec!["   a  ", " + ", "  b "]),
      ("a*b", vec!["   a  ", " * ", "  b "]),
      ("", vec!["", "", ""]),
      ("", vec![]),
    ];
    for (expected, parts) in cases {
      assert_eq!(expected, flatten_name_parts(&owned(&parts)));
    }
  }
}
