// Filename: pipeline.rs
// Version:	 0.3
// Date:	 27-07-2021 (DD-MM-YYYY)
//
// Copyright (c) 2021 Kai Rese
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program. If not, see
// <https://www.gnu.org/licenses/>.

//! Configuration for pipelines, coming after the front end of fetch and decode.

use serde::{Deserialize, Serialize};

/// The properties of a CPU pipeline.
///
/// This models the complete execution back-end of a CPU, including register file access, an ALU
/// and/or AGU and access to the Load-Store-Unit.
/// As there can be many different things happening in the back-end, a pipeline is highly
/// configurable.
///
/// The struct derives `Serialize` and `Deserialize`, so it can be converted to and from any
/// format supported through serde. All fields are private to this module.
#[derive(Deserialize, Serialize)]
pub struct PipelineConfig {
    /// The ALU latency of each instruction class.
    execution_latencies: InstructionLatencies,
    /// The ID of the scheduler the pipeline uses, if any. Multiple pipelines with the same
    /// scheduler ID share a scheduler among them.
    scheduler_id: Option<usize>,
    /// Whether the pipeline can execute instructions exclusively using the general purpose
    /// register file.
    general_purpose: bool,
    /// Whether the pipeline can execute instructions using the vector register file.
    vector: bool,
    /// Whether the pipeline can actually execute ALU instructions.
    alu: bool,
    /// Whether the pipeline can serve memory read operands.
    memory_load: bool,
    /// Whether the pipeline can serve memory write operands.
    memory_store: bool,
    /// Whether the pipeline can serve ALU instructions that also have a memory operand. If this
    /// is the case, there needs to be an AGU and the memory stage is set before the ALU stage.
    fused_memory_alu: bool,
    /// Whether the pipeline makes use of register renaming. If it doesn't, an instruction can
    /// only be issued to this pipeline if every register operand is ready and the output register
    /// doesn't have an instruction already wanting to write to it.
    renaming: bool,
}

/// Defines the number of clock cycles each instruction type needs.
///
/// The fields of this struct are used to selectively overwrite default values. If a field is
/// `None`, the default value from [`default_latencies`] is used when the latencies are resolved
/// by [`InstructionLatencies::as_array`].
///
/// A value of zero means that the type isn't supported in a pipeline. By default, all types are
/// supported.
#[derive(Deserialize, Serialize)]
pub struct InstructionLatencies {
    /// Simple addition.
    pub integer_add: Option<u16>,
    /// Typically takes multiple, but few clock cycles.
    pub integer_multiply: Option<u16>,
    /// Takes many clock cycles, usually dependent on the operand size.
    pub integer_divide: Option<u16>,
    /// Combination of add- and mul-operation.
    pub integer_multiply_add: Option<u16>,
    /// Completes fast, but needs a shifter.
    pub integer_shift: Option<u16>,
    /// Might take longer than floating point multiplication.
    pub float_add: Option<u16>,
    /// Usually as fast as integer multiplication.
    pub float_multiply: Option<u16>,
    /// Takes many clock cycles, usually dependent on the operand size.
    pub float_divide: Option<u16>,
    /// Might need multiple execution ports if a design has separate add- and mul-pipes.
    pub float_multiply_add: Option<u16>,
    /// Usually completes fast, but needs a branch unit. The same value is also used for the
    /// jump slot when the latencies are converted to an array.
    pub branch: Option<u16>,
}

impl InstructionLatencies {
    /// Resolves every latency to a concrete cycle count and lays the results out as an array
    /// whose indices conform to the [`gpcas_base::instruction_type`] definition.
    ///
    /// Fields that are `None` fall back to the matching constant in [`default_latencies`]. The
    /// first three slots (register moves, moves, simple) are fixed at one clock cycle, and the
    /// last two slots (branches and jumps) both use the `branch` latency.
    pub fn as_array(&self) -> [u16; 14] {
        // Exhaustive destructuring: every field is `Copy`, and a newly added field must be
        // handled here explicitly instead of being silently ignored.
        let Self {
            integer_add,
            integer_multiply,
            integer_divide,
            integer_multiply_add,
            integer_shift,
            float_add,
            float_multiply,
            float_divide,
            float_multiply_add,
            branch,
        } = *self;

        // Branches and jumps share a single configurable latency.
        let branch_cycles = branch.unwrap_or(default_latencies::BRANCH);

        [
            1, // register moves
            1, // moves
            1, // simple
            integer_add.unwrap_or(default_latencies::INT_ADD),
            integer_multiply.unwrap_or(default_latencies::INT_MUL),
            integer_divide.unwrap_or(default_latencies::INT_DIV),
            integer_multiply_add.unwrap_or(default_latencies::INT_MUL_ADD),
            integer_shift.unwrap_or(default_latencies::INT_SHIFT),
            float_add.unwrap_or(default_latencies::FLOAT_ADD),
            float_multiply.unwrap_or(default_latencies::FLOAT_MUL),
            float_divide.unwrap_or(default_latencies::FLOAT_DIV),
            float_multiply_add.unwrap_or(default_latencies::FLOAT_MUL_ADD),
            branch_cycles, // branches
            branch_cycles, // jumps
        ]
    }
}

/// The default latency in clock cycles for all instruction types.
///
/// These values are used whenever the corresponding [`InstructionLatencies`] field is `None`.
/// Types not mentioned in here either share their value with another one, or are hardcoded to one
/// clock cycle.
pub mod default_latencies {
    /// Integer addition: simple, completes in a single cycle by default.
    pub const INT_ADD: u16 = 1;
    /// Integer multiplication: typically takes multiple, but few clock cycles.
    pub const INT_MUL: u16 = 3;
    /// Integer division: takes many clock cycles, usually dependent on the operand size.
    pub const INT_DIV: u16 = 22;
    /// Integer multiply-add: combination of add- and mul-operation.
    pub const INT_MUL_ADD: u16 = 3;
    /// Integer shift: completes fast, but needs a shifter.
    pub const INT_SHIFT: u16 = 1;
    /// Floating point addition: might take longer than floating point multiplication.
    pub const FLOAT_ADD: u16 = 4;
    /// Floating point multiplication: usually as fast as integer multiplication.
    pub const FLOAT_MUL: u16 = 3;
    /// Floating point division: takes many clock cycles, usually dependent on the operand size.
    pub const FLOAT_DIV: u16 = 22;
    /// Floating point multiply-add: might take multiple ports if a design has separate add- and
    /// mul-pipes.
    pub const FLOAT_MUL_ADD: u16 = 5;
    /// Branch: completes fast, but needs a branch unit.
    pub const BRANCH: u16 = 1;
}
