use crate::records::HtmlRecord;
use std::io::Error;

use crate::clients::fetch_html_record;

/*

Wax worker

*/
/// Namespace type for the HTML "pressing" (crawl and fetch) routines.
///
/// The struct has no fields; its functionality is provided by the associated
/// async functions in its `impl` block.
pub struct HtmlPresser {}

// All methods within this impl must be async
impl HtmlPresser {
    /// Harvests domain related urls for a blind crawl, starting at `origin`.
    ///
    /// Queued urls are fetched in order over HTTP. Every anchor reported by
    /// the fetched record's `domain_anchors()` that is not already queued is
    /// appended to the queue.
    ///
    /// * `origin` - url of the site to crawl; always the first queued url.
    /// * `link_limit` - maximum number of urls returned.
    /// * `page_limit` - maximum number of pages to fetch. One fetch is always
    ///   attempted before the limits are checked.
    ///
    /// A url that cannot be fetched is reported on stdout and skipped; it
    /// still counts against `page_limit`.
    ///
    /// Returns `Ok` with the collected urls (origin first), cut down to at
    /// most `link_limit` entries. This function currently never returns `Err`.
    pub async fn press_urls(
        origin: &str,
        link_limit: usize,
        page_limit: usize,
    ) -> Result<Vec<String>, Error> {
        let mut urls: Vec<String> = vec![origin.to_string()];
        let mut indexer: usize = 0;

        loop {
            let fetched = fetch_html_record(&urls[indexer]).await;

            // Advance on every attempt, successful or not. Retrying the same
            // index after a failure would spin forever on an unreachable url.
            indexer += 1;

            match fetched {
                Ok(record) => {
                    for anchor in record.domain_anchors() {
                        if !urls.contains(&anchor) {
                            urls.push(anchor);
                        }
                    }
                }
                Err(_) => println!("nope"),
            }

            // The bounds check on `urls.len()` also guards the index used by
            // the next iteration.
            if urls.len() > link_limit || indexer >= page_limit || indexer >= urls.len() {
                break;
            }
        }

        // The last page may have pushed the queue past the requested size.
        urls.truncate(link_limit);

        Ok(urls)
    }

    /// Presses urls whose anchors match `pattern`, starting at `origin`.
    ///
    /// Queued urls are fetched in order over HTTP. Every anchor returned by
    /// the fetched record's `anchors_curate(pattern)` that is not already
    /// queued is both queued for crawling and added to the result.
    ///
    /// * `origin` - url of the site to crawl; it seeds the queue but is not
    ///   part of the returned list.
    /// * `pattern` - passed unchanged to `anchors_curate` to select anchors.
    /// * `link_limit` - maximum number of links returned.
    /// * `page_limit` - maximum number of pages to fetch. One fetch is always
    ///   attempted before the limits are checked.
    ///
    /// A url that cannot be fetched is reported on stdout and skipped; it
    /// still counts against `page_limit`.
    ///
    /// Returns `Ok` with the curated links in discovery order, cut down to at
    /// most `link_limit` entries. This function currently never returns `Err`.
    pub async fn press_curated_urls(
        origin: &str,
        pattern: &str,
        link_limit: usize,
        page_limit: usize,
    ) -> Result<Vec<String>, Error> {
        let mut urls: Vec<String> = vec![origin.to_string()];
        let mut curated_links: Vec<String> = vec![];
        let mut indexer: usize = 0;

        loop {
            let fetched = fetch_html_record(&urls[indexer]).await;

            // Advance on every attempt, successful or not. Retrying the same
            // index after a failure would spin forever on an unreachable url.
            indexer += 1;

            match fetched {
                Ok(document) => {
                    for anchor in document.anchors_curate(pattern) {
                        if !urls.contains(&anchor) {
                            urls.push(anchor.clone());
                            curated_links.push(anchor);
                        }
                    }
                }
                Err(_) => println!("nope"),
            }

            // The bounds check on `urls.len()` also guards the index used by
            // the next iteration.
            if urls.len() > link_limit || indexer >= page_limit || indexer >= urls.len() {
                break;
            }
        }

        // The last page may have contributed more links than were requested.
        curated_links.truncate(link_limit);

        Ok(curated_links)
    }

    /// Blind crawl that collects the html records themselves.
    ///
    /// Starts at `origin`, fetches queued urls in order over HTTP, and queues
    /// every not-yet-seen anchor from each record's `domain_anchors()`.
    ///
    /// * `origin` - url the crawl starts from.
    /// * `limit` - maximum number of fetch attempts. One attempt is always
    ///   made before the limit is checked.
    ///
    /// A url that cannot be fetched is reported on stdout and skipped; the
    /// attempt still counts against `limit`.
    ///
    /// Returns `Ok` with the successfully fetched records in crawl order.
    /// This function currently never returns `Err`.
    pub async fn press_records_blind(origin: &str, limit: usize) -> Result<Vec<HtmlRecord>, Error> {
        let mut queue: Vec<String> = vec![origin.to_string()];
        let mut pages: Vec<HtmlRecord> = Vec::new();
        let mut cursor: usize = 0;

        loop {
            let outcome = fetch_html_record(&queue[cursor]).await;
            // Each attempt consumes one queue slot whether or not it succeeded.
            cursor += 1;

            match outcome {
                Ok(page) => {
                    for link in page.domain_anchors() {
                        if !queue.contains(&link) {
                            queue.push(link);
                        }
                    }
                    pages.push(page);
                }
                Err(_) => println!("nope"),
            }

            let attempts_exhausted = cursor >= limit;
            let queue_exhausted = cursor >= queue.len();
            if attempts_exhausted || queue_exhausted {
                break;
            }
        }

        Ok(pages)
    }

    /// Fetches an html record for every url in `links`, in order, over HTTP.
    ///
    /// * `links` - urls to fetch.
    ///
    /// Returns `Ok` with one record per url, in the same order as `links`.
    ///
    /// # Errors
    ///
    /// Returns an `Error` describing the first url that could not be fetched.
    /// Records fetched before that failure are discarded.
    pub async fn press_records(links: Vec<&str>) -> Result<Vec<HtmlRecord>, Error> {
        let mut html_records: Vec<HtmlRecord> = Vec::with_capacity(links.len());

        for link in links {
            // A failed fetch is an expected runtime condition, so it is
            // reported through the `Result` instead of panicking.
            let record = fetch_html_record(link).await.map_err(|err| {
                Error::new(
                    std::io::ErrorKind::Other,
                    format!("failed to fetch {}: {:?}", link, err),
                )
            })?;
            html_records.push(record);
        }

        Ok(html_records)
    }

    /// Fetches the html record for a single url over HTTP.
    ///
    /// * `link` - url to fetch.
    ///
    /// Returns `Ok` with the fetched record.
    ///
    /// # Errors
    ///
    /// Returns an `Error` describing the failure when the url cannot be
    /// fetched.
    pub async fn press_record(link: &str) -> Result<HtmlRecord, Error> {
        // A failed fetch is an expected runtime condition, so it is reported
        // through the `Result` instead of panicking.
        fetch_html_record(link).await.map_err(|err| {
            Error::new(
                std::io::ErrorKind::Other,
                format!("failed to fetch {}: {:?}", link, err),
            )
        })
    }

    /// Curated crawl that collects the html records themselves.
    ///
    /// Starts at `origin`, fetches queued urls in order, and queues every
    /// not-yet-seen anchor returned by each record's `anchors_curate(pattern)`.
    ///
    /// * `origin` - url the crawl starts from.
    /// * `pattern` - passed unchanged to `anchors_curate` to select anchors.
    /// * `limit` - maximum number of fetch attempts. One attempt is always
    ///   made before the limit is checked.
    ///
    /// A url that cannot be fetched is reported on stdout and skipped; the
    /// attempt still counts against `limit`.
    ///
    /// Returns `Ok` with the successfully fetched records in crawl order.
    /// This function currently never returns `Err`.
    pub async fn press_curated_records(
        origin: &str,
        pattern: &str,
        limit: usize,
    ) -> Result<Vec<HtmlRecord>, std::io::Error> {
        let mut links: Vec<String> = vec![origin.to_string()];
        let mut indexer: usize = 0;
        let mut record_vec: Vec<HtmlRecord> = vec![];

        loop {
            let fetched = fetch_html_record(&links[indexer]).await;

            // Advance on every attempt, successful or not. Retrying the same
            // index after a failure would spin forever on an unreachable url.
            indexer += 1;

            match fetched {
                Ok(record) => {
                    for anchor in record.anchors_curate(pattern) {
                        if !links.contains(&anchor) {
                            links.push(anchor);
                        }
                    }
                    record_vec.push(record);
                }
                Err(_) => println!("nope"),
            }

            // The bounds check on `links.len()` also guards the index used by
            // the next iteration.
            if indexer >= links.len() || indexer >= limit {
                break;
            }
        }

        Ok(record_vec)
    }
}
