use crate::documents::HtmlDocument;
use std::io::Error;
use std::thread::sleep;
use std::time::Duration;

use crate::clients::fetch_html_document;

/// Field-less type that groups the crawler entry points.
///
/// It carries no state; the methods in the `impl` below are associated async
/// functions that do not take `self`.
pub struct WaxWorker {}

// All methods within this impl must be async
impl WaxWorker {


    /// Harvests same-domain URLs for a blind crawl starting at `origin`.
    ///
    /// Queued URLs are fetched in discovery order through `fetch_html_document`.
    /// Every anchor reported by `HtmlDocument::domain_anchors` that is not already
    /// queued is appended to the queue. The function pauses 300 ms between pages.
    ///
    /// * `origin` - URL the crawl starts from; it is always the first queued link.
    /// * `link_limit` - maximum number of links returned.
    /// * `page_limit` - maximum number of pages attempted. At least one page
    ///   (the origin) is always attempted, and failed fetches count as attempts.
    ///
    /// Returns the discovered links, origin first, truncated to `link_limit`.
    ///
    /// # Errors
    /// The current implementation never returns `Err`: a page that fails to
    /// download is reported on stdout and skipped.
    ///
    /// # Panics
    /// Panics if `domain_anchors` fails on a fetched document.
    pub async fn press_urls(
        origin: &str,
        link_limit: usize,
        page_limit: usize,
    ) -> Result<Vec<String>, Error> {
        let mut links: Vec<String> = vec![origin.to_string()];

        // Index of the next queued link to visit.
        let mut indexer: usize = 0;

        loop {
            let fetched = fetch_html_document(&links[indexer]).await;

            // Advance unconditionally: a URL that fails to download must not be
            // retried forever, otherwise the loop never terminates.
            indexer += 1;

            match fetched {
                Ok(document) => {
                    let domain_anchors = document.domain_anchors().unwrap();

                    for anchor in domain_anchors {
                        if !links.contains(&anchor) {
                            links.push(anchor);
                        }
                    }
                }
                Err(_) => println!("nope"),
            }

            if links.len() > link_limit || indexer >= page_limit || indexer >= links.len() {
                break;
            }

            // NOTE(review): this is a blocking sleep inside an async fn and stalls the
            // executor thread; swap for the runtime's async timer once the runtime in
            // use is confirmed.
            sleep(Duration::from_millis(300));
        }

        // The last page may have pushed the queue past the requested amount.
        links.truncate(link_limit);

        Ok(links)
    }



    /// Harvests URLs whose anchors match `pattern`, starting at `origin`.
    ///
    /// Queued URLs are fetched in discovery order through `fetch_html_document`.
    /// Every anchor returned by `HtmlDocument::anchors_curate(pattern)` that has not
    /// been seen before is both queued for crawling and added to the result.
    ///
    /// * `origin` - URL the crawl starts from; it is not part of the result.
    /// * `pattern` - pattern handed to `anchors_curate` to select anchors.
    /// * `link_limit` - crawling stops once the queue (origin included) holds more
    ///   than this many links. The result is not truncated, so it may exceed it.
    /// * `page_limit` - maximum number of pages attempted. At least one page
    ///   (the origin) is always attempted, and failed fetches count as attempts.
    ///
    /// Returns the matching links in discovery order.
    ///
    /// # Errors
    /// The current implementation never returns `Err`: a page that fails to
    /// download is reported on stdout and skipped.
    ///
    /// # Panics
    /// Panics if `anchors_curate` fails on a fetched document.
    pub async fn press_curated_urls(
        origin: &str,
        pattern: &str,
        link_limit: usize,
        page_limit: usize,
    ) -> Result<Vec<String>, Error> {
        // Crawl queue; also serves as the "already seen" list.
        let mut links: Vec<String> = vec![origin.to_string()];

        let mut curated_links: Vec<String> = vec![];

        // Index of the next queued link to visit.
        let mut indexer: usize = 0;

        loop {
            let fetched = fetch_html_document(&links[indexer]).await;

            // Advance unconditionally: a URL that fails to download must not be
            // retried forever, otherwise the loop never terminates.
            indexer += 1;

            match fetched {
                Ok(document) => {
                    let curated_anchors = document.anchors_curate(pattern).unwrap();

                    for anchor in curated_anchors {
                        if !links.contains(&anchor) {
                            links.push(anchor.clone());
                            curated_links.push(anchor);
                        }
                    }
                }
                Err(_) => println!("nope"),
            }

            if links.len() > link_limit || indexer >= page_limit || indexer >= links.len() {
                break;
            }
        }

        Ok(curated_links)
    }



    /// Crawls a site blindly from `origin` and collects the fetched documents.
    ///
    /// Queued URLs are fetched in discovery order through `fetch_html_document`.
    /// Each downloaded document is kept, and the anchors reported by
    /// `HtmlDocument::domain_anchors` that are not already queued are appended to
    /// the queue.
    ///
    /// * `origin` - URL the crawl starts from.
    /// * `page_limit` - maximum number of pages attempted. At least one page
    ///   (the origin) is always attempted, and failed fetches count as attempts.
    ///
    /// Returns the successfully fetched documents in visit order.
    ///
    /// # Errors
    /// The current implementation never returns `Err`: a page that fails to
    /// download is reported on stdout and skipped.
    ///
    /// # Panics
    /// Panics if `domain_anchors` fails on a fetched document.
    pub async fn press_documents_blind(
        origin: &str,
        page_limit: usize,
    ) -> Result<Vec<HtmlDocument>, Error> {
        let mut links: Vec<String> = vec![origin.to_string()];
        let mut documents_vector: Vec<HtmlDocument> = vec![];

        // Index of the next queued link to visit.
        let mut indexer: usize = 0;

        loop {
            let fetched = fetch_html_document(&links[indexer]).await;

            // Advance unconditionally: a URL that fails to download must not be
            // retried forever, otherwise the loop never terminates.
            indexer += 1;

            match fetched {
                Ok(document) => {
                    let domain_anchors = document.domain_anchors().unwrap();

                    documents_vector.push(document);

                    for anchor in domain_anchors {
                        if !links.contains(&anchor) {
                            links.push(anchor);
                        }
                    }
                }
                Err(_) => println!("nope"),
            }

            if indexer >= page_limit || indexer >= links.len() {
                break;
            }
        }

        Ok(documents_vector)
    }



    /// Fetches every URL in `links` and returns the documents in the same order.
    ///
    /// * `links` - URLs to download through `fetch_html_document`.
    ///
    /// # Errors
    /// Returns an `std::io::Error` of kind `Other` for the first URL that fails to
    /// download; documents fetched before the failure are discarded.
    pub async fn press_documents(links: Vec<&str>) -> Result<Vec<HtmlDocument>, Error> {
        let mut html_documents: Vec<HtmlDocument> = Vec::with_capacity(links.len());

        for link in links {
            // Report a failed download through the declared `Result` instead of
            // panicking. The fetch error is only required to be `Debug` here.
            let document = fetch_html_document(&link).await.map_err(|err| {
                Error::new(
                    std::io::ErrorKind::Other,
                    format!("failed to fetch {}: {:?}", link, err),
                )
            })?;
            html_documents.push(document);
        }

        Ok(html_documents)
    }



    /// Fetches the single document at `link`.
    ///
    /// * `link` - URL to download through `fetch_html_document`.
    ///
    /// # Errors
    /// Returns an `std::io::Error` of kind `Other` when the download fails.
    pub async fn press_document(link: &str) -> Result<HtmlDocument, Error> {
        // Report a failed download through the declared `Result` instead of
        // panicking. The fetch error is only required to be `Debug` here.
        fetch_html_document(&link).await.map_err(|err| {
            Error::new(
                std::io::ErrorKind::Other,
                format!("failed to fetch {}: {:?}", link, err),
            )
        })
    }



    /// Crawls from `origin`, following only anchors that match `pattern`, and
    /// collects the fetched documents.
    ///
    /// Queued URLs are fetched in discovery order through `fetch_html_document`.
    /// Each downloaded document is kept, and the anchors returned by
    /// `HtmlDocument::anchors_curate(pattern)` that are not already queued are
    /// appended to the queue.
    ///
    /// * `origin` - URL the crawl starts from.
    /// * `pattern` - pattern handed to `anchors_curate` to select anchors.
    /// * `limit` - maximum number of pages attempted. At least one page
    ///   (the origin) is always attempted, and failed fetches count as attempts.
    ///
    /// Returns the successfully fetched documents in visit order.
    ///
    /// # Errors
    /// The current implementation never returns `Err`: a page that fails to
    /// download is reported on stdout and skipped.
    ///
    /// # Panics
    /// Panics if `anchors_curate` fails on a fetched document.
    pub async fn press_curated_documents(
        origin: &str,
        pattern: &str,
        limit : usize
    ) -> Result<Vec<HtmlDocument>, std::io::Error> {
        let mut links: Vec<String> = vec![origin.to_string()];

        // Index of the next queued link to visit.
        let mut indexer: usize = 0;
        let mut document_vec: Vec<HtmlDocument> = vec![];

        loop {
            let fetched = fetch_html_document(&links[indexer]).await;

            // Advance unconditionally: a URL that fails to download must not be
            // retried forever, otherwise the loop never terminates.
            indexer += 1;

            match fetched {
                Ok(document) => {
                    let curated_anchors = document.anchors_curate(pattern).unwrap();

                    document_vec.push(document);

                    for anchor in curated_anchors {
                        if !links.contains(&anchor) {
                            links.push(anchor);
                        }
                    }
                }
                Err(_) => println!("nope"),
            }

            if indexer >= links.len() || indexer >= limit {
                break;
            }
        }

        Ok(document_vec)
    }
}
