package com.bytegriffin.get4j.fetch;

import com.bytegriffin.get4j.conf.Seed;
import com.bytegriffin.get4j.core.Globals;
import com.bytegriffin.get4j.core.Page;
import com.bytegriffin.get4j.core.Process;
import com.bytegriffin.get4j.core.UrlQueue;
import com.bytegriffin.get4j.net.http.HttpEngine;
import com.bytegriffin.get4j.net.http.UrlAnalyzer;
import com.bytegriffin.get4j.util.DateUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:com/bytegriffin/get4j/fetch/SiteFetcher.class */
/**
 * {@link Process} step that fetches a page over HTTP, records its resources and fetch time,
 * and hands the site links discovered on it to the {@link UrlQueue} of unvisited URLs.
 *
 * <p>The {@link HttpEngine} is looked up by seed name in {@link #init(Seed)}; until then the
 * field is {@code null}, so {@code init} has to run before {@link #execute(Page)}.
 */
public class SiteFetcher implements Process {
    private static final Logger logger = LogManager.getLogger(SiteFetcher.class);

    /** HTTP engine for the configured seed; assigned in {@link #init(Seed)}. */
    private HttpEngine http = null;

    /**
     * Binds this fetcher to the HTTP engine cached for the seed and initialises the
     * resource selector for that seed.
     *
     * @param seed seed whose name keys the {@link Globals#HTTP_ENGINE_CACHE} lookup
     */
    @Override
    public void init(Seed seed) {
        final String seedName = seed.getSeedName();
        this.http = Globals.HTTP_ENGINE_CACHE.get(seedName);
        FetchResourceSelector.init(seed);
        logger.info("种子[" + seedName + "]的组件SiteFetcher的初始化完成。");
    }

    /**
     * Fetches the content for {@code page}, sniffs its resources, stamps the fetch time and
     * adds the sniffed site links to the unvisited queue of the page's seed.
     *
     * @param page page to fetch; all follow-up work is done on the page object returned by
     *             the HTTP engine
     */
    @Override
    public void execute(Page page) {
        final Page fetched = this.http.getPageContent(page);

        // Resources are sniffed before the fetch time is stamped, as in the original order.
        UrlAnalyzer.custom(fetched).sniffAndSetResources();
        fetched.setFetchTime(DateUtil.getCurrentDate());

        // A fresh analyzer is requested for the link sniffing pass rather than reusing the first.
        final String seedName = fetched.getSeedName();
        UrlQueue.addUnVisitedLinks(seedName, UrlAnalyzer.custom(fetched).sniffSiteLinks());

        final String threadName = Thread.currentThread().getName();
        // The count is read after enqueueing so the log reflects the links just added.
        logger.info("线程[" + threadName + "]抓取种子[" + seedName + "]整站Url总数是["
                + UrlQueue.getUnVisitedUrlCount(seedName) + "]个。");
        logger.info("线程[" + threadName + "]抓取种子[" + seedName + "]的url["
                + fetched.getUrl() + "]完成。");
    }
}
