package edu.uci.ics.crawler4j.fetcher;

import edu.uci.ics.crawler4j.crawler.Configurable;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
import edu.uci.ics.crawler4j.url.WebURL;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.zip.GZIPInputStream;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.HttpVersion;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.scheme.SchemeSocketFactory;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.entity.HttpEntityWrapper;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpProtocolParamBean;
import org.apache.http.protocol.HttpContext;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/uci/ics/crawler4j/fetcher/PageFetcher.class */
/**
 * Fetches pages over HTTP(S) on behalf of the crawler using a shared, pooled
 * Apache {@link DefaultHttpClient}.
 *
 * <p>The client is configured from the supplied {@link CrawlConfig} (user agent,
 * timeouts, connection limits, optional proxy). Redirects are not followed
 * automatically; instead {@link #fetchHeader(WebURL)} reports the redirect target
 * on the returned result. Requests are spaced out by the configured politeness
 * delay, enforced under a single lock shared by all callers of this instance.
 */
public class PageFetcher extends Configurable {
    protected static final Logger logger = Logger.getLogger(PageFetcher.class);

    // HTTP status codes handled explicitly by fetchHeader.
    private static final int SC_OK = 200;
    private static final int SC_MOVED_PERMANENTLY = 301;
    private static final int SC_MOVED_TEMPORARILY = 302;
    private static final int SC_NOT_FOUND = 404;

    /** Value used by HttpEntity#getContentLength() when the length is not known. */
    private static final long UNKNOWN_LENGTH = -1L;

    protected ThreadSafeClientConnManager connectionManager;
    protected DefaultHttpClient httpClient;

    /** Guards {@link #lastFetchTime}; held while waiting out the politeness delay. */
    protected final Object mutex = new Object();

    /** Wall-clock time (ms since epoch) at which the most recent request was released. */
    protected long lastFetchTime = 0L;

    protected IdleConnectionMonitorThread connectionMonitorThread = null;

    /**
     * Entity wrapper that transparently gunzips the wrapped entity's content.
     * The decompressed length is not known in advance, so it is reported as -1.
     */
    private static class GzipDecompressingEntity extends HttpEntityWrapper {
        public GzipDecompressingEntity(HttpEntity httpEntity) {
            super(httpEntity);
        }

        @Override
        public InputStream getContent() throws IOException, IllegalStateException {
            return new GZIPInputStream(this.wrappedEntity.getContent());
        }

        @Override
        public long getContentLength() {
            return UNKNOWN_LENGTH;
        }
    }

    /**
     * Builds the HTTP client, connection pool and gzip response interceptor from
     * {@code crawlConfig}, then starts the idle-connection monitor thread.
     *
     * @param crawlConfig crawler configuration supplying user agent, timeouts,
     *                    connection limits, HTTPS toggle and optional proxy settings
     */
    public PageFetcher(CrawlConfig crawlConfig) {
        super(crawlConfig);

        BasicHttpParams params = new BasicHttpParams();
        HttpProtocolParamBean protocolParams = new HttpProtocolParamBean(params);
        protocolParams.setVersion(HttpVersion.HTTP_1_1);
        protocolParams.setContentCharset("UTF-8");
        protocolParams.setUseExpectContinue(false);

        params.setParameter("http.useragent", crawlConfig.getUserAgentString());
        params.setIntParameter("http.socket.timeout", crawlConfig.getSocketTimeout());
        params.setIntParameter("http.connection.timeout", crawlConfig.getConnectionTimeout());
        // Redirects are surfaced to the caller via PageFetchResult instead of being followed.
        params.setBooleanParameter("http.protocol.handle-redirects", false);

        SchemeRegistry schemeRegistry = new SchemeRegistry();
        schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
        if (crawlConfig.isIncludeHttpsPages()) {
            schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));
        }

        this.connectionManager = new ThreadSafeClientConnManager(schemeRegistry);
        this.connectionManager.setMaxTotal(crawlConfig.getMaxTotalConnections());
        this.connectionManager.setDefaultMaxPerRoute(crawlConfig.getMaxConnectionsPerHost());

        logger.setLevel(Level.INFO);

        this.httpClient = new DefaultHttpClient(this.connectionManager, params);

        if (crawlConfig.getProxyHost() != null) {
            if (crawlConfig.getProxyUsername() != null) {
                this.httpClient.getCredentialsProvider().setCredentials(
                        new AuthScope(crawlConfig.getProxyHost(), crawlConfig.getProxyPort()),
                        new UsernamePasswordCredentials(
                                crawlConfig.getProxyUsername(), crawlConfig.getProxyPassword()));
            }
            this.httpClient.getParams().setParameter(
                    "http.route.default-proxy",
                    new HttpHost(crawlConfig.getProxyHost(), crawlConfig.getProxyPort()));
        }

        this.httpClient.addResponseInterceptor(new HttpResponseInterceptor() {
            @Override
            public void process(HttpResponse httpResponse, HttpContext httpContext)
                    throws HttpException, IOException {
                HttpEntity entity = httpResponse.getEntity();
                if (entity == null) {
                    // Body-less responses have no entity to decode.
                    return;
                }
                Header contentEncoding = entity.getContentEncoding();
                if (contentEncoding == null) {
                    return;
                }
                for (HeaderElement codec : contentEncoding.getElements()) {
                    if (codec.getName().equalsIgnoreCase("gzip")) {
                        httpResponse.setEntity(new GzipDecompressingEntity(entity));
                        return;
                    }
                }
            }
        });

        this.connectionMonitorThread = new IdleConnectionMonitorThread(this.connectionManager);
        this.connectionMonitorThread.start();
    }

    /**
     * Issues a GET for {@code webURL} and returns the response status and entity
     * without consuming the body.
     *
     * <p>The status code set on the result is one of:
     * <ul>
     *   <li>the HTTP status for any non-200 response (for 301/302 the canonicalized
     *       {@code Location} target, if present, is also set as the moved-to URL);</li>
     *   <li>{@code 200} when the response has an entity within the size limit;</li>
     *   <li>{@code CustomFetchStatus.PageTooBig} when the declared content length
     *       exceeds {@code config.getMaxDownloadSize()};</li>
     *   <li>{@code CustomFetchStatus.FatalTransportError} on an {@link IOException};</li>
     *   <li>{@code CustomFetchStatus.UnknownError} for a 200 response without an
     *       entity or for any other failure.</li>
     * </ul>
     * If no entity ends up attached to the result, the request is aborted before
     * returning.
     *
     * @param webURL the URL to fetch
     * @return a result carrying the status code and, when available, the response entity
     */
    public PageFetchResult fetchHeader(WebURL webURL) {
        PageFetchResult fetchResult = new PageFetchResult();
        String toFetchUrl = webURL.getURL();
        HttpGet request = null;
        try {
            request = new HttpGet(toFetchUrl);
            waitForPolitenessDelay();
            request.addHeader("Accept-Encoding", "gzip");

            HttpResponse response = this.httpClient.execute(request);
            fetchResult.setEntity(response.getEntity());

            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != SC_OK) {
                if (statusCode == SC_MOVED_PERMANENTLY || statusCode == SC_MOVED_TEMPORARILY) {
                    Header location = response.getFirstHeader("Location");
                    if (location != null) {
                        fetchResult.setMovedToUrl(
                                URLCanonicalizer.getCanonicalURL(location.getValue(), toFetchUrl));
                    }
                } else if (statusCode != SC_NOT_FOUND) {
                    logger.info("Failed: " + response.getStatusLine().toString()
                            + ", while fetching " + toFetchUrl);
                }
                fetchResult.setStatusCode(statusCode);
                return fetchResult;
            }

            fetchResult.setFetchedUrl(toFetchUrl);
            String requestUri = request.getURI().toString();
            if (!requestUri.equals(toFetchUrl)
                    && !URLCanonicalizer.getCanonicalURL(requestUri).equals(toFetchUrl)) {
                fetchResult.setFetchedUrl(requestUri);
            }

            if (fetchResult.getEntity() != null) {
                long size = resolveContentLength(fetchResult.getEntity(), response);
                if (size > this.config.getMaxDownloadSize()) {
                    fetchResult.setStatusCode(CustomFetchStatus.PageTooBig);
                } else {
                    fetchResult.setStatusCode(SC_OK);
                }
                return fetchResult;
            }
            // 200 without an entity: fall through to UnknownError; finally aborts the request.
        } catch (IOException e) {
            logger.error("Fatal transport error: " + e.getMessage() + " while fetching " + toFetchUrl
                    + " (link found in doc #" + webURL.getParentDocid() + ")");
            fetchResult.setStatusCode(CustomFetchStatus.FatalTransportError);
            return fetchResult;
        } catch (IllegalStateException ignored) {
            // Deliberately not logged. NOTE(review): presumably raised by HttpClient for
            // schemes that were not registered (e.g. https when disabled) - confirm.
        } catch (InterruptedException e) {
            // Preserve the interrupt for the calling thread instead of swallowing it.
            Thread.currentThread().interrupt();
            logger.error("Interrupted while fetching " + webURL.getURL());
        } catch (Exception e) {
            if (e.getMessage() == null) {
                logger.error("Error while fetching " + webURL.getURL());
            } else {
                logger.error(e.getMessage() + " while fetching " + webURL.getURL());
            }
        } finally {
            abortIfNoEntity(fetchResult, request);
        }
        fetchResult.setStatusCode(CustomFetchStatus.UnknownError);
        return fetchResult;
    }

    /**
     * Blocks until at least {@code config.getPolitenessDelay()} milliseconds have
     * passed since the previous request was released, then records the new release time.
     * The lock is held while sleeping so that concurrent callers are serialized.
     */
    private void waitForPolitenessDelay() throws InterruptedException {
        synchronized (this.mutex) {
            long delay = this.config.getPolitenessDelay();
            long elapsed = System.currentTimeMillis() - this.lastFetchTime;
            if (elapsed < delay) {
                Thread.sleep(delay - elapsed);
            }
            this.lastFetchTime = System.currentTimeMillis();
        }
    }

    /**
     * Returns the entity's content length, falling back to the response's
     * Content-Length header when the entity reports it as unknown (as the gzip
     * wrapper does). Returns -1 if the length cannot be determined.
     */
    private static long resolveContentLength(HttpEntity entity, HttpResponse response) {
        long size = entity.getContentLength();
        if (size != UNKNOWN_LENGTH) {
            return size;
        }
        // Header name matching in HttpCore is case-insensitive, so one lookup suffices.
        Header declared = response.getLastHeader("Content-Length");
        if (declared == null || declared.getValue() == null) {
            return UNKNOWN_LENGTH;
        }
        try {
            // Parse as long: bodies larger than Integer.MAX_VALUE bytes must still be measurable.
            return Long.parseLong(declared.getValue().trim());
        } catch (NumberFormatException e) {
            return UNKNOWN_LENGTH;
        }
    }

    /** Aborts {@code request} when no entity was attached to {@code fetchResult}. */
    private static void abortIfNoEntity(PageFetchResult fetchResult, HttpGet request) {
        try {
            if (fetchResult.getEntity() == null && request != null) {
                request.abort();
            }
        } catch (Exception e) {
            logger.warn("Failed to abort request", e);
        }
    }

    /**
     * Shuts down the connection manager and the idle-connection monitor thread.
     * Does nothing if the monitor thread was never created.
     */
    public synchronized void shutDown() {
        if (this.connectionMonitorThread != null) {
            this.connectionManager.shutdown();
            this.connectionMonitorThread.shutdown();
        }
    }

    /**
     * @return the underlying HTTP client used for all fetches
     */
    public HttpClient getHttpClient() {
        return this.httpClient;
    }
}
