/*
 * Decompiled with CFR 0.152.
 */
package org.osjava.scraping;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
import org.osjava.norbert.NoRobotClient;
import org.osjava.scraping.Config;
import org.osjava.scraping.Fetcher;
import org.osjava.scraping.FetchingException;
import org.osjava.scraping.MemoryPage;
import org.osjava.scraping.Page;
import org.osjava.scraping.Session;

/**
 * Base {@link Fetcher} that retrieves a page with a Commons HttpClient GET request.
 *
 * <p>Subclasses supply the default port and prepare the {@link HttpClient} in
 * {@link #startSession}. Unless the configuration has the
 * {@code norobots.override} key, the URL is first checked with a
 * {@link NoRobotClient} and refused if it is not allowed.
 */
public abstract class AbstractHttpFetcher
implements Fetcher {
    /**
     * Returns the port to use when the URL being fetched does not name one.
     *
     * @return the default port for this fetcher
     */
    public abstract int getDefaultPort();

    /**
     * Hook invoked by {@link #fetch} after the GET method has been created and
     * before it is executed.
     *
     * @param url     the URL being fetched
     * @param port    the URL's explicit port, or {@link #getDefaultPort()} if it has none
     * @param client  the client that will execute the request
     * @param cfg     the configuration passed to {@link #fetch}
     * @param session the session passed to {@link #fetch}
     */
    protected abstract void startSession(URL url, int port, HttpClient client, Config cfg, Session session);

    /**
     * Fetches {@code uri} and returns its body as an in-memory page.
     *
     * <p>The page's type is the lower-cased external form of the response's
     * {@code Content-Type} header, or {@code "unknown"} when the header is
     * absent. Its document base is the URL's scheme, host, explicit port (if
     * any) and path up to the last {@code '/'}.
     *
     * @param uri     the URL to fetch
     * @param cfg     configuration; the keys {@code norobots.override} and
     *                {@code timeout} are consulted
     * @param session the session handed on to {@link #startSession}
     * @return the fetched page
     * @throws FetchingException if the robots check refuses the URL, the
     *         response status is not 200, the Content-Type header is present
     *         but does not start with {@code text} or {@code plain}, or an
     *         {@link IOException} occurs (a malformed URL included)
     */
    public Page fetch(String uri, Config cfg, Session session) throws FetchingException {
        try {
            URL url = new URL(uri);
            if (!cfg.has("norobots.override") && this.checkIllegal(url)) {
                throw new FetchingException("Not allowed to fetch url: " + uri + " due to the NoRobots RFQ. ");
            }
            HttpClient client = new HttpClient();
            GetMethod get = new GetMethod(url.getFile());
            int port = url.getPort();
            if (port == -1) {
                port = this.getDefaultPort();
            }
            this.startSession(url, port, client, cfg, session);
            if (cfg.has("timeout")) {
                client.setTimeout(cfg.getInt("timeout"));
            }
            String type = "unknown";
            String txt;
            try {
                int result = client.executeMethod((HttpMethod)get);
                if (result != 200) {
                    throw new FetchingException("Unable to fetch from " + uri + " due to error code " + result);
                }
                Header hdr = get.getResponseHeader("Content-Type");
                if (hdr != null) {
                    // Fixed locale: the default-locale toLowerCase() can map 'I' to a
                    // dotless i (e.g. Turkish), which would break the prefix checks.
                    type = hdr.toExternalForm().toLowerCase(java.util.Locale.ENGLISH);
                    if (!type.startsWith("content-type: text") && !type.startsWith("content-type: plain")) {
                        throw new FetchingException("Not going to fetch a non-text file. Type is: " + type);
                    }
                }
                txt = get.getResponseBodyAsString();
            }
            finally {
                // Release on every path (bad status, rejected type, I/O failure),
                // not just on success, so the connection is never leaked.
                get.releaseConnection();
            }
            MemoryPage page = new MemoryPage(txt, type);
            page.setDocumentBase(AbstractHttpFetcher.documentBase(url));
            return page;
        }
        catch (IOException ioe) {
            throw new FetchingException("Error. " + ioe.getMessage(), ioe);
        }
    }

    /**
     * Builds the document base for a page: scheme, host, the explicit port if
     * the URL has one, and the path up to (not including) its last {@code '/'}.
     */
    private static String documentBase(URL url) {
        String base = url.getProtocol() + "://" + url.getHost();
        if (url.getPort() != -1) {
            base = base + ":" + url.getPort();
        }
        String path = url.getPath();
        int idx = path.lastIndexOf("/");
        if (idx != -1) {
            base = base + path.substring(0, idx);
        }
        return base;
    }

    /**
     * Returns {@code true} when the {@link NoRobotClient}, after parsing the
     * site's base URL, reports that {@code url} is not allowed.
     * (Presumably the parse step loads the site's robots rules; see the
     * NoRobotClient documentation.)
     */
    private boolean checkIllegal(URL url) throws MalformedURLException {
        NoRobotClient nrc = new NoRobotClient("osjava-scraping-engine");
        nrc.parse(this.toBase(url));
        return !nrc.isUrlAllowed(url);
    }

    /**
     * Returns the root URL of {@code url}'s site: scheme, host, the explicit
     * port if present, and a trailing {@code '/'}.
     */
    private URL toBase(URL url) throws MalformedURLException {
        String portPart = url.getPort() == -1 ? "" : ":" + url.getPort();
        return new URL(url.getProtocol() + "://" + url.getHost() + portPart + "/");
    }
}

