/*
 * Decompiled with CFR 0.152.
 */
package org.osjava.scraping;

import com.generationjava.config.Config;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.lang.StringUtils;
import org.osjava.norbert.NoRobotClient;
import org.osjava.norbert.NoRobotException;
import org.osjava.oscube.container.Session;
import org.osjava.scraping.Fetcher;
import org.osjava.scraping.FetchingException;
import org.osjava.scraping.MemoryPage;
import org.osjava.scraping.Page;

/**
 * Base class for fetchers that retrieve a textual page over HTTP using
 * Commons HttpClient.
 *
 * <p>The {@link HttpClient} instance is cached in the {@link Session} under a
 * fixed key, so repeated fetches within one session share the same client.
 * Subclasses supply the default port and prepare the client for the target
 * host via {@link #startSession}.
 */
public abstract class AbstractHttpFetcher
implements Fetcher {
    /** Session key under which the shared HttpClient is stored. */
    private static final String SESSION_CACHE_CODE = "HTTPCLIENT";

    /** User-Agent reported to the robots check when no "User-Agent" header is configured. */
    private static final String DEFAULT_USER_AGENT = "osjava-scraping-engine";

    /**
     * Returns the port to use when the fetched URL does not name one explicitly.
     *
     * @return the default port for this fetcher
     */
    public abstract int getDefaultPort();

    /**
     * Hook invoked once per fetch, after the request method has been built and
     * before it is executed.
     *
     * <p>NOTE(review): requests are created from {@code url.getFile()} only, so
     * implementations presumably have to point the client at the right
     * host/port/protocol here - confirm against the concrete subclasses.
     *
     * @param url     the URL being fetched (query string stripped for POSTs)
     * @param port    the URL's explicit port, or {@link #getDefaultPort()} if it has none
     * @param client  the session-cached client that will execute the request
     * @param cfg     the configuration passed to {@link #fetch}
     * @param session the session passed to {@link #fetch}
     */
    protected abstract void startSession(URL url, int port, HttpClient client, Config cfg, Session session);

    /**
     * Fetches {@code uri} and returns its body as an in-memory page.
     *
     * <p>Configuration keys read from {@code cfg}:
     * <ul>
     *   <li>{@code method} - when it equals "POST" (case-insensitive) and the URI
     *       has a query string, the query is sent as POST parameters instead;</li>
     *   <li>{@code norobots.override} - when present, the robots.txt check is skipped;</li>
     *   <li>{@code header} - list of {@code Name=Value} strings added as request headers;</li>
     *   <li>{@code timeout} - passed to {@code HttpClient.setTimeout}.</li>
     * </ul>
     *
     * @param uri     the absolute URL to fetch
     * @param cfg     the fetch configuration
     * @param session the session used to cache the HttpClient
     * @return a {@link MemoryPage} holding the response body, its Content-Type
     *         header text (or "unknown") and the document base
     * @throws FetchingException if robots rules disallow the URL, the status is
     *         not 200, the content type is not textual, or an I/O error occurs
     */
    public Page fetch(String uri, Config cfg, Session session) throws FetchingException {
        try {
            // For POST requests the query string travels in the body, not the URL.
            String postQuery = null;
            if (cfg.has("method") && "POST".equalsIgnoreCase("" + cfg.get("method"))) {
                int queryIdx = uri.indexOf("?");
                if (queryIdx != -1) {
                    postQuery = uri.substring(queryIdx + 1);
                    uri = uri.substring(0, queryIdx);
                }
            }
            URL url = new URL(uri);

            if (!cfg.has("norobots.override") && this.checkIllegal(url, AbstractHttpFetcher.userAgentFor(cfg))) {
                throw new FetchingException("Not allowed to fetch url: " + uri + " due to the NoRobots RFQ. ");
            }

            HttpClient client = (HttpClient)session.get(SESSION_CACHE_CODE);
            if (client == null) {
                client = new HttpClient();
                session.put(SESSION_CACHE_CODE, (Object)client);
            }

            // Typed as the HttpMethod interface so GET and POST are handled uniformly.
            HttpMethod method = AbstractHttpFetcher.createMethod(url, postQuery);
            try {
                AbstractHttpFetcher.applyHeaders(cfg, method);

                int port = url.getPort();
                if (port == -1) {
                    port = this.getDefaultPort();
                }
                this.startSession(url, port, client, cfg, session);
                if (cfg.has("timeout")) {
                    client.setTimeout(cfg.getInt("timeout"));
                }

                int result = client.executeMethod(method);
                if (result != 200) {
                    throw new FetchingException("Unable to fetch from " + uri + " due to error code " + result);
                }

                String type = "unknown";
                Header hdr = method.getResponseHeader("Content-Type");
                if (hdr != null) {
                    // toExternalForm() yields the full header line, hence the "content-type: " prefix.
                    type = hdr.toExternalForm().toLowerCase();
                    if (!type.startsWith("content-type: text") && !type.startsWith("content-type: plain")) {
                        throw new FetchingException("Not going to fetch a non-text file from " + uri + ". Type is: " + type);
                    }
                }

                String txt = method.getResponseBodyAsString();
                MemoryPage page = new MemoryPage(txt, type);
                page.setDocumentBase(AbstractHttpFetcher.documentBase(url));
                return page;
            }
            finally {
                // Always hand the connection back, including on error status,
                // rejected content types and I/O failures.
                method.releaseConnection();
            }
        }
        catch (IOException ioe) {
            throw new FetchingException("Error fetching from " + uri + ". " + ioe.getMessage(), ioe);
        }
    }

    /**
     * Returns the configured "header" list, or null when none is configured.
     */
    private static List headerList(Config cfg) {
        return cfg.has("header") ? cfg.getList("header") : null;
    }

    /**
     * Determines the User-Agent for the robots check: the last configured
     * "User-Agent" header value, or the built-in default when there is none.
     */
    private static String userAgentFor(Config cfg) {
        String userAgent = DEFAULT_USER_AGENT;
        List headers = AbstractHttpFetcher.headerList(cfg);
        if (headers != null) {
            for (Iterator itr = headers.iterator(); itr.hasNext(); ) {
                String entry = (String)itr.next();
                if ("User-Agent".equals(StringUtils.substringBefore(entry, "="))) {
                    userAgent = StringUtils.substringAfter(entry, "=");
                }
            }
        }
        return userAgent;
    }

    /**
     * Adds every configured {@code Name=Value} header entry to the request.
     */
    private static void applyHeaders(Config cfg, HttpMethod method) {
        List headers = AbstractHttpFetcher.headerList(cfg);
        if (headers == null) {
            return;
        }
        for (Iterator itr = headers.iterator(); itr.hasNext(); ) {
            String entry = (String)itr.next();
            method.addRequestHeader(StringUtils.substringBefore(entry, "="), StringUtils.substringAfter(entry, "="));
        }
    }

    /**
     * Builds the request: a POST carrying the parsed query parameters when
     * {@code postQuery} is non-null, otherwise a plain GET.
     */
    private static HttpMethod createMethod(URL url, String postQuery) {
        if (postQuery == null) {
            return new GetMethod(url.getFile());
        }
        PostMethod post = new PostMethod(url.getFile());
        String[] pairs = StringUtils.split(postQuery, "&");
        for (int i = 0; i < pairs.length; ++i) {
            // Split on the first '=' only, so empty values ("key=") and values
            // that themselves contain '=' are sent rather than discarded.
            int eq = pairs[i].indexOf('=');
            if (eq > 0) {
                post.addParameter(pairs[i].substring(0, eq), pairs[i].substring(eq + 1));
                continue;
            }
            System.err.println("Bad post pair: " + pairs[i]);
        }
        return post;
    }

    /**
     * Computes the document base: protocol, host, explicit port (if any) and
     * the URL path up to, but excluding, its last '/'.
     */
    private static String documentBase(URL url) {
        String base = url.getProtocol() + "://" + url.getHost();
        if (url.getPort() != -1) {
            base = base + ":" + url.getPort();
        }
        String path = url.getPath();
        int idx = path.lastIndexOf("/");
        if (idx != -1) {
            base = base + path.substring(0, idx);
        }
        return base;
    }

    /**
     * Checks the site's robots rules for {@code url}.
     *
     * @return true when the rules were parsed and disallow the URL; false when
     *         the URL is allowed or the rules could not be parsed
     */
    private boolean checkIllegal(URL url, String userAgent) throws MalformedURLException {
        NoRobotClient nrc = new NoRobotClient(userAgent);
        try {
            nrc.parse(this.toBase(url));
        }
        catch (NoRobotException nre) {
            // Best effort: a failure to parse the robots rules is treated as "allowed".
            return false;
        }
        return !nrc.isUrlAllowed(url);
    }

    /**
     * Returns the root URL ({@code protocol://host[:port]/}) of the given URL.
     */
    private URL toBase(URL url) throws MalformedURLException {
        return new URL(url.getProtocol() + "://" + url.getHost() + (url.getPort() == -1 ? "" : ":" + url.getPort()) + "/");
    }
}

