package us.codecraft.webmagic.utils;

import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import us.codecraft.webmagic.Request;

/* loaded from: input_file:us/codecraft/webmagic/utils/UrlUtils.class */
/**
 * Static helpers for resolving, trimming and converting URL strings.
 */
public class UrlUtils {
    /** Matches a scheme prefix such as {@code http://}. */
    private static final Pattern PROTOCOL_PATTERN = Pattern.compile("[\\w]+://");

    /**
     * Matches a {@code charset=<name>} parameter, with optional quotes around the name.
     * Case-insensitive because media-type parameter names are case-insensitive.
     */
    private static final Pattern CHARSET_PATTERN =
            Pattern.compile("charset\\s*=\\s*['\"]*([^\\s;'\"]*)", Pattern.CASE_INSENSITIVE);

    /**
     * Resolves a (possibly relative) URL against a base URL.
     *
     * <p>A value starting with {@code ?} is appended to the path of the base URL before
     * resolution. Spaces in the resolved URL are replaced with {@code %20}.
     *
     * @param str  the URL to resolve; may be relative
     * @param str2 the base URL used as resolution context
     * @return the resolved URL; the external form of the base URL when {@code str} cannot be
     *         resolved against it; or an empty string when the base URL itself is malformed
     */
    public static String canonicalizeUrl(String str, String str2) {
        final URL base;
        try {
            base = new URL(str2);
        } catch (MalformedURLException e) {
            // Without a usable base there is nothing to resolve against.
            return "";
        }
        try {
            // A bare query string keeps the path of the base URL.
            String spec = str.startsWith("?") ? base.getPath() + str : str;
            return encodeIllegalCharacterInUrl(new URL(base, spec).toExternalForm());
        } catch (MalformedURLException e) {
            // Fall back to the base URL (returned as-is, without space encoding).
            return base.toExternalForm();
        }
    }

    /**
     * Replaces every space in the given URL with {@code %20}. No other character is touched.
     *
     * @param str the URL text to encode
     * @return the text with spaces percent-encoded
     */
    public static String encodeIllegalCharacterInUrl(String str) {
        return str.replace(" ", "%20");
    }

    /**
     * Returns the part of the URL that precedes its third {@code /}; for
     * {@code http://host/path} that is {@code http://host}.
     *
     * @param str the URL
     * @return the prefix before the third slash, or the input unchanged when no third slash
     *         is found
     */
    public static String getHost(String str) {
        int thirdSlash = StringUtils.ordinalIndexOf(str, "/", 3);
        if (thirdSlash > 0) {
            return StringUtils.substring(str, 0, thirdSlash);
        }
        return str;
    }

    /**
     * Removes every scheme prefix (for example {@code http://}) found in the given text.
     *
     * @param str the URL
     * @return the text with all {@code <word>://} occurrences removed
     */
    public static String removeProtocol(String str) {
        return PROTOCOL_PATTERN.matcher(str).replaceAll("");
    }

    /**
     * Extracts the domain from a URL by removing the scheme, anything from the first
     * {@code /} onward, and the port.
     *
     * @param str the URL
     * @return the domain part of the URL
     */
    public static String getDomain(String str) {
        String remainder = removeProtocol(str);
        // Search from index 1, as the original implementation does.
        int slash = StringUtils.indexOf(remainder, "/", 1);
        if (slash > 0) {
            remainder = StringUtils.substring(remainder, 0, slash);
        }
        return removePort(remainder);
    }

    /**
     * Cuts the given text at its first {@code :}.
     *
     * @param str a domain, optionally followed by {@code :port}
     * @return the text before the first colon, or the input unchanged when it has no colon
     */
    public static String removePort(String str) {
        int colon = str.indexOf(":");
        if (colon == -1) {
            return str;
        }
        return str.substring(0, colon);
    }

    /**
     * Wraps each URL in a new {@link Request}, preserving iteration order.
     *
     * @param collection the URLs to wrap
     * @return a new list with one request per URL
     */
    public static List<Request> convertToRequests(Collection<String> collection) {
        List<Request> requests = new ArrayList<Request>(collection.size());
        for (String url : collection) {
            requests.add(new Request(url));
        }
        return requests;
    }

    /**
     * Collects the URL of each request, preserving iteration order.
     *
     * @param collection the requests to read
     * @return a new list with one URL per request
     */
    public static List<String> convertToUrls(Collection<Request> collection) {
        List<String> urls = new ArrayList<String>(collection.size());
        for (Request request : collection) {
            urls.add(request.getUrl());
        }
        return urls;
    }

    /**
     * Extracts the charset name from text containing a {@code charset=<name>} parameter.
     *
     * @param str the text to search; may be {@code null}
     * @return the charset name as written in the text when this JVM supports it; {@code null}
     *         when the input is {@code null}, no charset parameter is present, or the name is
     *         empty, illegal or unsupported
     */
    public static String getCharset(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = CHARSET_PATTERN.matcher(str);
        if (!matcher.find()) {
            return null;
        }
        String name = matcher.group(1);
        try {
            return Charset.isSupported(name) ? name : null;
        } catch (IllegalArgumentException ignored) {
            // Charset.isSupported throws IllegalCharsetNameException (a subclass) for an empty
            // or syntactically illegal name, e.g. "charset=" or "charset=utf-8>". Treat such
            // names as unsupported rather than failing the caller.
            return null;
        }
    }
}
