package org.apache.tika.parser.geo.topic;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.geo.topic.gazetteer.GeoGazetteerClient;
import org.apache.tika.parser.geo.topic.gazetteer.Location;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:tika-parsers-1.28.2.jar:org/apache/tika/parser/geo/topic/GeoParser.class */
/**
 * Parser registered for the {@code MediaType.application("geotopic")} media type.
 *
 * <p>It runs an OpenNLP {@link NameFinderME} over the input to collect location name
 * entities, resolves them through a {@link GeoGazetteerClient}, and records the resulting
 * name/longitude/latitude values in the document {@link Metadata}.
 *
 * <p>The name finder and gazetteer client are created lazily by
 * {@link #initialize(GeoParserConfig)} and cached in instance fields.
 * NOTE(review): those fields are mutated without synchronization; confirm whether instances
 * are expected to be shared between threads.
 */
public class GeoParser extends AbstractParser {
    private static final long serialVersionUID = -2241391757440215491L;
    private static final Logger LOG = LoggerFactory.getLogger(GeoParser.class);
    private static final MediaType MEDIA_TYPE = MediaType.application("geotopic");
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MEDIA_TYPE);

    /** Configuration used when the {@link ParseContext} does not supply its own. */
    private GeoParserConfig defaultConfig = new GeoParserConfig();
    private GeoGazetteerClient gazetteerClient;
    /** Set once {@link #initialize(GeoParserConfig)} has run to completion. */
    private boolean initialized;
    /** NER model URL the current {@link #nameFinder} was built from. */
    private URL modelUrl;
    private NameFinderME nameFinder;
    /** True when a model URL is set, the gazetteer reported available and the model loaded. */
    private boolean available;

    /**
     * Returns the media types handled by this parser.
     *
     * @param parseContext the parse context (not consulted)
     * @return a singleton set holding the geotopic media type
     */
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Prepares the gazetteer client and the named-entity finder for the given configuration.
     *
     * <p>If this parser was already set up with the same NER model URL as the one in
     * {@code geoParserConfig}, the call returns without doing anything. Otherwise the model
     * URL, gazetteer client, availability flag and name finder are rebuilt.
     *
     * @param geoParserConfig configuration supplying the NER model URL and gazetteer settings
     * @throws RuntimeException if a model URL cannot be converted to a URI
     */
    public void initialize(GeoParserConfig geoParserConfig) {
        URL requestedModelUrl = geoParserConfig.getNerModelUrl();
        if (this.modelUrl != null && requestedModelUrl != null) {
            try {
                // Compare the cached URL against the REQUESTED one. Comparing the cached URL
                // with itself would always match and make any later configuration change
                // impossible to apply.
                if (this.modelUrl.toURI().equals(requestedModelUrl.toURI())) {
                    return;
                }
            } catch (URISyntaxException e) {
                // Keep the original exception as the cause so the failure can be diagnosed.
                throw new RuntimeException(e.getMessage(), e);
            }
        }

        this.modelUrl = requestedModelUrl;
        this.gazetteerClient = new GeoGazetteerClient(geoParserConfig);
        this.available = this.modelUrl != null && this.gazetteerClient.checkAvail();
        if (this.available) {
            try {
                this.nameFinder = new NameFinderME(new TokenNameFinderModel(this.modelUrl));
            } catch (Exception e) {
                LOG.warn("Named Entity Extractor setup failed: {}", e.getMessage(), e);
                this.available = false;
            }
        }
        this.initialized = true;
    }

    /**
     * Extracts location entities from the stream and adds the resolved geo data to
     * {@code metadata}.
     *
     * <p>The best match is stored under {@code Geographic_NAME}, {@code Geographic_LONGITUDE}
     * and {@code Geographic_LATITUDE}; each alternative is stored under
     * {@code Optional_NAME<n>}, {@code Optional_LONGITUDE<n>} and {@code Optional_LATITUDE<n>}
     * with {@code n} starting at 1. Nothing is written to {@code contentHandler}.
     *
     * <p>When the parser is not available the method returns without touching the metadata.
     *
     * @param inputStream the document content to scan for location names
     * @param contentHandler the SAX handler (not used by this implementation)
     * @param metadata receives the resolved location entries
     * @param parseContext may carry a {@link GeoParserConfig}; the default config is used
     *     otherwise
     */
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        GeoParserConfig geoParserConfig = (GeoParserConfig) parseContext.get(GeoParserConfig.class, this.defaultConfig);
        initialize(geoParserConfig);
        if (!isAvailable(geoParserConfig)) {
            return;
        }
        try {
            NameEntityExtractor extractor = new NameEntityExtractor(this.nameFinder);
            extractor.getAllNameEntitiesfromInput(inputStream);
            extractor.getBestNameEntity();
            ArrayList<String> locationNames = extractor.locationNameEntities;
            String bestName = extractor.bestNameEntity;

            Map<String, List<Location>> resolvedGeoNames = searchGeoNames(locationNames);
            GeoTag geoTag = new GeoTag();
            geoTag.toGeoTag(resolvedGeoNames, bestName);

            metadata.add("Geographic_NAME", geoTag.location.getName());
            metadata.add("Geographic_LONGITUDE", geoTag.location.getLongitude());
            metadata.add("Geographic_LATITUDE", geoTag.location.getLatitude());

            for (int i = 0; i < geoTag.alternatives.size(); i++) {
                GeoTag alternative = geoTag.alternatives.get(i);
                int ordinal = i + 1; // metadata keys are 1-based
                metadata.add("Optional_NAME" + ordinal, alternative.location.getName());
                metadata.add("Optional_LONGITUDE" + ordinal, alternative.location.getLongitude());
                metadata.add("Optional_LATITUDE" + ordinal, alternative.location.getLatitude());
            }
        } catch (Exception e) {
            // NOTE(review): this also catches failures from extraction and gazetteer lookup,
            // not only extractor setup, and logs them instead of propagating. Confirm that
            // this best-effort behaviour is intended before narrowing it.
            LOG.warn("Named Entity Extractor setup failed: {}", e.getMessage(), e);
        }
    }

    /**
     * Looks up the given location names through the gazetteer client.
     *
     * @param arrayList location names to resolve
     * @return whatever the gazetteer client returns for those names
     */
    public Map<String, List<Location>> searchGeoNames(ArrayList<String> arrayList) {
        return this.gazetteerClient.getLocations(arrayList);
    }

    /**
     * Reports whether this parser can run, initializing it first if that has not happened yet.
     *
     * @param geoParserConfig configuration to initialize with when not yet initialized
     * @return the availability flag computed by {@link #initialize(GeoParserConfig)}
     */
    public boolean isAvailable(GeoParserConfig geoParserConfig) {
        if (!this.initialized) {
            initialize(geoParserConfig);
        }
        return this.available;
    }

    /**
     * Sets the gazetteer REST endpoint on the default configuration.
     *
     * @param str the endpoint value to store
     */
    @Field
    public void setGazetteerRestEndpoint(String str) {
        this.defaultConfig.setGazetteerRestEndpoint(str);
    }

    /**
     * Sets the NER model URL on the default configuration.
     *
     * @param str textual form of the model URL
     * @throws IllegalArgumentException if {@code str} is not a well-formed URL
     */
    @Field
    public void setNerModelUrl(String str) {
        try {
            this.defaultConfig.setNerModelUrl(new URL(str));
        } catch (MalformedURLException e) {
            throw new IllegalArgumentException("malformed url " + str, e);
        }
    }

    /**
     * @return the gazetteer REST endpoint held by the default configuration
     */
    public String getGazetteerRestEndpoint() {
        return this.defaultConfig.getGazetteerRestEndpoint();
    }

    /**
     * @return the NER model URL held by the default configuration
     */
    public URL getNerModelUrl() {
        return this.defaultConfig.getNerModelUrl();
    }
}
