/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.speciesassignment.mlcandidateranker;

import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.Token2FeatureVector;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.Token;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneSpeciesOccurrence;
import de.julielab.geneexpbase.genemodel.MentionMappingResult;
import de.julielab.speciesassignment.SpeciesAssignmentException;
import de.julielab.speciesassignment.mlcandidateranker.GeneCandidateSpeciesPipe;
import de.julielab.speciesassignment.mlcandidateranker.SpeciesAPrioriScorePipe;
import de.julielab.speciesassignment.mlcandidateranker.SpeciesCandidates2TokenPipe;
import de.julielab.speciesassignment.mlcandidateranker.SpeciesOccurrencePipe;
import de.julielab.speciesassignment.mlcandidateranker.ToNonBinaryVectorPipe;
import de.julielab.speciesassignment.spi.SpeciesDocumentScoringService;
import de.julielab.speciesassignment.spi.SynonymSpeciesCooccurrenceService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalDouble;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import javax.inject.Inject;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SpeciesInstanceTools {
    /** Token property key under which the species occurrences for the focus taxonomy ID are stored. */
    public static final String PROP_OCCURRENCES = "occurrences";
    /** Token property key under which the taxonomy ID the token was created for is stored. */
    public static final String PROP_FOCUS_TAX = "focusTaxId";
    /** Property key under which the originating gene mention is attached to tokens and instances. */
    public static final String PROP_GENE_MENTION = "gm";
    /** NOTE(review): not referenced inside this class; presumably the name of a configuration parameter selecting the feature pipe - confirm against callers. */
    public static final String PARAM_FEATURE_PIPE = "featurePipe";
    private static final Logger log = LoggerFactory.getLogger(SpeciesInstanceTools.class);
    /** Document scoring service handed to {@link SpeciesAPrioriScorePipe} instances. */
    private final SpeciesDocumentScoringService speciesDocumentScoringService;
    /** Synonym/species co-occurrence service handed to {@link SpeciesAPrioriScorePipe} instances. */
    private final SynonymSpeciesCooccurrenceService synonymSpeciesCooccurrenceService;

    /**
     * Creates the instance tools with the two services that are later passed on to
     * {@link SpeciesAPrioriScorePipe} instances.
     *
     * @param speciesDocumentScoringService     the document scoring service
     * @param synonymSpeciesCooccurrenceService the synonym/species co-occurrence service
     */
    @Inject
    public SpeciesInstanceTools(SpeciesDocumentScoringService speciesDocumentScoringService, SynonymSpeciesCooccurrenceService synonymSpeciesCooccurrenceService) {
        this.synonymSpeciesCooccurrenceService = synonymSpeciesCooccurrenceService;
        this.speciesDocumentScoringService = speciesDocumentScoringService;
    }

    /**
     * Builds one instance per taxonomy candidate of the given gene mention and sends each of
     * them through the given pipe.
     * <p>
     * The instance data is a pair of (species occurrences for the candidate, candidate tax ID),
     * the target is the gold taxonomy ID of the mention, the name is
     * {@code <docId>_<begin>_<end>} and the source is the candidate tax ID. The gene mention is
     * attached to every instance under {@link #PROP_GENE_MENTION}.
     *
     * @param gm   the gene mention whose taxonomy candidates should be turned into instances
     * @param pipe the pipe the instances are passed through
     * @return the piped instances, one per taxonomy candidate
     */
    public InstanceList createSpeciesCandidateInstances(GeneMention gm, Pipe pipe) {
        InstanceList candidateInstances = new InstanceList(pipe);
        MentionMappingResult mappingResult = gm.getMentionMappingResult();
        assert (mappingResult != null) : "The gene candidates have not yet been assigned to this gene.";
        Set<String> candidateTaxIds = gm.getTaxonomyOccurrences().keySet();
        for (String taxId : candidateTaxIds) {
            Collection<GeneSpeciesOccurrence> taxOccurrences = gm.getTaxonomyOccurrences().get(taxId);
            String name = new StringBuilder()
                    .append(gm.getDocId())
                    .append("_")
                    .append(gm.getBegin())
                    .append("_")
                    .append(gm.getEnd())
                    .toString();
            Instance candidate = new Instance(
                    new ImmutablePair<Collection<GeneSpeciesOccurrence>, String>(taxOccurrences, taxId),
                    gm.getGoldTaxonomyId(),
                    name,
                    taxId);
            candidate.setProperty(PROP_GENE_MENTION, gm);
            candidateInstances.addThruPipe(candidate);
        }
        return candidateInstances;
    }

    /**
     * Creates one un-piped instance per taxonomy candidate for a whole document.
     * <p>
     * The candidates are read from the taxonomy occurrences of an arbitrary gene mention of the
     * document (whatever {@code findAny()} yields). Each instance carries a (document, tax ID)
     * pair as data, no target, the tax ID as name and the document as source.
     *
     * @param doc the document to create instances for
     * @return the instances; empty if the document has no gene mentions
     */
    public List<Instance> createSpeciesCandidateInstances(GeneDocument doc) {
        List<Instance> documentInstances = new ArrayList<Instance>();
        doc.getGenes().findAny().ifPresent(anyGene -> {
            Set<String> candidateTaxIds = anyGene.getTaxonomyOccurrences().keySet();
            for (String taxId : candidateTaxIds) {
                ImmutablePair<GeneDocument, String> data = new ImmutablePair<GeneDocument, String>(doc, taxId);
                documentInstances.add(new Instance(data, null, taxId, doc));
            }
        });
        return documentInstances;
    }

    /**
     * Creates a token for the gene mention text together with its binary label for the given
     * focus taxonomy ID.
     * <p>
     * The token carries the species occurrences for the focus tax ID, the gene mention and the
     * focus tax ID as properties. The label is looked up for the {@code Float} value 1.0 if the
     * focus tax ID is a gold ID and 0.0 otherwise. If the document's gold data has offsets, the
     * mention's own gold ID is compared; otherwise the document-level gold IDs are consulted.
     *
     * @param gm             the gene mention
     * @param focusTaxId     the taxonomy ID candidate the token is created for
     * @param targetAlphabet the alphabet the label is looked up in
     * @return the token and its label
     */
    public Pair<Token, Label> createLabeledSpeciesCandidateToken(GeneMention gm, String focusTaxId, LabelAlphabet targetAlphabet) {
        Token candidateToken = new Token(gm.getText());
        candidateToken.setProperty(PROP_OCCURRENCES, gm.getTaxonomyOccurrences().get(focusTaxId));
        candidateToken.setProperty(PROP_GENE_MENTION, gm);
        candidateToken.setProperty(PROP_FOCUS_TAX, focusTaxId);

        boolean isGoldTaxon;
        if (gm.getGeneDocument().isGoldHasOffsets()) {
            // Mention-level gold annotation is available.
            isGoldTaxon = focusTaxId.equals(gm.getAnyGoldId());
        } else {
            // Only document-level gold IDs are available.
            isGoldTaxon = gm.getGeneDocument().getGoldIds().contains(focusTaxId);
        }

        // The alphabet entry must remain a Float so that lookups agree with existing entries.
        Float labelValue = Float.valueOf(isGoldTaxon ? 1.0f : 0.0f);
        Label candidateLabel = targetAlphabet.lookupLabel(labelValue);
        return new ImmutablePair<Token, Label>(candidateToken, candidateLabel);
    }

    /**
     * Creates the MaxEnt instance list for the given gene mentions using a freshly built
     * default pipe.
     *
     * @param geneMentions the gene mentions to create instances for
     * @param parameters   the parameters used to build the default pipe
     * @return the instance list, or {@code null} if the pipe could not be created
     */
    public InstanceList createInstanceListForMaxEnt(Iterable<GeneMention> geneMentions, Parameters parameters) {
        // A null pipe makes the three-argument overload build the default pipe.
        final Pipe noPipe = null;
        return createInstanceListForMaxEnt(geneMentions, noPipe, parameters);
    }

    /**
     * Creates the MaxEnt instance list for all gene mentions of the given document.
     *
     * @param geneDocument the document whose gene mentions are used
     * @param pipe         the pipe to use, or {@code null} to build the default pipe
     * @param parameters   the parameters used when the default pipe has to be built
     * @return the instance list, or {@code null} if the pipe could not be created
     */
    public InstanceList createInstanceListsForMaxEntPerGeneMention(GeneDocument geneDocument, Pipe pipe, Parameters parameters) {
        Iterable<GeneMention> documentMentions = geneDocument.getGenesIterable();
        return createInstanceListForMaxEnt(documentMentions, pipe, parameters);
    }

    /**
     * Creates the MaxEnt instance list for the given gene mentions.
     * <p>
     * For each gene mention, its species candidate instances are created through the list's
     * pipe, appended to the overall list and also set on the gene mention itself.
     *
     * @param geneMentions the gene mentions to create instances for
     * @param pipe         the pipe to use, or {@code null} to build the default MaxEnt pipe
     * @param parameters   the parameters used when the default pipe has to be built
     * @return the instance list, or {@code null} if a {@link SpeciesAssignmentException} occurred
     */
    public InstanceList createInstanceListForMaxEnt(Iterable<GeneMention> geneMentions, Pipe pipe, Parameters parameters) {
        try {
            Pipe effectivePipe = pipe != null ? pipe : new SerialPipes(createMaxEntPipes(parameters));
            InstanceList allInstances = new InstanceList(effectivePipe);
            for (GeneMention mention : geneMentions) {
                InstanceList mentionInstances = createSpeciesCandidateInstances(mention, allInstances.getPipe());
                allInstances.addAll(mentionInstances);
                mention.setInstances(mentionInstances);
            }
            return allInstances;
        } catch (SpeciesAssignmentException e) {
            log.error("Could not create MALLET pipes", e);
            return null;
        }
    }

    /**
     * Rescales the values of one feature across a group of candidate instances so that the
     * largest non-zero value among the filtered instances becomes {@code scalingFactor}.
     * <p>
     * The maximum is computed only over instances accepted by {@code instanceFilter}. Nothing
     * happens if the feature is unknown to the data alphabet or if no filtered instance has a
     * non-zero value for it.
     *
     * @param featureName        the name of the feature to normalize
     * @param scalingFactor      the value the group maximum is mapped to
     * @param instanceFilter     selects the instances that contribute to the maximum
     * @param candidateInstances the group of instances; their data must be {@link FeatureVector}s
     * @param instances          the instance list providing the data alphabet
     */
    private void normalizeFeaturesGroupWise(String featureName, double scalingFactor, Predicate<Instance> instanceFilter, List<Instance> candidateInstances, InstanceList instances) {
        // Look the feature up without adding it: normalization must not grow the alphabet.
        int featureIndex = instances.getDataAlphabet().lookupIndex(featureName, false);
        if (featureIndex < 0) {
            return;
        }
        OptionalDouble maxMentionScore = candidateInstances.stream()
                .filter(instanceFilter)
                .map(Instance::getData)
                .map(FeatureVector.class::cast)
                .filter(fv -> fv.location(featureIndex) >= 0)
                // value(int) expects the feature index, not the storage location.
                .mapToDouble(fv -> fv.value(featureIndex))
                .filter(value -> value != 0.0)
                .max();
        if (!maxMentionScore.isPresent()) {
            return;
        }
        double max = maxMentionScore.getAsDouble();
        // NOTE(review): rescaling is applied to all candidates, not only those accepted by
        // instanceFilter - this mirrors the previous behavior; confirm that it is intended.
        for (Instance instance : candidateInstances) {
            FeatureVector fv = (FeatureVector) instance.getData();
            // Binary vectors have no values to rescale; vectors without the feature are skipped.
            if (fv.isBinary() || fv.location(featureIndex) < 0) {
                continue;
            }
            double newValue = fv.value(featureIndex) / max * scalingFactor;
            fv.setValue(featureIndex, newValue);
        }
    }

    /**
     * Creates a single MaxEnt instance list from the gene mentions of all given documents.
     * <p>
     * The default pipe is built once, for the first document, and then reused for all further
     * documents so that all instances are created with the same pipe (and therefore the same
     * alphabets) as the returned list.
     *
     * @param documents  the documents whose gene mentions are turned into instances
     * @param parameters the parameters used to build the default pipe
     * @return the merged instance list; {@code null} if there are no documents or if the
     *         instances for a document could not be created (the cause is logged by
     *         {@link #createInstanceListForMaxEnt(Iterable, Pipe, Parameters)})
     */
    public InstanceList createInstanceListForMaxEntDocWise(Iterable<GeneDocument> documents, Parameters parameters) {
        InstanceList ret = null;
        for (GeneDocument document : documents) {
            Iterable<GeneMention> geneMentions = document.getGenes()::iterator;
            // null for the first document makes the delegate build the default pipe.
            Pipe sharedPipe = ret == null ? null : ret.getPipe();
            InstanceList documentInstances = createInstanceListForMaxEnt(geneMentions, sharedPipe, parameters);
            if (documentInstances == null) {
                // Pipe creation failed; the delegate has already logged the error.
                return null;
            }
            if (ret == null) {
                ret = new InstanceList(documentInstances.getPipe());
            }
            for (Instance instance : documentInstances) {
                ret.add(instance);
            }
        }
        return ret;
    }

    /**
     * Assembles the default sequence of pipes that turns species candidates into feature
     * vectors for the MaxEnt model.
     *
     * @param parameters the parameters passed to the {@link SpeciesAPrioriScorePipe}
     * @return the pipes in the order in which they must be applied
     * @throws SpeciesAssignmentException if creating a pipe fails with an {@link IOException}
     */
    private List<Pipe> createMaxEntPipes(Parameters parameters) throws SpeciesAssignmentException {
        List<Pipe> stages = new ArrayList<Pipe>();
        try {
            stages.add(new SpeciesCandidates2TokenPipe(this));
            stages.add(new SpeciesOccurrencePipe());
            stages.add(new SpeciesAPrioriScorePipe(parameters, speciesDocumentScoringService, synonymSpeciesCooccurrenceService));
            stages.add(new GeneCandidateSpeciesPipe());
            stages.add(new Token2FeatureVector());
            stages.add(new ToNonBinaryVectorPipe());
            return stages;
        } catch (IOException e) {
            throw new SpeciesAssignmentException(e);
        }
    }

    /**
     * Sets this object's scoring and co-occurrence services on every
     * {@link SpeciesAPrioriScorePipe} contained in the given pipe.
     * <p>
     * The pipe may be a {@link SpeciesAPrioriScorePipe} itself or a {@link SerialPipes}
     * container, which is searched recursively. Other pipe types are left untouched rather
     * than causing a {@link ClassCastException}.
     *
     * @param instancePipe the pipe to inject the services into
     */
    public void injectServices(Pipe instancePipe) {
        if (!injectServicesRecursively(instancePipe)) {
            log.debug("No SpeciesAPrioriScorePipe found in the given pipe, no services were injected.");
        }
    }

    /** Injects the services into the pipe and all nested pipes; returns whether any pipe received them. */
    private boolean injectServicesRecursively(Pipe pipe) {
        if (pipe instanceof SpeciesAPrioriScorePipe) {
            SpeciesAPrioriScorePipe scorePipe = (SpeciesAPrioriScorePipe) pipe;
            scorePipe.setDocumentScoringService(this.speciesDocumentScoringService);
            scorePipe.setSpeciesCooccurrenceService(this.synonymSpeciesCooccurrenceService);
            return true;
        }
        boolean injected = false;
        if (pipe instanceof SerialPipes) {
            for (Pipe nested : ((SerialPipes) pipe).pipes()) {
                injected |= injectServicesRecursively(nested);
            }
        }
        return injected;
    }
}

