/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.test.unit.chunk;

import com.aliasi.chunk.BioTagChunkCodec;
import com.aliasi.chunk.Chunk;
import com.aliasi.chunk.ChunkFactory;
import com.aliasi.chunk.Chunking;
import com.aliasi.chunk.ChunkingImpl;
import com.aliasi.chunk.TagChunkCodec;
import com.aliasi.crf.ForwardBackwardTagLattice;
import com.aliasi.symbol.SymbolTable;
import com.aliasi.tag.StringTagging;
import com.aliasi.tag.TagLattice;
import com.aliasi.test.unit.Asserts;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Math;
import com.aliasi.util.ScoredObject;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import junit.framework.Assert;
import org.junit.Test;

public class BioTagChunkCodecTest {
    /**
     * Checks {@code nBestChunks} on an empty lattice, on two hand-built
     * single-token lattices, and on seeded random lattices that are compared
     * against brute-force enumeration.
     */
    @Test
    public void testNBestChunks() {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);

        // Empty lattice: no tokens, no tags, so no chunks are expected.
        ForwardBackwardTagLattice<String> lattice = new ForwardBackwardTagLattice<String>(
                Collections.emptyList(), Collections.emptyList(),
                new double[0][0], new double[0][0], new double[0][0][0], 0.0);
        Iterator<Chunk> it = codec.nBestChunks(lattice, new int[0], new int[0], 100);
        Assert.assertFalse(it.hasNext());

        // One token, one chunk type: a single PER chunk over "John" is expected.
        lattice = new ForwardBackwardTagLattice<String>(
                Arrays.asList("John"),
                Arrays.asList("O", "B_PER", "I_PER"),
                new double[][]{{-1.0, -3.0, -100.0}},
                new double[][]{{0.0, 0.0, 0.0}},
                new double[0][0][0],
                -1.5);
        it = codec.nBestChunks(lattice, new int[]{0}, new int[]{4}, 100);
        this.assertIterator(it, ChunkFactory.createChunk(0, 4, "PER", -1.5));

        // One token, two chunk types: PER is expected before LOC.
        lattice = new ForwardBackwardTagLattice<String>(
                Arrays.asList("John"),
                Arrays.asList("O", "B_PER", "I_PER", "B_LOC", "I_LOC"),
                new double[][]{{-1.0, -3.0, -100.0, -5.0, -200.0}},
                new double[][]{{0.0, 0.0, 0.0, 0.0, 0.0}},
                new double[0][0][0],
                -2.5);
        it = codec.nBestChunks(lattice, new int[]{0}, new int[]{4}, 100);
        this.assertIterator(it,
                ChunkFactory.createChunk(0, 4, "PER", -0.5),
                ChunkFactory.createChunk(0, 4, "LOC", -2.5));

        // Same lattice, but capped at a single result.
        it = codec.nBestChunks(lattice, new int[]{0}, new int[]{4}, 1);
        this.assertIterator(it, ChunkFactory.createChunk(0, 4, "PER", -0.5));

        // Random lattices; one shared seeded generator keeps the run reproducible.
        Random random = new Random(42L);
        this.assertRandomLattice(random, "John", new HashSet<String>(Arrays.asList("PER")), 100, codec);
        String[] twoTypeTexts = {"John", "John ran", "Mary jumped", "Mary likes John"};
        for (String text : twoTypeTexts) {
            this.assertRandomLattice(random, text, new HashSet<String>(Arrays.asList("PER", "LOC")), 100, codec);
        }
    }

    /**
     * Tokenizes {@code cs}, builds a random lattice over the resulting tokens
     * and the BIO tags for {@code chunkTypes}, and checks the codec's n-best
     * chunks for that lattice via {@code assertNBestChunks}.
     *
     * @param random     source of randomness for the lattice scores
     * @param cs         text to tokenize
     * @param chunkTypes chunk types from which the tag set is derived
     * @param max        maximum number of chunks requested from the codec
     * @param codec      codec under test
     */
    void assertRandomLattice(Random random, String cs, Set<String> chunkTypes, int max, TagChunkCodec codec) {
        List<String> tokens = new ArrayList<String>();
        List<Integer> startPositions = new ArrayList<Integer>();
        List<Integer> endPositions = new ArrayList<Integer>();

        char[] chars = cs.toCharArray();
        Tokenizer tokenizer = IndoEuropeanTokenizerFactory.INSTANCE.tokenizer(chars, 0, chars.length);
        for (String token : tokenizer) {
            // The positions are read right after each token is produced.
            tokens.add(token);
            startPositions.add(tokenizer.lastTokenStartPosition());
            endPositions.add(tokenizer.lastTokenEndPosition());
        }

        int[] starts = BioTagChunkCodecTest.toInts(startPositions);
        int[] ends = BioTagChunkCodecTest.toInts(endPositions);
        this.assertNBestChunks(this.randomLattice(chunkTypes, tokens, random), starts, ends, max, cs, codec);
    }

    /**
     * Copies a list of boxed integers into a primitive array, keeping order.
     *
     * @param xs values to unbox
     * @return a new array with one entry per list element
     */
    static int[] toInts(List<Integer> xs) {
        int[] unboxed = new int[xs.size()];
        int next = 0;
        for (Integer value : xs) {
            unboxed[next++] = value;
        }
        return unboxed;
    }

    /**
     * Builds a forward-backward lattice with random scores over {@code tokens}.
     *
     * <p>The tag list is {@code "O"} at index 0 followed, for each chunk
     * type, by its {@code B_} tag (odd index) and its {@code I_} tag (even
     * index). Scores that would put an {@code I_} tag first, or reach an
     * {@code I_} tag from anything other than the {@code B_}/{@code I_} tag
     * of the same type, are set to negative infinity. Forward and backward
     * values and the normalizer are then derived from the random first-token
     * scores and transitions.
     *
     * @param chunkTypes chunk types from which the tags are generated
     * @param tokens     tokens of the lattice; indexing assumes at least one
     * @param random     source of the random log scores
     * @return the constructed lattice
     */
    TagLattice randomLattice(Set<String> chunkTypes, List<String> tokens, Random random) {
        ArrayList<String> tags = new ArrayList<String>(1 + 2 * chunkTypes.size());
        tags.add("O");
        for (String chunkType : chunkTypes) {
            tags.add("B_" + chunkType);
            tags.add("I_" + chunkType);
        }
        int numTags = tags.size();
        int numTokens = tokens.size();

        // First-token scores: random, except that no I_ tag may start a sequence.
        double[][] logForwards = new double[numTokens][numTags];
        logForwards[0] = BioTagChunkCodecTest.randomArray(random, numTags);
        for (int inside = 2; inside < numTags; inside += 2) {
            logForwards[0][inside] = Double.NEGATIVE_INFINITY;
        }

        // Transition scores: one random row per (position, source tag), with
        // every move into an I_ tag masked out unless the source is the B_
        // tag just before it or the I_ tag itself.
        double[][][] logTransitions = new double[numTokens - 1][numTags][];
        for (int pos = 0; pos < logTransitions.length; ++pos) {
            for (int from = 0; from < numTags; ++from) {
                double[] row = BioTagChunkCodecTest.randomArray(random, numTags);
                for (int to = 2; to < numTags; to += 2) {
                    if (from != to - 1 && from != to) {
                        row[to] = Double.NEGATIVE_INFINITY;
                    }
                }
                logTransitions[pos][from] = row;
            }
        }

        // Forward pass; the scratch buffer is reused for every log-sum.
        double[] scratch = new double[numTags];
        for (int pos = 1; pos < numTokens; ++pos) {
            for (int to = 0; to < numTags; ++to) {
                for (int from = 0; from < numTags; ++from) {
                    scratch[from] = logForwards[pos - 1][from] + logTransitions[pos - 1][from][to];
                }
                logForwards[pos][to] = Math.logSumOfExponentials(scratch);
            }
        }
        double logZ = Math.logSumOfExponentials(logForwards[logForwards.length - 1]);

        // Backward pass; the last token's row keeps its default value of 0.0.
        double[][] logBackwards = new double[numTokens][numTags];
        for (int pos = numTokens - 2; pos >= 0; --pos) {
            for (int from = 0; from < numTags; ++from) {
                for (int to = 0; to < numTags; ++to) {
                    scratch[to] = logBackwards[pos + 1][to] + logTransitions[pos][from][to];
                }
                logBackwards[pos][from] = Math.logSumOfExponentials(scratch);
            }
        }
        return new ForwardBackwardTagLattice<String>(tokens, tags, logForwards, logBackwards, logTransitions, logZ);
    }

    /**
     * Creates an array of random non-positive values, each computed as
     * {@code -10.0 * random.nextDouble()}.
     *
     * @param random generator to draw from, one draw per element in index order
     * @param length number of elements
     * @return the newly filled array
     */
    static double[] randomArray(Random random, int length) {
        double[] values = new double[length];
        for (int i = 0; i < length; ++i) {
            values[i] = -10.0 * random.nextDouble();
        }
        return values;
    }

    /**
     * Compares the codec's n-best chunk iterator against the brute-force
     * ranking, position by position, and then requires the iterator to be
     * exhausted.
     *
     * <p>NOTE(review): the number of chunks produced is not compared with the
     * size of the brute-force list, so an iterator that stops early still
     * passes; confirm whether a count check is wanted.
     *
     * @param lattice lattice to decode
     * @param starts  token start offsets
     * @param ends    token end offsets
     * @param max     maximum number of chunks requested
     * @param cs      underlying text
     * @param codec   codec under test
     */
    void assertNBestChunks(TagLattice lattice, int[] starts, int[] ends, int max, String cs, TagChunkCodec codec) {
        List<Chunk> expected = this.bruteForce(lattice, starts, ends, cs, codec);
        Iterator<Chunk> actual = codec.nBestChunks(lattice, starts, ends, max);
        for (int rank = 0; rank < max && actual.hasNext(); ++rank) {
            this.assertEqualScoredChunks(expected.get(rank), actual.next());
        }
        Assert.assertFalse(actual.hasNext());
    }

    /**
     * Asserts that two chunks have the same start, end and type, and that
     * their scores differ by at most 0.1.
     *
     * @param c1 expected chunk
     * @param c2 actual chunk
     */
    void assertEqualScoredChunks(Chunk c1, Chunk c2) {
        final double scoreTolerance = 0.1;
        Assert.assertEquals(c1.start(), c2.start());
        Assert.assertEquals(c1.end(), c2.end());
        Assert.assertEquals(c1.type(), c2.type());
        Assert.assertEquals(c1.score(), c2.score(), scoreTolerance);
    }

    /**
     * Computes the reference chunk ranking by enumerating every tag sequence
     * over the lattice.
     *
     * <p>Each sequence the codec accepts as legal is scored as first-token
     * forward value minus {@code logZ}, plus every transition along the
     * sequence, plus the last-token backward value. Each chunk of the decoded
     * chunking collects the scores of all sequences containing it; the
     * chunk's final score is the log-sum-of-exponentials of those scores.
     *
     * @param lattice      lattice supplying tokens, tags and scores
     * @param tokenStarts  token start offsets into {@code charSequence}
     * @param tokenEnds    token end offsets into {@code charSequence}
     * @param charSequence underlying text
     * @param codec        codec used to test legality and decode chunkings
     * @return the scored chunks sorted with {@code ScoredObject.reverseComparator()};
     *         empty when the lattice has no tokens
     */
    List<Chunk> bruteForce(TagLattice<String> lattice, int[] tokenStarts, int[] tokenEnds, String charSequence, TagChunkCodec codec) {
        int numTokens = lattice.numTokens();
        if (numTokens == 0) {
            return new ArrayList<Chunk>(0);
        }
        SymbolTable tagSymbolTable = lattice.tagSymbolTable();

        // Enumerate every possible tag assignment, legal or not.
        List<String[]> tagSeqs = new ArrayList<String[]>();
        this.add(tagSeqs, 0, lattice, new String[numTokens]);

        Map<Chunk, List<Double>> chunkToScores = new HashMap<Chunk, List<Double>>();
        for (String[] tags : tagSeqs) {
            StringTagging tagging = new StringTagging(lattice.tokenList(), Arrays.asList(tags), (CharSequence) charSequence, tokenStarts, tokenEnds);
            if (!codec.legalTags(tags)) {
                continue;
            }
            Chunking chunking = codec.toChunking(tagging);

            // Score of this complete tag sequence.
            double logScore = lattice.logForward(0, tagSymbolTable.symbolToID(tags[0])) - lattice.logZ();
            for (int n = 1; n < tags.length; ++n) {
                logScore += lattice.logTransition(n - 1, tagSymbolTable.symbolToID(tags[n - 1]), tagSymbolTable.symbolToID(tags[n]));
            }
            logScore += lattice.logBackward(numTokens - 1, tagSymbolTable.symbolToID(tags[numTokens - 1]));

            // Credit the sequence score to every chunk it contains.
            for (Chunk chunk : chunking.chunkSet()) {
                List<Double> scores = chunkToScores.get(chunk);
                if (scores == null) {
                    scores = new ArrayList<Double>();
                    chunkToScores.put(chunk, scores);
                }
                scores.add(logScore);
            }
        }

        // Collapse each chunk's collected scores into one value.
        List<Chunk> chunks = new ArrayList<Chunk>(chunkToScores.size());
        for (Map.Entry<Chunk, List<Double>> entry : chunkToScores.entrySet()) {
            Chunk c = entry.getKey();
            List<Double> vals = entry.getValue();
            double[] xs = new double[vals.size()];
            for (int i = 0; i < xs.length; ++i) {
                xs[i] = vals.get(i);
            }
            chunks.add(ChunkFactory.createChunk(c.start(), c.end(), c.type(), Math.logSumOfExponentials(xs)));
        }
        Collections.sort(chunks, ScoredObject.reverseComparator());
        return chunks;
    }

    /**
     * Recursively enumerates every assignment of lattice tags to token
     * positions from {@code pos} onward, appending a copy of each completed
     * assignment to {@code tagSeqs}.
     *
     * @param tagSeqs accumulator for completed tag sequences
     * @param pos     next position to fill
     * @param lattice lattice supplying the tag inventory and token count
     * @param tags    working buffer, overwritten in place during recursion
     */
    void add(List<String[]> tagSeqs, int pos, TagLattice<String> lattice, String[] tags) {
        if (pos == lattice.numTokens()) {
            // Every position is filled; snapshot the buffer, which keeps being reused.
            tagSeqs.add(tags.clone());
            return;
        }
        for (int tagId = 0; tagId < lattice.numTags(); ++tagId) {
            tags[pos] = lattice.tag(tagId);
            this.add(tagSeqs, pos + 1, lattice, tags);
        }
    }

    /**
     * Asserts that the iterator yields exactly the given chunks in order,
     * matching start, end and type exactly and score within 0.1, and that
     * nothing follows them.
     *
     * @param it     iterator under test
     * @param chunks expected chunks, in expected order
     */
    void assertIterator(Iterator<Chunk> it, Chunk ... chunks) {
        for (Chunk expected : chunks) {
            Assert.assertTrue(it.hasNext());
            Chunk actual = it.next();
            Assert.assertEquals(expected.start(), actual.start());
            Assert.assertEquals(expected.end(), actual.end());
            Assert.assertEquals(expected.type(), actual.type());
            Assert.assertEquals(expected.score(), actual.score(), 0.1);
        }
        Assert.assertFalse(it.hasNext());
    }

    /**
     * Checks which tag sequences {@code legalTagSubSequence} accepts: known
     * tags alone, and sequences in which an {@code I_} tag, when preceded by
     * another tag, follows a {@code B_}/{@code I_} tag of the same type.
     */
    @Test
    public void testLegalTagSubSequence() {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);

        // Single tags: any of O / B_x / I_x is accepted, unknown tags are not.
        Assert.assertTrue(codec.legalTagSubSequence("O"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("I_PER"));
        Assert.assertFalse(codec.legalTagSubSequence("F"));
        Assert.assertFalse(codec.legalTagSubSequence("M_PER"));

        // Longer sequences that are accepted.
        Assert.assertTrue(codec.legalTagSubSequence("O", "B_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("I_PER", "O"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER", "B_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER", "I_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER", "I_PER", "I_PER"));
        Assert.assertTrue(codec.legalTagSubSequence("B_PER", "I_PER", "I_PER", "O"));
        Assert.assertTrue(codec.legalTagSubSequence("O", "B_PER", "I_PER", "I_PER", "O"));

        // An I_ tag after O, or after a tag of a different type, is rejected.
        Assert.assertFalse(codec.legalTagSubSequence("O", "I_PER"));
        Assert.assertFalse(codec.legalTagSubSequence("B_LOC", "I_PER"));
    }

    /**
     * Checks which complete tag sequences {@code legalTags} accepts; the
     * cases here additionally reject sequences that begin with an
     * {@code I_} tag.
     */
    @Test
    public void testLegalTags() {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);

        // Single tags.
        Assert.assertTrue(codec.legalTags("O"));
        Assert.assertTrue(codec.legalTags("B_PER"));
        Assert.assertFalse(codec.legalTags("F"));
        Assert.assertFalse(codec.legalTags("M_PER"));

        // Accepted multi-tag sequences.
        Assert.assertTrue(codec.legalTags("O", "B_PER"));
        Assert.assertTrue(codec.legalTags("B_PER", "B_PER"));
        Assert.assertTrue(codec.legalTags("B_PER", "I_PER"));
        Assert.assertTrue(codec.legalTags("B_PER", "I_PER", "I_PER"));
        Assert.assertTrue(codec.legalTags("B_PER", "I_PER", "I_PER", "O"));
        Assert.assertTrue(codec.legalTags("O", "B_PER", "I_PER", "I_PER", "O"));

        // Rejected: starting with I_, or I_ not continuing a same-type chunk.
        Assert.assertFalse(codec.legalTags("I_PER", "O"));
        Assert.assertFalse(codec.legalTags("I_PER"));
        Assert.assertFalse(codec.legalTags("O", "I_PER"));
        Assert.assertFalse(codec.legalTags("B_LOC", "I_PER"));
    }

    /**
     * Checks that the tag set for chunk types PER and LOC is {@code O} plus
     * the {@code B_} and {@code I_} tag of each type.
     */
    @Test
    public void testBioCodecTagSet() {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, false);
        Set<String> chunkTypes = new HashSet<String>(Arrays.asList("PER", "LOC"));
        Set<String> expectedTags = new HashSet<String>();
        expectedTags.addAll(Arrays.asList("O", "B_PER", "I_PER", "B_LOC", "I_LOC"));
        Assert.assertEquals(expectedTags, codec.tagSet(chunkTypes));
    }

    /**
     * Checks {@code isEncodable} and the encode/decode round trip on
     * chunkings whose chunks line up with token boundaries, and checks that
     * overlapping, misaligned and empty chunks are rejected.
     */
    @Test
    public void testEncodable() throws IOException, ClassNotFoundException {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, true);

        // Four non-overlapping chunks, added out of textual order:
        // "Mary", "Washington", "J. J. Jones", "John Jones".
        ChunkingImpl chunkingOk = new ChunkingImpl("John Jones Mary and Mr. J. J. Jones ran to Washington.");
        chunkingOk.add(ChunkFactory.createChunk(11, 15, "PER"));
        chunkingOk.add(ChunkFactory.createChunk(43, 53, "LOC"));
        chunkingOk.add(ChunkFactory.createChunk(24, 35, "PER"));
        chunkingOk.add(ChunkFactory.createChunk(0, 10, "PER"));
        this.assertEncodable(codec, chunkingOk);

        // Two chunks that overlap on "John".
        ChunkingImpl chunkingBad = new ChunkingImpl("John ran");
        chunkingBad.add(ChunkFactory.createChunk(0, 4, "PER"));
        chunkingBad.add(ChunkFactory.createChunk(0, 8, "LOC"));
        this.assertNotEncodable(codec, chunkingBad);

        // Chunk ends one character past the end of "John".
        ChunkingImpl chunkingBad3 = new ChunkingImpl("John ran");
        chunkingBad3.add(ChunkFactory.createChunk(0, 5, "PER"));
        this.assertNotEncodable(codec, chunkingBad3);

        // Chunk starts inside "John"; the check is run twice, as in the original.
        ChunkingImpl chunkingBad4 = new ChunkingImpl("John ran");
        chunkingBad4.add(ChunkFactory.createChunk(1, 4, "PER"));
        this.assertNotEncodable(codec, chunkingBad4);
        this.assertNotEncodable(codec, chunkingBad4);

        // Zero-length chunk.
        ChunkingImpl chunkingBad5 = new ChunkingImpl("John ran");
        chunkingBad5.add(ChunkFactory.createChunk(5, 5, "LOC"));
        this.assertNotEncodable(codec, chunkingBad5);

        // A chunking with no chunks is encodable, and so is one chunk over the whole text.
        ChunkingImpl chunkingOk2 = new ChunkingImpl("John ran");
        Assert.assertTrue(codec.isEncodable(chunkingOk2));
        chunkingOk2.add(ChunkFactory.createChunk(0, 8, "LOC"));
        this.assertEncodable(codec, chunkingOk2);

        // "John Jones" and "Washington" inside a longer sentence.
        ChunkingImpl chunkingOk3 = new ChunkingImpl("Mr. John Jones ran to Washington.");
        chunkingOk3.add(ChunkFactory.createChunk(4, 14, "PER"));
        chunkingOk3.add(ChunkFactory.createChunk(22, 32, "LOC"));
        this.assertEncodable(codec, chunkingOk3);
    }

    /**
     * Checks that a tagging whose tokens and offsets match the text is
     * decodable, and that a tagging splitting "Johnny" into "John" + "ny" is
     * rejected.
     */
    @Test
    public void testDecodable() throws IOException, ClassNotFoundException {
        BioTagChunkCodec codec = new BioTagChunkCodec(IndoEuropeanTokenizerFactory.INSTANCE, true);

        int[] okStarts = {0, 5, 9, 12, 23};
        int[] okEnds = {4, 8, 11, 22, 25};
        StringTagging taggingOk = new StringTagging(
                Arrays.asList("John", "ran", "to", "Washington", "DC"),
                Arrays.asList("B_PER", "O", "O", "B_LOC", "I_LOC"),
                (CharSequence) "John ran to Washington DC",
                okStarts,
                okEnds);
        this.assertDecodable(codec, taggingOk);

        int[] badStarts = {0, 4, 7, 10};
        int[] badEnds = {4, 6, 10, 11};
        StringTagging taggingBad = new StringTagging(
                Arrays.asList("John", "ny", "ran", "."),
                Arrays.asList("B_PER", "I_PER", "O", "O"),
                (CharSequence) "Johnny ran.",
                badStarts,
                badEnds);
        this.assertNotDecodable(codec, taggingBad);
    }

    /**
     * Runs the encodability round-trip checks with the given codec and again
     * with a copy of it obtained through serialization and deserialization.
     *
     * @param codec    codec under test
     * @param chunking chunking expected to be encodable
     */
    void assertEncodable(TagChunkCodec codec, Chunking chunking) throws IOException, ClassNotFoundException {
        this.assertEncodable2(codec, chunking);
        Object roundTripped = AbstractExternalizable.serializeDeserialize((Serializable) codec);
        this.assertEncodable2((TagChunkCodec) roundTripped, chunking);
    }

    /**
     * Asserts that the chunking is encodable, that its tagging is decodable,
     * and that chunking to tagging to chunking to tagging reproduces equal
     * objects at each step.
     *
     * @param codec    codec under test
     * @param chunking chunking expected to survive the round trip
     */
    void assertEncodable2(TagChunkCodec codec, Chunking chunking) {
        Assert.assertTrue(codec.isEncodable(chunking));

        StringTagging encoded = codec.toStringTagging(chunking);
        Assert.assertTrue(codec.isDecodable(encoded));

        Chunking decoded = codec.toChunking(encoded);
        Assert.assertEquals(chunking, decoded);

        StringTagging reencoded = codec.toStringTagging(decoded);
        Assert.assertEquals(encoded, reencoded);
    }

    /**
     * Asserts that the codec reports the chunking as not encodable and that
     * {@code toTagging} throws {@link IllegalArgumentException} for it.
     *
     * @param codec    codec under test
     * @param chunking chunking expected to be rejected
     */
    void assertNotEncodable(TagChunkCodec codec, Chunking chunking) {
        Assert.assertFalse(codec.isEncodable(chunking));
        try {
            codec.toTagging(chunking);
            // Reaching this line means no exception was thrown.
            Assert.fail();
        } catch (IllegalArgumentException expected) {
            Asserts.succeed();
        }
    }

    /**
     * Runs the decodability round-trip checks with the given codec and again
     * with a copy of it obtained through serialization and deserialization.
     *
     * @param codec   codec under test
     * @param tagging tagging expected to be decodable
     */
    void assertDecodable(TagChunkCodec codec, StringTagging tagging) throws IOException, ClassNotFoundException {
        this.assertDecodable2(codec, tagging);
        Object roundTripped = AbstractExternalizable.serializeDeserialize((Serializable) codec);
        this.assertDecodable2((TagChunkCodec) roundTripped, tagging);
    }

    /**
     * Asserts that the tagging is decodable, that its chunking is encodable,
     * and that tagging to chunking to tagging to chunking reproduces equal
     * objects at each step.
     *
     * @param codec   codec under test
     * @param tagging tagging expected to survive the round trip
     */
    void assertDecodable2(TagChunkCodec codec, StringTagging tagging) {
        Assert.assertTrue(codec.isDecodable(tagging));

        Chunking decoded = codec.toChunking(tagging);
        Assert.assertTrue(codec.isEncodable(decoded));

        StringTagging reencoded = codec.toStringTagging(decoded);
        Assert.assertEquals(tagging, reencoded);

        Chunking redecoded = codec.toChunking(reencoded);
        Assert.assertEquals(decoded, redecoded);
    }

    /**
     * Asserts that the codec reports the tagging as not decodable and that
     * {@code toChunking} throws {@link IllegalArgumentException} for it.
     *
     * @param codec   codec under test
     * @param tagging tagging expected to be rejected
     */
    void assertNotDecodable(TagChunkCodec codec, StringTagging tagging) {
        Assert.assertFalse(codec.isDecodable(tagging));
        try {
            codec.toChunking(tagging);
            // Reaching this line means no exception was thrown.
            Assert.fail();
        } catch (IllegalArgumentException expected) {
            Asserts.succeed();
        }
    }
}

