package org.apache.pinot.queries;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.ControlledRealTimeReopenThread;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.FSDirectory;
import org.apache.pinot.common.response.broker.ResultTable;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

/* loaded from: input_file:org/apache/pinot/queries/TextSearchQueriesTest.class */
public class TextSearchQueriesTest extends BaseQueriesTest {
    // Table and schema name used when building the test segment.
    private static final String TABLE_NAME = "MyTable";
    // Name of the single segment built under INDEX_DIR and loaded in setUp().
    private static final String SEGMENT_NAME = "testSegment";
    // Metric column; row i is written with the value INT_BASE_VALUE + i (see createTestData()).
    private static final String INT_COL_NAME = "INT_COL";
    // Offset added to the row index to produce INT_COL values; also used as the loop bound in RealtimeReader.
    private static final int INT_BASE_VALUE = 1000;
    // Segment loaded in setUp() and destroyed in tearDown().
    private IndexSegment _indexSegment;
    // Segment list handed to the base class; setUp() fills it with the same loaded segment twice.
    private List<IndexSegment> _indexSegments;
    // Working directory for the generated segment; deleted before setUp() and after tearDown().
    private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "TextSearchQueriesTest");
    // Column populated with one line of data/text_search_data/queries.txt per row.
    private static final String QUERY_LOG_TEXT_COL_NAME = "QUERY_LOG_TEXT_COL";
    // Column populated with skills.txt lines for the first rows and "software engineering" for the rest.
    private static final String SKILLS_TEXT_COL_NAME = "SKILLS_TEXT_COL";
    // Same data as SKILLS_TEXT_COL; loaded with "useANDForMultiTermTextIndexQueries" = "true" in setUp().
    private static final String SKILLS_TEXT_COL_MULTI_TERM_NAME = "SKILLS_TEXT_COL_1";
    // Same data as SKILLS_TEXT_COL; built with "noRawDataForTextIndex" = "true" and
    // "rawValueForTextIndex" = "ILoveCoding" in buildSegment().
    private static final String SKILLS_TEXT_NO_RAW_NAME = "SKILLS_TEXT_COL_2";
    // Multi-value variant: each skills line split on ", " (or {"software", "engineering"} for later rows).
    private static final String SKILLS_TEXT_MV_COL_NAME = "SKILLS_TEXT_MV_COL";
    // Columns configured with RAW encoding + TEXT index and registered as no-dictionary columns.
    private static final List<String> RAW_TEXT_INDEX_COLUMNS = Arrays.asList(QUERY_LOG_TEXT_COL_NAME, SKILLS_TEXT_COL_NAME, SKILLS_TEXT_COL_MULTI_TERM_NAME, SKILLS_TEXT_NO_RAW_NAME, SKILLS_TEXT_MV_COL_NAME);
    // Single-value skills column configured with DICTIONARY encoding + TEXT index.
    private static final String SKILLS_TEXT_COL_DICT_NAME = "SKILLS_TEXT_COL_DICT";
    // Multi-value skills column configured with DICTIONARY encoding + TEXT index.
    private static final String SKILLS_TEXT_MV_COL_DICT_NAME = "SKILLS_TEXT_MV_COL_DICT";
    // Columns configured with DICTIONARY encoding + TEXT index; also registered as inverted-index columns.
    private static final List<String> DICT_TEXT_INDEX_COLUMNS = Arrays.asList(SKILLS_TEXT_COL_DICT_NAME, SKILLS_TEXT_MV_COL_DICT_NAME);

    /* loaded from: input_file:org/apache/pinot/queries/TextSearchQueriesTest$RealtimeReader.class */
    /**
     * Reader task that repeatedly runs the query {@code "machine learning" AND spark} against the
     * {@code skill} field through a {@link SearcherManager}.
     *
     * <p>The task performs {@code INT_BASE_VALUE} iterations, sleeping 1 ms between them. Once the
     * iteration index exceeds 200 it asserts that the query matches at least one document and that
     * the hit count is not smaller than in the previous iteration.
     */
    private static class RealtimeReader implements Runnable {
        private final QueryParser _queryParser;
        private final SearcherManager _searcherManager;

        /**
         * @param searcherManager manager used to acquire and release searchers
         * @param standardAnalyzer analyzer used to parse the query text
         */
        RealtimeReader(SearcherManager searcherManager, StandardAnalyzer standardAnalyzer) {
            this._queryParser = new QueryParser("skill", standardAnalyzer);
            this._searcherManager = searcherManager;
        }

        /**
         * Runs the search loop.
         *
         * @throws RuntimeException wrapping any checked failure (parse, search, release, interruption)
         */
        @Override
        public void run() {
            try {
                Query query = _queryParser.parse("\"machine learning\" AND spark");
                int previousHits = 0;
                for (int iteration = 0; iteration < TextSearchQueriesTest.INT_BASE_VALUE; iteration++) {
                    IndexSearcher searcher = _searcherManager.acquire();
                    try {
                        int hits = searcher.search(query, Integer.MAX_VALUE).scoreDocs.length;
                        if (iteration > 200) {
                            Assert.assertTrue(hits > 0);
                            Assert.assertTrue(hits >= previousHits);
                        }
                        previousHits = hits;
                    } finally {
                        // Always hand the searcher back, even when the search or an assertion fails;
                        // AssertionError is not an Exception and would otherwise skip the release.
                        _searcherManager.release(searcher);
                    }
                    Thread.sleep(1L);
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag so the owner of this thread can observe it.
                Thread.currentThread().interrupt();
                throw new RuntimeException("Caught exception in realtime reader", e);
            } catch (Exception e) {
                throw new RuntimeException("Caught exception in realtime reader", e);
            }
        }
    }

    /* loaded from: input_file:org/apache/pinot/queries/TextSearchQueriesTest$RealtimeWriter.class */
    /**
     * Writer task that indexes 500000 single-field documents into the supplied {@link IndexWriter}.
     *
     * <p>Document text comes from the classpath resource {@code data/text_search_data/skills.txt}:
     * the first documents use the file's lines in order, and once every line has been used each
     * further document uses a randomly chosen line. The writer is committed and closed when
     * indexing ends, whether it succeeded or failed.
     */
    private static class RealtimeWriter implements Runnable {
        private static final String SKILLS_RESOURCE = "data/text_search_data/skills.txt";
        private static final int NUM_DOCS_TO_INDEX = 500000;

        private final IndexWriter _indexWriter;

        /**
         * @param indexWriter writer that receives the documents; it is committed and closed by {@link #run()}
         */
        RealtimeWriter(IndexWriter indexWriter) {
            this._indexWriter = indexWriter;
        }

        /**
         * Reads the skills file and indexes the documents.
         *
         * @throws RuntimeException if the skills file cannot be read, a document cannot be added, or
         *     the writer cannot be committed/closed; the underlying failure is attached as the cause
         */
        @Override
        public void run() {
            indexSkills(readSkills());
        }

        /** Reads every line of the skills resource into a list (no fixed upper bound on line count). */
        private List<String> readSkills() {
            List<String> skills = new ArrayList<>();
            // getResourceAsStream avoids converting the resource URL to a file path, which breaks
            // when the path contains URL-encoded characters such as spaces.
            try (InputStream inputStream =
                    Objects.requireNonNull(getClass().getClassLoader().getResourceAsStream(SKILLS_RESOURCE));
                    BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    skills.add(line);
                }
            } catch (Exception e) {
                throw new RuntimeException("Caught exception while reading skills file", e);
            }
            return skills;
        }

        /** Adds the documents to the index, then commits and closes the writer. */
        private void indexSkills(List<String> skills) {
            RuntimeException indexingFailure = null;
            try {
                Random random = new Random();
                for (int docId = 0; docId < NUM_DOCS_TO_INDEX; docId++) {
                    // Use each line once in file order, then fall back to random lines.
                    String skill = docId < skills.size() ? skills.get(docId) : skills.get(random.nextInt(skills.size()));
                    Document document = new Document();
                    document.add(new TextField("skill", skill, Field.Store.NO));
                    _indexWriter.addDocument(document);
                }
            } catch (Exception e) {
                indexingFailure = new RuntimeException("Caught exception while adding a document to index", e);
                throw indexingFailure;
            } finally {
                try {
                    _indexWriter.commit();
                    _indexWriter.close();
                } catch (Exception e) {
                    RuntimeException closeFailure = new RuntimeException("Failed to commit/close the index writer", e);
                    if (indexingFailure == null) {
                        throw closeFailure;
                    }
                    // Keep the indexing failure as the primary error; record the close failure on it.
                    indexingFailure.addSuppressed(closeFailure);
                }
            }
        }
    }

    /**
     * Supplies the filter clause for the base query test.
     *
     * @return the empty string
     */
    @Override
    protected String getFilter() {
        final String noFilter = "";
        return noFilter;
    }

    /**
     * Exposes the segment under test to the base class.
     *
     * @return the segment stored in {@code _indexSegment}
     */
    @Override
    protected IndexSegment getIndexSegment() {
        return _indexSegment;
    }

    /**
     * Exposes the list of segments under test to the base class.
     *
     * @return the list stored in {@code _indexSegments}
     */
    @Override
    protected List<IndexSegment> getIndexSegments() {
        return _indexSegments;
    }

    /**
     * Builds the test segment from scratch and loads it with text indexes enabled.
     *
     * <p>All raw and dictionary-encoded text columns are registered as text-index columns, the
     * dictionary-encoded ones additionally as inverted-index columns, and
     * {@code SKILLS_TEXT_COL_1} is loaded with {@code useANDForMultiTermTextIndexQueries=true}.
     * The loaded segment is stored both as the single segment and, twice, in the segment list.
     *
     * @throws Exception if building or loading the segment fails
     */
    @BeforeClass
    public void setUp() throws Exception {
        FileUtils.deleteQuietly(INDEX_DIR);
        buildSegment();

        HashSet<String> textIndexColumns = new HashSet<>(RAW_TEXT_INDEX_COLUMNS);
        textIndexColumns.addAll(DICT_TEXT_INDEX_COLUMNS);

        Map<String, String> multiTermProperties = new HashMap<>();
        multiTermProperties.put("useANDForMultiTermTextIndexQueries", "true");
        Map<String, Map<String, String>> columnProperties = new HashMap<>();
        columnProperties.put(SKILLS_TEXT_COL_MULTI_TERM_NAME, multiTermProperties);

        IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
        indexLoadingConfig.setTextIndexColumns(textIndexColumns);
        indexLoadingConfig.setInvertedIndexColumns(new HashSet<>(DICT_TEXT_INDEX_COLUMNS));
        indexLoadingConfig.setColumnProperties(columnProperties);

        IndexSegment segment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig);
        _indexSegment = segment;
        _indexSegments = Arrays.asList(segment, segment);
    }

    /** Destroys the loaded segment and removes the temporary index directory. */
    @AfterClass
    public void tearDown() {
        _indexSegment.destroy();
        FileUtils.deleteQuietly(INDEX_DIR);
    }

    /**
     * Generates the test segment named {@code SEGMENT_NAME} under {@code INDEX_DIR}.
     *
     * <p>Every column in {@code RAW_TEXT_INDEX_COLUMNS} gets a RAW-encoded text index and every
     * column in {@code DICT_TEXT_INDEX_COLUMNS} a DICTIONARY-encoded text index.
     * {@code SKILLS_TEXT_COL_2} is additionally configured with
     * {@code noRawDataForTextIndex=true} and {@code rawValueForTextIndex=ILoveCoding}.
     *
     * @throws Exception if reading the test data or building the segment fails
     */
    private void buildSegment() throws Exception {
        List<GenericRow> rows = createTestData();

        List<FieldConfig> fieldConfigs = new ArrayList<>(RAW_TEXT_INDEX_COLUMNS.size() + DICT_TEXT_INDEX_COLUMNS.size());
        for (String column : RAW_TEXT_INDEX_COLUMNS) {
            fieldConfigs.add(new FieldConfig(column, FieldConfig.EncodingType.RAW, FieldConfig.IndexType.TEXT,
                    (FieldConfig.CompressionCodec) null, (Map<String, String>) null));
        }
        for (String column : DICT_TEXT_INDEX_COLUMNS) {
            fieldConfigs.add(new FieldConfig(column, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.TEXT,
                    (FieldConfig.CompressionCodec) null, (Map<String, String>) null));
        }

        Schema schema = new Schema.SchemaBuilder()
                .setSchemaName(TABLE_NAME)
                .addSingleValueDimension(QUERY_LOG_TEXT_COL_NAME, FieldSpec.DataType.STRING)
                .addSingleValueDimension(SKILLS_TEXT_COL_NAME, FieldSpec.DataType.STRING)
                .addSingleValueDimension(SKILLS_TEXT_COL_DICT_NAME, FieldSpec.DataType.STRING)
                .addSingleValueDimension(SKILLS_TEXT_COL_MULTI_TERM_NAME, FieldSpec.DataType.STRING)
                .addSingleValueDimension(SKILLS_TEXT_NO_RAW_NAME, FieldSpec.DataType.STRING)
                .addMultiValueDimension(SKILLS_TEXT_MV_COL_NAME, FieldSpec.DataType.STRING)
                .addMultiValueDimension(SKILLS_TEXT_MV_COL_DICT_NAME, FieldSpec.DataType.STRING)
                .addMetric(INT_COL_NAME, FieldSpec.DataType.INT)
                .build();

        SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(
                new TableConfigBuilder(TableType.OFFLINE)
                        .setTableName(TABLE_NAME)
                        .setNoDictionaryColumns(RAW_TEXT_INDEX_COLUMNS)
                        .setInvertedIndexColumns(DICT_TEXT_INDEX_COLUMNS)
                        .setFieldConfigList(fieldConfigs)
                        .build(),
                schema);
        segmentGeneratorConfig.setOutDir(INDEX_DIR.getPath());
        segmentGeneratorConfig.setTableName(TABLE_NAME);
        segmentGeneratorConfig.setSegmentName(SEGMENT_NAME);

        Map<String, String> noRawProperties = new HashMap<>();
        noRawProperties.put("noRawDataForTextIndex", "true");
        noRawProperties.put("rawValueForTextIndex", "ILoveCoding");
        Map<String, Map<String, String>> columnProperties = new HashMap<>();
        columnProperties.put(SKILLS_TEXT_NO_RAW_NAME, noRawProperties);
        segmentGeneratorConfig.setColumnProperties(columnProperties);

        SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
        // try-with-resources closes the record reader on both the success and the failure path.
        try (GenericRowRecordReader recordReader = new GenericRowRecordReader(rows)) {
            driver.init(segmentGeneratorConfig, recordReader);
            driver.build();
        }
    }

    /**
     * Creates the rows for the test segment from two classpath resources.
     *
     * <p>One row is produced per line of {@code data/text_search_data/queries.txt}. Row {@code i}
     * gets {@code INT_COL = INT_BASE_VALUE + i} and the query line as {@code QUERY_LOG_TEXT_COL}.
     * The first rows take their skills columns from the corresponding line of
     * {@code data/text_search_data/skills.txt} (multi-value columns use the line split on
     * {@code ", "}); all remaining rows use the fixed text {@code "software engineering"}.
     *
     * @return the generated rows, in query-file order
     * @throws Exception if a resource is missing or cannot be read
     */
    private List<GenericRow> createTestData() throws Exception {
        List<String> skills = new ArrayList<>();
        List<String[]> multiValueSkills = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream) Objects.requireNonNull(
                getClass().getClassLoader().getResourceAsStream("data/text_search_data/skills.txt"))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                skills.add(line);
                multiValueSkills.add(StringUtils.splitByWholeSeparator(line, ", "));
            }
        }
        // The expected results in the tests are tied to this exact skills file size.
        Assert.assertEquals(skills.size(), 24);

        List<GenericRow> rows = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream) Objects.requireNonNull(
                getClass().getClassLoader().getResourceAsStream("data/text_search_data/queries.txt"))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int rowId = rows.size();
                GenericRow row = new GenericRow();
                row.putValue(INT_COL_NAME, Integer.valueOf(INT_BASE_VALUE + rowId));
                row.putValue(QUERY_LOG_TEXT_COL_NAME, line);
                if (rowId < skills.size()) {
                    String skill = skills.get(rowId);
                    row.putValue(SKILLS_TEXT_COL_NAME, skill);
                    row.putValue(SKILLS_TEXT_COL_DICT_NAME, skill);
                    row.putValue(SKILLS_TEXT_COL_MULTI_TERM_NAME, skill);
                    row.putValue(SKILLS_TEXT_NO_RAW_NAME, skill);
                    row.putValue(SKILLS_TEXT_MV_COL_NAME, multiValueSkills.get(rowId));
                    row.putValue(SKILLS_TEXT_MV_COL_DICT_NAME, multiValueSkills.get(rowId));
                } else {
                    // Rows beyond the skills file all share the same filler text.
                    row.putValue(SKILLS_TEXT_COL_NAME, "software engineering");
                    row.putValue(SKILLS_TEXT_COL_DICT_NAME, "software engineering");
                    row.putValue(SKILLS_TEXT_COL_MULTI_TERM_NAME, "software engineering");
                    row.putValue(SKILLS_TEXT_NO_RAW_NAME, "software engineering");
                    row.putValue(SKILLS_TEXT_MV_COL_NAME, new String[]{"software", "engineering"});
                    row.putValue(SKILLS_TEXT_MV_COL_DICT_NAME, new String[]{"software", "engineering"});
                }
                rows.add(row);
            }
        }
        Assert.assertEquals(rows.size(), 24150);
        return rows;
    }

    @Test
    public void testTextSearch() throws Exception {
        testTextSearchSelectQueryHelper("SELECT INT_COL, QUERY_LOG_TEXT_COL FROM MyTable WHERE TEXT_MATCH(QUERY_LOG_TEXT_COL, '\"SELECT dimensionCol2\"') LIMIT 50000", 11787, false, null);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(QUERY_LOG_TEXT_COL, '\"SELECT dimensionCol2\"')", 11787);
        testTextSearchSelectQueryHelper("SELECT INT_COL, QUERY_LOG_TEXT_COL FROM MyTable WHERE TEXT_MATCH(QUERY_LOG_TEXT_COL, '\"SELECT count\"') LIMIT 50000", 12363, false, null);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(QUERY_LOG_TEXT_COL, '\"SELECT count\"')", 12363);
        testTextSearchSelectQueryHelper("SELECT INT_COL, QUERY_LOG_TEXT_COL FROM MyTable WHERE TEXT_MATCH(QUERY_LOG_TEXT_COL, '\"GROUP BY\"') LIMIT 50000", 26, true, null);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(QUERY_LOG_TEXT_COL, '\"GROUP BY\"')", 26);
        ArrayList arrayList = new ArrayList();
        arrayList.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList.add(new Object[]{1009, "Distributed systems, database development, columnar query engine, database kernel, storage, indexing and transaction processing, building large scale systems"});
        arrayList.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        arrayList.add(new Object[]{1012, "Distributed systems, Java, database engine, cluster management, docker image building and distribution"});
        arrayList.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        arrayList.add(new Object[]{1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"});
        testSkillsColumn("\"Distributed systems\"", arrayList);
        ArrayList arrayList2 = new ArrayList();
        arrayList2.add(new Object[]{1014, "Apache spark, Java, C++, query processing, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        arrayList2.add(new Object[]{1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"});
        testSkillsColumn("\"query processing\"", arrayList2);
        ArrayList arrayList3 = new ArrayList();
        arrayList3.add(new Object[]{1003, "Java, C++, worked on open source projects, coursera machine learning"});
        arrayList3.add(new Object[]{1004, "Machine learning, Tensor flow, Java, Stanford university,"});
        arrayList3.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList3.add(new Object[]{1006, "Java, Python, C++, Machine learning, building and deploying large scale production systems, concurrency, multi-threading, CPU processing"});
        arrayList3.add(new Object[]{1007, "C++, Python, Tensor flow, database kernel, storage, indexing and transaction processing, building large scale systems, Machine learning"});
        arrayList3.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        arrayList3.add(new Object[]{1011, "CUDA, GPU, Python, Machine learning, database kernel, storage, indexing and transaction processing, building large scale systems"});
        arrayList3.add(new Object[]{1016, "CUDA, GPU processing, Tensor flow, Pandas, Python, Jupyter notebook, spark, Machine learning, building high performance scalable systems"});
        arrayList3.add(new Object[]{1019, "C++, Java, Python, realtime streaming systems, Machine learning, spark, Kubernetes, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        arrayList3.add(new Object[]{1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"});
        testSkillsColumn("\"Machine learning\"", arrayList3);
        ArrayList arrayList4 = new ArrayList();
        arrayList4.add(new Object[]{1004, "Machine learning, Tensor flow, Java, Stanford university,"});
        arrayList4.add(new Object[]{1007, "C++, Python, Tensor flow, database kernel, storage, indexing and transaction processing, building large scale systems, Machine learning"});
        arrayList4.add(new Object[]{1016, "CUDA, GPU processing, Tensor flow, Pandas, Python, Jupyter notebook, spark, Machine learning, building high performance scalable systems"});
        testSkillsColumn("\"Machine learning\" AND \"Tensor flow\"", arrayList4);
        ArrayList arrayList5 = new ArrayList();
        arrayList5.add(new Object[]{Integer.valueOf(INT_BASE_VALUE), "Accounts, Banking, Insurance, worked in NGO, Java"});
        arrayList5.add(new Object[]{1003, "Java, C++, worked on open source projects, coursera machine learning"});
        arrayList5.add(new Object[]{1004, "Machine learning, Tensor flow, Java, Stanford university,"});
        arrayList5.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList5.add(new Object[]{1006, "Java, Python, C++, Machine learning, building and deploying large scale production systems, concurrency, multi-threading, CPU processing"});
        arrayList5.add(new Object[]{1008, "Amazon EC2, AWS, hadoop, big data, spark, building high performance scalable systems, building and deploying large scale production systems, concurrency, multi-threading, Java, C++, CPU processing"});
        arrayList5.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        arrayList5.add(new Object[]{1012, "Distributed systems, Java, database engine, cluster management, docker image building and distribution"});
        arrayList5.add(new Object[]{1014, "Apache spark, Java, C++, query processing, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        arrayList5.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        arrayList5.add(new Object[]{1018, "Realtime stream processing, publish subscribe, columnar processing for data warehouses, concurrency, Java, multi-threading, C++,"});
        arrayList5.add(new Object[]{1019, "C++, Java, Python, realtime streaming systems, Machine learning, spark, Kubernetes, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        testSkillsColumn("Java", arrayList5);
        ArrayList arrayList6 = new ArrayList();
        arrayList6.add(new Object[]{1003, "Java, C++, worked on open source projects, coursera machine learning"});
        arrayList6.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList6.add(new Object[]{1006, "Java, Python, C++, Machine learning, building and deploying large scale production systems, concurrency, multi-threading, CPU processing"});
        arrayList6.add(new Object[]{1008, "Amazon EC2, AWS, hadoop, big data, spark, building high performance scalable systems, building and deploying large scale production systems, concurrency, multi-threading, Java, C++, CPU processing"});
        arrayList6.add(new Object[]{1014, "Apache spark, Java, C++, query processing, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        arrayList6.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        arrayList6.add(new Object[]{1018, "Realtime stream processing, publish subscribe, columnar processing for data warehouses, concurrency, Java, multi-threading, C++,"});
        arrayList6.add(new Object[]{1019, "C++, Java, Python, realtime streaming systems, Machine learning, spark, Kubernetes, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        testSkillsColumn("Java AND C++", arrayList6);
        ArrayList arrayList7 = new ArrayList();
        arrayList7.add(new Object[]{1003, "Java, C++, worked on open source projects, coursera machine learning"});
        arrayList7.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList7.add(new Object[]{1008, "Amazon EC2, AWS, hadoop, big data, spark, building high performance scalable systems, building and deploying large scale production systems, concurrency, multi-threading, Java, C++, CPU processing"});
        arrayList7.add(new Object[]{1014, "Apache spark, Java, C++, query processing, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        testSkillsColumn("\"Java C++\"", arrayList7);
        ArrayList arrayList8 = new ArrayList();
        arrayList8.add(new Object[]{1016, "CUDA, GPU processing, Tensor flow, Pandas, Python, Jupyter notebook, spark, Machine learning, building high performance scalable systems"});
        testSkillsColumn("\"Machine learning\" AND \"gpu processing\"", arrayList8);
        ArrayList arrayList9 = new ArrayList();
        arrayList9.add(new Object[]{1011, "CUDA, GPU, Python, Machine learning, database kernel, storage, indexing and transaction processing, building large scale systems"});
        arrayList9.add(new Object[]{1016, "CUDA, GPU processing, Tensor flow, Pandas, Python, Jupyter notebook, spark, Machine learning, building high performance scalable systems"});
        testSkillsColumn("\"Machine learning\" AND gpu", arrayList9);
        ArrayList arrayList10 = new ArrayList();
        arrayList10.add(new Object[]{1011, "CUDA, GPU, Python, Machine learning, database kernel, storage, indexing and transaction processing, building large scale systems"});
        arrayList10.add(new Object[]{1016, "CUDA, GPU processing, Tensor flow, Pandas, Python, Jupyter notebook, spark, Machine learning, building high performance scalable systems"});
        testSkillsColumn("\"Machine learning\" AND gpu AND python", arrayList10);
        ArrayList arrayList11 = new ArrayList();
        arrayList11.add(new Object[]{1013, "Kubernetes, cluster management, operating systems, concurrency, multi-threading, apache airflow, Apache Spark,"});
        arrayList11.add(new Object[]{1014, "Apache spark, Java, C++, query processing, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        arrayList11.add(new Object[]{1015, "Big data stream processing, Apache Flink, Apache Beam, database kernel, distributed query engines for analytics and data warehouses"});
        arrayList11.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        arrayList11.add(new Object[]{1019, "C++, Java, Python, realtime streaming systems, Machine learning, spark, Kubernetes, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        arrayList11.add(new Object[]{1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"});
        testSkillsColumn("apache", arrayList11);
        ArrayList arrayList12 = new ArrayList();
        arrayList12.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        arrayList12.add(new Object[]{1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"});
        testSkillsColumn("\"distributed systems\" AND apache", arrayList12);
        ArrayList arrayList13 = new ArrayList();
        arrayList13.add(new Object[]{1007, "C++, Python, Tensor flow, database kernel, storage, indexing and transaction processing, building large scale systems, Machine learning"});
        arrayList13.add(new Object[]{1009, "Distributed systems, database development, columnar query engine, database kernel, storage, indexing and transaction processing, building large scale systems"});
        arrayList13.add(new Object[]{1011, "CUDA, GPU, Python, Machine learning, database kernel, storage, indexing and transaction processing, building large scale systems"});
        arrayList13.add(new Object[]{1012, "Distributed systems, Java, database engine, cluster management, docker image building and distribution"});
        arrayList13.add(new Object[]{1015, "Big data stream processing, Apache Flink, Apache Beam, database kernel, distributed query engines for analytics and data warehouses"});
        arrayList13.add(new Object[]{1021, "Database engine, OLAP systems, OLTP transaction processing at large scale, concurrency, multi-threading, GO, building large scale systems"});
        testSkillsColumn("database", arrayList13);
        ArrayList arrayList14 = new ArrayList();
        arrayList14.add(new Object[]{1012, "Distributed systems, Java, database engine, cluster management, docker image building and distribution"});
        arrayList14.add(new Object[]{1021, "Database engine, OLAP systems, OLTP transaction processing at large scale, concurrency, multi-threading, GO, building large scale systems"});
        testSkillsColumn("\"database engine\"", arrayList14);
        ArrayList arrayList15 = new ArrayList();
        arrayList15.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        arrayList15.add(new Object[]{1018, "Realtime stream processing, publish subscribe, columnar processing for data warehouses, concurrency, Java, multi-threading, C++,"});
        testSkillsColumn("\"publish subscribe\"", arrayList15);
        ArrayList arrayList16 = new ArrayList();
        arrayList16.add(new Object[]{Integer.valueOf(INT_BASE_VALUE), "Accounts, Banking, Insurance, worked in NGO, Java"});
        testSkillsColumn("\"accounts banking insurance\"", arrayList16);
        ArrayList arrayList17 = new ArrayList();
        arrayList17.add(new Object[]{Integer.valueOf(INT_BASE_VALUE), "Accounts, Banking, Insurance, worked in NGO, Java"});
        arrayList17.add(new Object[]{1001, "Accounts, Banking, Finance, Insurance"});
        arrayList17.add(new Object[]{1002, "Accounts, Finance, Banking, Insurance"});
        testSkillsColumn("accounts AND banking AND insurance", arrayList17);
        ArrayList arrayList18 = new ArrayList();
        arrayList18.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList18.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        testSkillsColumn("\"distributed systems\" AND Java AND C++", arrayList18);
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" Java C++') LIMIT 50000", arrayList18.size(), false, arrayList18);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" Java C++') LIMIT 50000", arrayList18.size());
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" AND Java AND C++') LIMIT 50000", arrayList18.size(), false, arrayList18);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" AND Java AND C++') LIMIT 50000", arrayList18.size());
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_2, '\"distributed systems\" AND Java AND C++') LIMIT 50000", arrayList18.size());
        ArrayList arrayList19 = new ArrayList();
        arrayList19.add(new Object[]{1005, "ILoveCoding"});
        arrayList19.add(new Object[]{1017, "ILoveCoding"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL_2 FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_2, '\"distributed systems\" AND Java AND C++') LIMIT 50000", arrayList19.size(), false, arrayList19);
        ArrayList arrayList20 = new ArrayList();
        arrayList20.add(new Object[]{Integer.valueOf(INT_BASE_VALUE), "Accounts, Banking, Insurance, worked in NGO, Java"});
        arrayList20.add(new Object[]{1003, "Java, C++, worked on open source projects, coursera machine learning"});
        arrayList20.add(new Object[]{1004, "Machine learning, Tensor flow, Java, Stanford university,"});
        arrayList20.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList20.add(new Object[]{1006, "Java, Python, C++, Machine learning, building and deploying large scale production systems, concurrency, multi-threading, CPU processing"});
        arrayList20.add(new Object[]{1007, "C++, Python, Tensor flow, database kernel, storage, indexing and transaction processing, building large scale systems, Machine learning"});
        arrayList20.add(new Object[]{1008, "Amazon EC2, AWS, hadoop, big data, spark, building high performance scalable systems, building and deploying large scale production systems, concurrency, multi-threading, Java, C++, CPU processing"});
        arrayList20.add(new Object[]{1009, "Distributed systems, database development, columnar query engine, database kernel, storage, indexing and transaction processing, building large scale systems"});
        arrayList20.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        arrayList20.add(new Object[]{1012, "Distributed systems, Java, database engine, cluster management, docker image building and distribution"});
        arrayList20.add(new Object[]{1014, "Apache spark, Java, C++, query processing, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        arrayList20.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        arrayList20.add(new Object[]{1018, "Realtime stream processing, publish subscribe, columnar processing for data warehouses, concurrency, Java, multi-threading, C++,"});
        arrayList20.add(new Object[]{1019, "C++, Java, Python, realtime streaming systems, Machine learning, spark, Kubernetes, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        arrayList20.add(new Object[]{1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\" Java C++') LIMIT 50000", arrayList20.size(), false, arrayList20);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\" Java C++') LIMIT 50000", arrayList20.size());
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" OR Java OR C++') LIMIT 50000", arrayList20.size(), false, arrayList20);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" OR Java OR C++') LIMIT 50000", arrayList20.size());
        ArrayList arrayList21 = new ArrayList();
        arrayList21.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList21.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        arrayList21.add(new Object[]{1012, "Distributed systems, Java, database engine, cluster management, docker image building and distribution"});
        arrayList21.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\" AND (Java C++)') LIMIT 50000", arrayList21.size(), false, arrayList21);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\" AND (Java C++)') LIMIT 50000", arrayList21.size());
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" AND (Java OR C++)') LIMIT 50000", arrayList21.size(), false, arrayList21);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" AND (Java OR C++)') LIMIT 50000", arrayList21.size());
        ArrayList arrayList22 = new ArrayList();
        arrayList22.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        arrayList22.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" AND (Java C++)') LIMIT 50000", arrayList22.size(), false, arrayList22);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL_1, '\"distributed systems\" AND (Java C++)') LIMIT 50000", arrayList22.size());
        ArrayList arrayList23 = new ArrayList();
        arrayList23.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        arrayList23.add(new Object[]{1015, "Big data stream processing, Apache Flink, Apache Beam, database kernel, distributed query engines for analytics and data warehouses"});
        arrayList23.add(new Object[]{1018, "Realtime stream processing, publish subscribe, columnar processing for data warehouses, concurrency, Java, multi-threading, C++,"});
        arrayList23.add(new Object[]{1019, "C++, Java, Python, realtime streaming systems, Machine learning, spark, Kubernetes, transaction processing, distributed storage, concurrency, multi-threading, apache airflow"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, 'stream*') LIMIT 50000", arrayList23.size(), false, arrayList23);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, 'stream*') LIMIT 50000", arrayList23.size());
        ArrayList arrayList24 = new ArrayList();
        arrayList24.add(new Object[]{1022, "GET /administrator/ HTTP/1.1 200 4263 - Mozilla/5.0 (Windows NT 6.0; rv:34.0) Gecko/20100101 Firefox/34.0 - NullPointerException"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '/.*exception/') LIMIT 50000", arrayList24.size(), false, arrayList24);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '/.*exception/') LIMIT 50000", arrayList24.size());
    }

    /**
     * Exercises TEXT_MATCH predicates combined with other filters (range, equality, a second
     * TEXT_MATCH, and an equality on the dictionary-encoded text column) through AND / OR.
     *
     * <p>Each scenario builds the expected rows, then runs a selection query (row-by-row
     * comparison) and the matching COUNT(*) query (compared against the expected row count).
     *
     * @throws Exception if any query helper fails
     */
    @Test
    public void testTextSearchWithAdditionalFilter() throws Exception {
        // INT_COL >= 1010 AND phrase match, against both the raw and the dictionary text column.
        List<Object[]> lowerBoundAndPhrase = new ArrayList<>();
        lowerBoundAndPhrase.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        lowerBoundAndPhrase.add(new Object[]{1012, "Distributed systems, Java, database engine, cluster management, docker image building and distribution"});
        lowerBoundAndPhrase.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        lowerBoundAndPhrase.add(new Object[]{1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE INT_COL >= 1010 AND TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", lowerBoundAndPhrase.size(), false, lowerBoundAndPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL >= 1010 AND TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\"') LIMIT 50000", lowerBoundAndPhrase.size());
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE INT_COL >= 1010 AND TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"Distributed systems\"') LIMIT 50000", lowerBoundAndPhrase.size(), false, lowerBoundAndPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL >= 1010 AND TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"distributed systems\"') LIMIT 50000", lowerBoundAndPhrase.size());

        // INT_COL <= 1010 AND phrase match.
        List<Object[]> upperBoundAndPhrase = new ArrayList<>();
        upperBoundAndPhrase.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        upperBoundAndPhrase.add(new Object[]{1009, "Distributed systems, database development, columnar query engine, database kernel, storage, indexing and transaction processing, building large scale systems"});
        upperBoundAndPhrase.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE INT_COL <= 1010 AND TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", upperBoundAndPhrase.size(), false, upperBoundAndPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL <= 1010 AND TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\"') LIMIT 50000", upperBoundAndPhrase.size());
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE INT_COL <= 1010 AND TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"Distributed systems\"') LIMIT 50000", upperBoundAndPhrase.size(), false, upperBoundAndPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL <= 1010 AND TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"distributed systems\"') LIMIT 50000", upperBoundAndPhrase.size());

        // INT_COL >= 1010 OR phrase match: only the count is checked, against a hard-coded total.
        // NOTE(review): 24142 presumably follows from the size of the generated data set - confirm.
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL >= 1010 OR TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\"') LIMIT 50000", 24142);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL >= 1010 OR TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"distributed systems\"') LIMIT 50000", 24142);

        // INT_COL <= 1010 OR phrase match.
        List<Object[]> upperBoundOrPhrase = new ArrayList<>();
        upperBoundOrPhrase.add(new Object[]{Integer.valueOf(INT_BASE_VALUE), "Accounts, Banking, Insurance, worked in NGO, Java"});
        upperBoundOrPhrase.add(new Object[]{1001, "Accounts, Banking, Finance, Insurance"});
        upperBoundOrPhrase.add(new Object[]{1002, "Accounts, Finance, Banking, Insurance"});
        upperBoundOrPhrase.add(new Object[]{1003, "Java, C++, worked on open source projects, coursera machine learning"});
        upperBoundOrPhrase.add(new Object[]{1004, "Machine learning, Tensor flow, Java, Stanford university,"});
        upperBoundOrPhrase.add(new Object[]{1005, "Distributed systems, Java, C++, Go, distributed query engines for analytics and data warehouses, Machine learning, spark, Kubernetes, transaction processing"});
        upperBoundOrPhrase.add(new Object[]{1006, "Java, Python, C++, Machine learning, building and deploying large scale production systems, concurrency, multi-threading, CPU processing"});
        upperBoundOrPhrase.add(new Object[]{1007, "C++, Python, Tensor flow, database kernel, storage, indexing and transaction processing, building large scale systems, Machine learning"});
        upperBoundOrPhrase.add(new Object[]{1008, "Amazon EC2, AWS, hadoop, big data, spark, building high performance scalable systems, building and deploying large scale production systems, concurrency, multi-threading, Java, C++, CPU processing"});
        upperBoundOrPhrase.add(new Object[]{1009, "Distributed systems, database development, columnar query engine, database kernel, storage, indexing and transaction processing, building large scale systems"});
        upperBoundOrPhrase.add(new Object[]{1010, "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed storage, concurrency, multi-threading"});
        upperBoundOrPhrase.add(new Object[]{1012, "Distributed systems, Java, database engine, cluster management, docker image building and distribution"});
        upperBoundOrPhrase.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        upperBoundOrPhrase.add(new Object[]{1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE INT_COL <= 1010 OR TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", upperBoundOrPhrase.size(), false, upperBoundOrPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL <= 1010 OR TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\"') LIMIT 50000", upperBoundOrPhrase.size());
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE INT_COL <= 1010 OR TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"Distributed systems\"') LIMIT 50000", upperBoundOrPhrase.size(), false, upperBoundOrPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL <= 1010 OR TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"distributed systems\"') LIMIT 50000", upperBoundOrPhrase.size());

        // INT_COL = 1017 AND phrase match.
        List<Object[]> equalityAndPhrase = new ArrayList<>();
        equalityAndPhrase.add(new Object[]{1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"});
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE INT_COL = 1017 AND TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", equalityAndPhrase.size(), false, equalityAndPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL = 1017 AND TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\"') LIMIT 50000", equalityAndPhrase.size());
        testTextSearchSelectQueryHelper("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE INT_COL = 1017 AND TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"Distributed systems\"') LIMIT 50000", equalityAndPhrase.size(), false, equalityAndPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL = 1017 AND TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"distributed systems\"') LIMIT 50000", equalityAndPhrase.size());

        // INT_COL = 1017 OR phrase match; only INT_COL is selected.
        List<Object[]> equalityOrPhrase = new ArrayList<>();
        equalityOrPhrase.add(new Object[]{1005});
        equalityOrPhrase.add(new Object[]{1009});
        equalityOrPhrase.add(new Object[]{1010});
        equalityOrPhrase.add(new Object[]{1012});
        equalityOrPhrase.add(new Object[]{1017});
        equalityOrPhrase.add(new Object[]{1020});
        testTextSearchSelectQueryHelper("SELECT INT_COL FROM MyTable WHERE INT_COL = 1017 OR TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", equalityOrPhrase.size(), false, equalityOrPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL = 1017 OR TEXT_MATCH(SKILLS_TEXT_COL, '\"distributed systems\"') LIMIT 50000", equalityOrPhrase.size());
        testTextSearchSelectQueryHelper("SELECT INT_COL FROM MyTable WHERE INT_COL = 1017 OR TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"Distributed systems\"') LIMIT 50000", equalityOrPhrase.size(), false, equalityOrPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE INT_COL = 1017 OR TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"distributed systems\"') LIMIT 50000", equalityOrPhrase.size());

        // Two TEXT_MATCH predicates on different columns joined by AND.
        List<Object[]> twoTextMatchesAnd = new ArrayList<>();
        twoTextMatchesAnd.add(new Object[]{1005});
        twoTextMatchesAnd.add(new Object[]{1009});
        twoTextMatchesAnd.add(new Object[]{1010});
        twoTextMatchesAnd.add(new Object[]{1012});
        twoTextMatchesAnd.add(new Object[]{1017});
        twoTextMatchesAnd.add(new Object[]{1020});
        testTextSearchSelectQueryHelper("SELECT INT_COL FROM MyTable WHERE TEXT_MATCH(QUERY_LOG_TEXT_COL, '\"SELECT count\"') AND TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", twoTextMatchesAnd.size(), false, twoTextMatchesAnd);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(QUERY_LOG_TEXT_COL, '\"SELECT count\"') AND TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", twoTextMatchesAnd.size());

        // Two TEXT_MATCH predicates on the same column joined by OR.
        List<Object[]> twoTextMatchesOr = new ArrayList<>();
        twoTextMatchesOr.add(new Object[]{1005});
        twoTextMatchesOr.add(new Object[]{1009});
        twoTextMatchesOr.add(new Object[]{1010});
        twoTextMatchesOr.add(new Object[]{1012});
        twoTextMatchesOr.add(new Object[]{1013});
        twoTextMatchesOr.add(new Object[]{1014});
        twoTextMatchesOr.add(new Object[]{1015});
        twoTextMatchesOr.add(new Object[]{1017});
        twoTextMatchesOr.add(new Object[]{1019});
        twoTextMatchesOr.add(new Object[]{1020});
        testTextSearchSelectQueryHelper("SELECT INT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, 'apache') OR TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", twoTextMatchesOr.size(), false, twoTextMatchesOr);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, 'apache') OR TEXT_MATCH(SKILLS_TEXT_COL, '\"Distributed systems\"') LIMIT 50000", twoTextMatchesOr.size());

        // Plain equality on the dictionary-encoded text column (no TEXT_MATCH involved).
        List<Object[]> dictEquality = new ArrayList<>();
        dictEquality.add(new Object[]{1004});
        testTextSearchSelectQueryHelper("SELECT INT_COL FROM MyTable WHERE SKILLS_TEXT_COL_DICT = 'Machine learning, Tensor flow, Java, Stanford university,' LIMIT 50000", dictEquality.size(), false, dictEquality);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE SKILLS_TEXT_COL_DICT = 'Machine learning, Tensor flow, Java, Stanford university,' LIMIT 50000", dictEquality.size());

        // Equality on the dictionary-encoded column OR a TEXT_MATCH on that same column.
        List<Object[]> dictEqualityOrPhrase = new ArrayList<>();
        dictEqualityOrPhrase.add(new Object[]{1003});
        dictEqualityOrPhrase.add(new Object[]{1004});
        dictEqualityOrPhrase.add(new Object[]{1005});
        dictEqualityOrPhrase.add(new Object[]{1006});
        dictEqualityOrPhrase.add(new Object[]{1007});
        dictEqualityOrPhrase.add(new Object[]{1010});
        dictEqualityOrPhrase.add(new Object[]{1011});
        dictEqualityOrPhrase.add(new Object[]{1016});
        dictEqualityOrPhrase.add(new Object[]{1019});
        dictEqualityOrPhrase.add(new Object[]{1020});
        testTextSearchSelectQueryHelper("SELECT INT_COL FROM MyTable WHERE SKILLS_TEXT_COL_DICT = 'Machine learning, Tensor flow, Java, Stanford university,' OR TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"machine learning\"') LIMIT 50000", dictEqualityOrPhrase.size(), false, dictEqualityOrPhrase);
        testTextSearchAggregationQueryHelper("SELECT COUNT(*) FROM MyTable WHERE SKILLS_TEXT_COL_DICT = 'Machine learning, Tensor flow, Java, Stanford university,' OR TEXT_MATCH(SKILLS_TEXT_COL_DICT, '\"machine learning\"') LIMIT 50000", dictEqualityOrPhrase.size());
    }

    /**
     * Walks a Lucene {@link SearcherManager} through acquire / refresh / release cycles on a
     * near-real-time index and asserts, after every step, the reader reference count and the
     * number of hits for the phrase query {@code "machine learning"}.
     *
     * <p>The expected reference counts presumably reflect one reference held by the manager for
     * its current reader plus one per outstanding acquire - confirm against the Lucene docs.
     *
     * <p>The directory, analyzer, writer and manager are opened in try-with-resources so they are
     * released even when an assertion fails part-way through.
     *
     * @throws Exception if index creation, query parsing or searching fails
     */
    @Test
    public void testLuceneRealtimeWithSearcherManager() throws Exception {
        File indexFile = new File(INDEX_DIR.getPath() + "/realtime-test1.index");
        try (FSDirectory directory = FSDirectory.open(indexFile.toPath());
                StandardAnalyzer standardAnalyzer = new StandardAnalyzer()) {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(standardAnalyzer);
            indexWriterConfig.setRAMBufferSizeMB(500.0d);
            try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
                    SearcherManager searcherManager = new SearcherManager(indexWriter, false, false, (SearcherFactory) null)) {
                Query parse = new QueryParser("skill", standardAnalyzer).parse("\"machine learning\"");

                // Two acquires on the empty index: same searcher instance, no hits.
                IndexSearcher indexSearcher = searcherManager.acquire();
                Assert.assertEquals(2, indexSearcher.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher.search(parse, 100).scoreDocs.length);
                IndexSearcher indexSearcher2 = searcherManager.acquire();
                Assert.assertEquals(3, indexSearcher2.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher2.search(parse, 100).scoreDocs.length);
                Assert.assertEquals(indexSearcher, indexSearcher2);
                Assert.assertEquals(3, indexSearcher.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher.search(parse, 100).scoreDocs.length);

                // Add the first document and refresh: a new searcher sees it, the old ones do not.
                Document document = new Document();
                document.add(new TextField("skill", "machine learning", Field.Store.NO));
                indexWriter.addDocument(document);
                searcherManager.maybeRefresh();
                IndexSearcher indexSearcher3 = searcherManager.acquire();
                Assert.assertEquals(2, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(1, indexSearcher3.search(parse, 100).scoreDocs.length);
                Assert.assertNotEquals(indexSearcher2, indexSearcher3);
                Assert.assertEquals(2, indexSearcher.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher.search(parse, 100).scoreDocs.length);
                Assert.assertEquals(2, indexSearcher2.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher2.search(parse, 100).scoreDocs.length);

                // Release the two stale searchers one at a time; their shared reader drops to 0.
                searcherManager.release(indexSearcher);
                Assert.assertEquals(1, indexSearcher.getIndexReader().getRefCount());
                Assert.assertEquals(1, indexSearcher2.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(1, indexSearcher3.search(parse, 100).scoreDocs.length);
                searcherManager.release(indexSearcher2);
                Assert.assertEquals(0, indexSearcher.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher2.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(1, indexSearcher3.search(parse, 100).scoreDocs.length);

                // Add a second document: invisible to the third searcher before and after refresh.
                Document document2 = new Document();
                document2.add(new TextField("skill", "java, machine learning", Field.Store.NO));
                indexWriter.addDocument(document2);
                Assert.assertEquals(2, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(1, indexSearcher3.search(parse, 100).scoreDocs.length);
                searcherManager.maybeRefresh();
                Assert.assertEquals(1, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(1, indexSearcher3.search(parse, 100).scoreDocs.length);
                IndexSearcher indexSearcher4 = searcherManager.acquire();
                Assert.assertEquals(2, indexSearcher4.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher4.search(parse, 100).scoreDocs.length);
                Assert.assertEquals(1, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(1, indexSearcher3.search(parse, 100).scoreDocs.length);
                searcherManager.release(indexSearcher3);
                Assert.assertEquals(0, indexSearcher.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher2.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher4.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher4.search(parse, 100).scoreDocs.length);

                // Refresh with no new documents: the next acquire returns the same searcher.
                searcherManager.maybeRefresh();
                IndexSearcher indexSearcher5 = searcherManager.acquire();
                Assert.assertEquals(3, indexSearcher4.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher4.search(parse, 100).scoreDocs.length);
                Assert.assertEquals(3, indexSearcher5.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher5.search(parse, 100).scoreDocs.length);
                Assert.assertEquals(indexSearcher4, indexSearcher5);
                searcherManager.release(indexSearcher4);
                Assert.assertEquals(0, indexSearcher.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher2.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher4.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher4.search(parse, 100).scoreDocs.length);
                Assert.assertEquals(2, indexSearcher5.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher5.search(parse, 100).scoreDocs.length);
                searcherManager.release(indexSearcher5);
                Assert.assertEquals(0, indexSearcher.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher2.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher3.getIndexReader().getRefCount());
                Assert.assertEquals(1, indexSearcher4.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher4.search(parse, 100).scoreDocs.length);
                Assert.assertEquals(1, indexSearcher5.getIndexReader().getRefCount());
                Assert.assertEquals(2, indexSearcher5.search(parse, 100).scoreDocs.length);

                // Closing the manager explicitly is part of the scenario: the last reference goes.
                // The try-with-resources close that follows is then expected to be a no-op.
                searcherManager.close();
                Assert.assertEquals(0, indexSearcher4.getIndexReader().getRefCount());
                Assert.assertEquals(0, indexSearcher5.getIndexReader().getRefCount());
            }
        }
    }

    /**
     * Checks near-real-time visibility using plain {@link DirectoryReader}s: after each document
     * is added to the writer, a reader reopened via {@code DirectoryReader.openIfChanged} must be
     * non-null and must return one more hit for the query
     * {@code "distributed systems" AND (Java C++)}.
     *
     * <p>All Lucene resources are opened in try-with-resources so that they are closed even when
     * an assertion fails.
     *
     * @throws Exception if index creation, query parsing or searching fails
     */
    @Test
    public void testLuceneRealtimeWithoutSearcherManager() throws Exception {
        File indexFile = new File(INDEX_DIR.getPath() + "/realtime-test2.index");
        try (FSDirectory directory = FSDirectory.open(indexFile.toPath());
                StandardAnalyzer standardAnalyzer = new StandardAnalyzer()) {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(standardAnalyzer);
            indexWriterConfig.setRAMBufferSizeMB(50.0d);
            try (IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
                Document document = new Document();
                document.add(new TextField("skill", "distributed systems, machine learning, JAVA, C++", Field.Store.NO));
                indexWriter.addDocument(document);
                Query parse = new QueryParser("skill", standardAnalyzer).parse("\"distributed systems\" AND (Java C++)");

                // Reader opened from the writer sees the first document.
                try (DirectoryReader firstReader = DirectoryReader.open(indexWriter)) {
                    Assert.assertEquals(1, new IndexSearcher(firstReader).search(parse, 50).scoreDocs.length);

                    Document document2 = new Document();
                    document2.add(new TextField("skill", "distributed systems, python, JAVA, C++", Field.Store.NO));
                    indexWriter.addDocument(document2);

                    // A null resource is skipped by try-with-resources, so the assertNotNull
                    // below still reports the failure cleanly if no new reader is returned.
                    try (DirectoryReader secondReader = DirectoryReader.openIfChanged(firstReader)) {
                        Assert.assertNotNull(secondReader);
                        Assert.assertEquals(2, new IndexSearcher(secondReader).search(parse, 50).scoreDocs.length);

                        Document document3 = new Document();
                        document3.add(new TextField("skill", "distributed systems, GPU, JAVA, C++", Field.Store.NO));
                        indexWriter.addDocument(document3);

                        try (DirectoryReader thirdReader = DirectoryReader.openIfChanged(secondReader)) {
                            Assert.assertNotNull(thirdReader);
                            Assert.assertEquals(3, new IndexSearcher(thirdReader).search(parse, 50).scoreDocs.length);
                        }
                    }
                }
            }
        }
    }

    /**
     * Runs a writer thread and a reader thread concurrently against a near-real-time Lucene
     * index whose {@link SearcherManager} is refreshed by a {@link ControlledRealTimeReopenThread}
     * (target staleness bounds of 0.01 seconds), then waits for all three threads to finish.
     *
     * <p>The work done by the threads lives in {@code RealtimeWriter} and {@code RealtimeReader},
     * which are defined elsewhere in this class.
     *
     * @throws Exception if the index cannot be opened or a join is interrupted
     */
    @Test
    public void testMultiThreadedLuceneRealtime() throws Exception {
        File indexFile = new File(INDEX_DIR.getPath() + "/realtime-test3.index");
        FSDirectory directory = FSDirectory.open(indexFile.toPath());
        StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(standardAnalyzer);
        indexWriterConfig.setRAMBufferSizeMB(500.0d);
        IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
        SearcherManager searcherManager = new SearcherManager(indexWriter, false, false, (SearcherFactory) null);

        // Background thread that periodically refreshes the searcher manager.
        ControlledRealTimeReopenThread<IndexSearcher> reopenThread =
                new ControlledRealTimeReopenThread<>(indexWriter, searcherManager, 0.01d, 0.01d);
        reopenThread.start();

        Thread writerThread = new Thread(new RealtimeWriter(indexWriter));
        Thread readerThread = new Thread(new RealtimeReader(searcherManager, standardAnalyzer));
        writerThread.start();
        readerThread.start();
        writerThread.join();
        readerThread.join();

        // NOTE(review): this join only returns once the reopen thread has terminated; presumably
        // the writer/reader runnables shut the index down so that it exits - confirm.
        reopenThread.join();
    }

    /**
     * Runs a selection query and checks its result rows.
     *
     * @param str the query to execute
     * @param i the expected number of result rows
     * @param z when {@code true}, the rows are checked against the grep results file instead of
     *     {@code list}
     * @param list the expected rows, compared position by position; skipped when {@code null}
     * @throws Exception if the grep-based verification fails to read its input
     */
    private void testTextSearchSelectQueryHelper(String str, int i, boolean z, List<Object[]> list) throws Exception {
        List<Object[]> actualRows = (List) getOperator(str).nextBlock().getSelectionResult();
        Assert.assertNotNull(actualRows);
        Assert.assertEquals(actualRows.size(), i);

        if (z) {
            verifySearchOutputWithGrepResults(actualRows);
        } else if (list != null) {
            int rowIndex = 0;
            while (rowIndex < i) {
                Object[] actualRow = actualRows.get(rowIndex);
                Object[] expectedRow = list.get(rowIndex);
                Assert.assertEquals(actualRow.length, expectedRow.length);
                // Compare the row cell by cell.
                int columnIndex = 0;
                while (columnIndex < actualRow.length) {
                    Assert.assertEquals(actualRow[columnIndex], expectedRow[columnIndex]);
                    columnIndex++;
                }
                rowIndex++;
            }
        }
    }

    /**
     * Compares query result rows against the classpath resource
     * {@code data/text_search_data/group_by_grep_results.out}.
     *
     * <p>Each line of the file is split on {@code ':'}. The first field, parsed as an integer and
     * shifted by {@code INT_BASE_VALUE - 1}, must equal the first cell of the row at the same
     * position. The second field must equal the row's second cell.
     *
     * @param list the result rows, in the same order as the lines of the resource file
     * @throws Exception if the resource is missing or cannot be read
     */
    private void verifySearchOutputWithGrepResults(List<Object[]> list) throws Exception {
        String resourcePath = Objects.requireNonNull(
                getClass().getClassLoader().getResource("data/text_search_data/group_by_grep_results.out"),
                "Missing test resource: data/text_search_data/group_by_grep_results.out").getFile();
        // try-with-resources closes the reader (and the underlying file stream) on every path.
        // NOTE(review): the reader uses the platform default charset, as before.
        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(resourcePath))))) {
            int rowIndex = 0;
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                String[] fields = line.split(":");
                Object[] row = list.get(rowIndex);
                Assert.assertEquals(Integer.valueOf((Integer.parseInt(fields[0]) + INT_BASE_VALUE) - 1), row[0]);
                Assert.assertEquals(fields[1], row[1]);
                rowIndex++;
            }
        }
    }

    /**
     * Runs an aggregation query through {@code getOperator} and checks its first aggregation value.
     *
     * @param str query to execute
     * @param i value the first aggregation result (read as a {@code Long}) is compared with
     */
    private void testTextSearchAggregationQueryHelper(String str, int i) {
        Object firstAggregate = getOperator(str).nextBlock().getAggregationResult().get(0);
        long actualValue = ((Long) firstAggregate).longValue();
        Assert.assertEquals(i, actualValue);
    }

    /**
     * Runs the same TEXT_MATCH expression against each of the skills text columns listed below, checking both the
     * selected rows and the {@code COUNT(*)} result.
     *
     * @param str text-match expression placed inside the {@code TEXT_MATCH} call
     * @param list expected selection rows; its size is also the expected count
     * @throws Exception propagated from {@code testTextSearchSelectQueryHelper}
     */
    private void testSkillsColumn(String str, List<Object[]> list) throws Exception {
        String[] textColumns = {
            SKILLS_TEXT_COL_NAME,
            SKILLS_TEXT_COL_DICT_NAME,
            SKILLS_TEXT_COL_MULTI_TERM_NAME,
            SKILLS_TEXT_NO_RAW_NAME,
            SKILLS_TEXT_MV_COL_NAME,
            SKILLS_TEXT_MV_COL_DICT_NAME
        };
        int expectedCount = list.size();
        for (int c = 0; c < textColumns.length; c++) {
            String column = textColumns[c];
            String selectQuery = String.format("SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(%s, '%s') LIMIT 50000", column, str);
            String countQuery = String.format("SELECT COUNT(*) FROM MyTable WHERE TEXT_MATCH(%s, '%s') LIMIT 50000", column, str);
            testTextSearchSelectQueryHelper(selectQuery, expectedCount, false, list);
            testTextSearchAggregationQueryHelper(countQuery, expectedCount);
        }
    }

    /**
     * Runs TEXT_MATCH count and selection queries through the broker-response path and checks the results.
     *
     * <p>Every query is issued twice, once filtering on {@code SKILLS_TEXT_COL} and once on
     * {@code SKILLS_TEXT_COL_DICT}, with the same expected outcome for both.
     */
    @Test
    public void testInterSegment() {
        String machineLearningAndTensorFlow = "\"Machine learning\" AND \"Tensor flow\"";
        assertCountForBothSkillsColumns(machineLearningAndTensorFlow, 12L);
        assertRowsForBothSkillsColumns(machineLearningAndTensorFlow, repeatExpectedRows(4, new Object[][]{
            {1004, "Machine learning, Tensor flow, Java, Stanford university,"},
            {1007, "C++, Python, Tensor flow, database kernel, storage, indexing and transaction processing, building large scale systems, Machine learning"},
            {1016, "CUDA, GPU processing, Tensor flow, Pandas, Python, Jupyter notebook, spark, Machine learning, building high performance scalable systems"}
        }));

        assertCountForBothSkillsColumns("(\"distributed systems\" AND apache) OR (Java AND C++)", 36L);

        String distributedApacheJavaCpp = "(\"distributed systems\" AND apache) AND (Java AND C++)";
        assertCountForBothSkillsColumns(distributedApacheJavaCpp, 4L);
        assertRowsForBothSkillsColumns(distributedApacheJavaCpp, repeatExpectedRows(4, new Object[][]{
            {1017, "Distributed systems, Apache Kafka, publish-subscribe, building and deploying large scale production systems, concurrency, multi-threading, C++, CPU processing, Java"}
        }));

        String sparkOrQueryProcessingAndMl = "(\"apache spark\" OR \"query processing\") AND \"machine learning\"";
        assertCountForBothSkillsColumns(sparkOrQueryProcessingAndMl, 4L);
        assertRowsForBothSkillsColumns(sparkOrQueryProcessingAndMl, repeatExpectedRows(4, new Object[][]{
            {1020, "Databases, columnar query processing, Apache Arrow, distributed systems, Machine learning, cluster management, docker image building and distribution"}
        }));

        // Count-only checks.
        assertCountForBothSkillsColumns("a and or in the are", 0L);
        assertCountForBothSkillsColumns("\"learned a lot\"", 4L);
        assertCountForBothSkillsColumns("\"indexing and transaction processing\"", 12L);
        assertCountForBothSkillsColumns("\"docker image building and distribution\"", 8L);
        assertCountForBothSkillsColumns("\"distributed query engines for analytics and data warehouses\"", 8L);
        assertCountForBothSkillsColumns("\"worked in NGO\"", 4L);
    }

    /** Asserts the {@code count(*)} of a TEXT_MATCH filter on SKILLS_TEXT_COL, then on SKILLS_TEXT_COL_DICT. */
    private void assertCountForBothSkillsColumns(String textMatchExpression, long expectedCount) {
        for (String column : new String[]{"SKILLS_TEXT_COL", "SKILLS_TEXT_COL_DICT"}) {
            testInterSegmentAggregationQueryHelper(
                "SELECT count(*) FROM MyTable WHERE TEXT_MATCH(" + column + ", '" + textMatchExpression + "')",
                expectedCount);
        }
    }

    /** Asserts the selected rows of a TEXT_MATCH filter on SKILLS_TEXT_COL, then on SKILLS_TEXT_COL_DICT. */
    private void assertRowsForBothSkillsColumns(String textMatchExpression, List<Object[]> expectedRows) {
        for (String column : new String[]{"SKILLS_TEXT_COL", "SKILLS_TEXT_COL_DICT"}) {
            testInterSegmentSelectionQueryHelper(
                "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(" + column + ", '" + textMatchExpression
                    + "') LIMIT 50000",
                expectedRows);
        }
    }

    /**
     * Builds a list holding {@code copies} consecutive repetitions of {@code rows}, each row as its own array.
     * NOTE(review): callers pass 4, presumably one copy per segment the broker query covers; confirm against setup.
     */
    private static List<Object[]> repeatExpectedRows(int copies, Object[][] rows) {
        List<Object[]> repeated = new ArrayList<>(copies * rows.length);
        for (int copy = 0; copy < copies; copy++) {
            for (Object[] row : rows) {
                // Clone so that no two list entries share the same array instance.
                repeated.add(row.clone());
            }
        }
        return repeated;
    }

    /**
     * Checks that the broker response for {@code str} matches a single-row, single-column {@code count(*)} table.
     *
     * @param str query to execute via {@code getBrokerResponse}
     * @param j expected {@code count(*)} value
     */
    private void testInterSegmentAggregationQueryHelper(String str, long j) {
        String[] columnNames = {"count(*)"};
        DataSchema.ColumnDataType[] columnTypes = {DataSchema.ColumnDataType.LONG};
        List<Object[]> expectedRows = Collections.singletonList(new Object[]{Long.valueOf(j)});
        ResultTable expectedTable = new ResultTable(new DataSchema(columnNames, columnTypes), expectedRows);
        QueriesTestUtils.testInterSegmentsResult(getBrokerResponse(str), expectedTable);
    }

    /**
     * Checks that the broker response for {@code str} matches a two-column (INT, STRING) table containing
     * {@code list}.
     *
     * @param str query to execute via {@code getBrokerResponse}
     * @param list expected result rows
     */
    private void testInterSegmentSelectionQueryHelper(String str, List<Object[]> list) {
        String[] columnNames = {INT_COL_NAME, SKILLS_TEXT_COL_NAME};
        DataSchema.ColumnDataType[] columnTypes = {
            DataSchema.ColumnDataType.INT,
            DataSchema.ColumnDataType.STRING
        };
        ResultTable expectedTable = new ResultTable(new DataSchema(columnNames, columnTypes), list);
        QueriesTestUtils.testInterSegmentsResult(getBrokerResponse(str), expectedTable);
    }
}
