package com.itextpdf.pdfocr.tesseract4;

import com.itextpdf.commons.actions.confirmations.ConfirmEvent;
import com.itextpdf.commons.actions.confirmations.EventConfirmationType;
import com.itextpdf.commons.actions.contexts.IMetaInfo;
import com.itextpdf.commons.actions.data.ProductData;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.io.image.ImageType;
import com.itextpdf.pdfocr.AbstractPdfOcrEventHelper;
import com.itextpdf.pdfocr.IOcrEngine;
import com.itextpdf.pdfocr.IProductAware;
import com.itextpdf.pdfocr.OcrProcessContext;
import com.itextpdf.pdfocr.PdfOcrMetaInfoContainer;
import com.itextpdf.pdfocr.TextInfo;
import com.itextpdf.pdfocr.statistics.PdfOcrOutputType;
import com.itextpdf.pdfocr.statistics.PdfOcrOutputTypeStatisticsEvent;
import com.itextpdf.pdfocr.tesseract4.actions.data.PdfOcrTesseract4ProductData;
import com.itextpdf.pdfocr.tesseract4.actions.events.PdfOcrTesseract4ProductEvent;
import com.itextpdf.pdfocr.tesseract4.exceptions.PdfOcrInputTesseract4Exception;
import com.itextpdf.pdfocr.tesseract4.exceptions.PdfOcrTesseract4Exception;
import com.itextpdf.pdfocr.tesseract4.exceptions.PdfOcrTesseract4ExceptionMessageConstant;
import com.itextpdf.pdfocr.tesseract4.logs.Tesseract4LogMessageConstant;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine.class */
/**
 * Base class for Tesseract 4 based OCR engines.
 *
 * <p>It validates the input image type, delegates the actual recognition to
 * the abstract {@code doTesseractOcr} method implemented by subclasses (which
 * writes its output into temporary files), and converts those files either to
 * plain text or to a map of page number to {@link TextInfo} items.
 */
public abstract class AbstractTesseract4OcrEngine implements IOcrEngine, IProductAware {
    /** Image types accepted by {@link #verifyImageFormatValidity(File)}. */
    private static final Set<ImageType> SUPPORTED_IMAGE_FORMATS = Collections.unmodifiableSet(
            new HashSet<ImageType>(Arrays.asList(ImageType.BMP, ImageType.PNG, ImageType.TIFF, ImageType.JPEG)));

    /** Engine configuration; replaceable through the setter. */
    private Tesseract4OcrEngineProperties tesseract4OcrEngineProperties;

    /**
     * Not referenced inside this class; it is package-private, so presumably it is
     * maintained by the concrete engines of this package (verify against subclasses).
     */
    Set<UUID> processedUUID = new HashSet<>();

    /** Not referenced inside this class. */
    private ThreadLocal<IMetaInfo> threadLocalMetaInfo = new ThreadLocal<>();

    /**
     * Marker type for the two result shapes produced by
     * {@code processInputFiles}: plain text or per-page text items.
     */
    public interface ITesseractOcrResult {
    }

    /** OCR result carrying the recognized text as a single string. */
    public static class StringTesseractOcrResult implements ITesseractOcrResult {
        private final String data;

        StringTesseractOcrResult(String str) {
            this.data = str;
        }

        /**
         * @return the recognized text passed to the constructor
         */
        String getData() {
            return this.data;
        }
    }

    /** OCR result carrying the recognized text items grouped by page number. */
    public static class TextInfoTesseractOcrResult implements ITesseractOcrResult {
        private final Map<Integer, List<TextInfo>> textInfos;

        TextInfoTesseractOcrResult(Map<Integer, List<TextInfo>> map) {
            this.textInfos = map;
        }

        /**
         * @return the page number to text items map passed to the constructor
         */
        Map<Integer, List<TextInfo>> getTextInfos() {
            return this.textInfos;
        }
    }

    /**
     * Creates the engine with the given configuration.
     *
     * @param tesseract4OcrEngineProperties engine configuration
     */
    public AbstractTesseract4OcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
        this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
    }

    /**
     * Runs OCR for the first page of the input into a single output file, using a
     * fresh {@link Tesseract4EventHelper}.
     *
     * @param file input image
     * @param file2 output file
     * @param outputFormat requested output format
     */
    public void doTesseractOcr(File file, File file2, OutputFormat outputFormat) {
        doTesseractOcr(file, file2, outputFormat, new OcrProcessContext(new Tesseract4EventHelper()));
    }

    /**
     * Runs OCR for the first page of the input into a single output file.
     *
     * @param file input image
     * @param file2 output file
     * @param outputFormat requested output format
     * @param ocrProcessContext context supplying the event helper
     */
    public void doTesseractOcr(File file, File file2, OutputFormat outputFormat, OcrProcessContext ocrProcessContext) {
        doTesseractOcr(file, Collections.singletonList(file2), outputFormat, 1,
                ocrProcessContext.getOcrEventHelper());
    }

    /**
     * Recognizes all input images as text and writes the concatenated result to
     * a file, using a fresh {@link Tesseract4EventHelper}.
     *
     * @param list input images
     * @param file text file to write
     */
    public void createTxtFile(List<File> list, File file) {
        createTxtFile(list, file, new OcrProcessContext(new Tesseract4EventHelper()));
    }

    /**
     * Recognizes all input images as text and writes the concatenated result to a file.
     *
     * <p>While the images are processed the context's event helper is temporarily
     * replaced by a {@link Tesseract4FileResultEventHelper} wrapping it; the previous
     * helper (or a new {@link Tesseract4EventHelper} if the context had none) is
     * put back afterwards, also on failure.
     *
     * @param list input images
     * @param file text file to write
     * @param ocrProcessContext context supplying the event helper
     */
    public void createTxtFile(List<File> list, File file, OcrProcessContext ocrProcessContext) {
        LoggerFactory.getLogger(getClass()).info(MessageFormatUtil.format(
                Tesseract4LogMessageConstant.START_OCR_FOR_IMAGES, new Object[]{list.size()}));

        AbstractPdfOcrEventHelper storedEventHelper = ocrProcessContext.getOcrEventHelper();
        if (storedEventHelper == null) {
            storedEventHelper = new Tesseract4EventHelper();
        }
        PdfOcrTesseract4ProductEvent event = PdfOcrTesseract4ProductEvent.createProcessImageEvent(
                storedEventHelper.getSequenceId(), null, storedEventHelper.getConfirmationType());
        storedEventHelper.onEvent(event);
        try {
            ocrProcessContext.setOcrEventHelper(new Tesseract4FileResultEventHelper(storedEventHelper));
            StringBuilder content = new StringBuilder();
            for (File inputImage : list) {
                content.append(doImageOcr(inputImage, OutputFormat.TXT, ocrProcessContext));
            }
            TesseractHelper.writeToTextFile(file.getAbsolutePath(), content.toString());
            // The event is confirmed only after the text file has been written successfully.
            if (event.getConfirmationType() == EventConfirmationType.ON_DEMAND) {
                storedEventHelper.onEvent(new ConfirmEvent(event));
            }
        } finally {
            ocrProcessContext.setOcrEventHelper(storedEventHelper);
        }
    }

    /**
     * @return the current engine configuration
     */
    public final Tesseract4OcrEngineProperties getTesseract4OcrEngineProperties() {
        return this.tesseract4OcrEngineProperties;
    }

    /**
     * @param tesseract4OcrEngineProperties the new engine configuration
     */
    public final void setTesseract4OcrEngineProperties(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
        this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
    }

    /**
     * @return the configured languages joined with {@code "+"}, or the default
     *         language when no languages are configured
     */
    public final String getLanguagesAsString() {
        if (getTesseract4OcrEngineProperties().getLanguages().isEmpty()) {
            return getTesseract4OcrEngineProperties().getDefaultLanguage();
        }
        return String.join("+", getTesseract4OcrEngineProperties().getLanguages());
    }

    /**
     * Recognizes the image in HOCR mode using a fresh {@link Tesseract4EventHelper}.
     *
     * @param file input image; its type must be one of the supported image formats
     * @return recognized text items grouped by page number
     */
    public final Map<Integer, List<TextInfo>> doImageOcr(File file) {
        verifyImageFormatValidity(file);
        ITesseractOcrResult result = processInputFiles(file, OutputFormat.HOCR, new Tesseract4EventHelper());
        return ((TextInfoTesseractOcrResult) result).getTextInfos();
    }

    /**
     * Recognizes the image in HOCR mode.
     *
     * @param file input image; its type must be one of the supported image formats
     * @param ocrProcessContext context supplying the event helper
     * @return recognized text items grouped by page number
     */
    public final Map<Integer, List<TextInfo>> doImageOcr(File file, OcrProcessContext ocrProcessContext) {
        verifyImageFormatValidity(file);
        ITesseractOcrResult result =
                processInputFiles(file, OutputFormat.HOCR, ocrProcessContext.getOcrEventHelper());
        return ((TextInfoTesseractOcrResult) result).getTextInfos();
    }

    /**
     * Recognizes the image and returns the result as a string.
     *
     * <p>For {@link OutputFormat#TXT} the engine text is returned as is. For any
     * other format the text of every item is emitted on its own line and every
     * page is followed by an extra line separator.
     *
     * @param file input image; its type must be one of the supported image formats
     * @param outputFormat requested output format
     * @param ocrProcessContext context supplying the event helper
     * @return the recognized text, or an empty string when no result was produced
     */
    public final String doImageOcr(File file, OutputFormat outputFormat, OcrProcessContext ocrProcessContext) {
        verifyImageFormatValidity(file);
        ITesseractOcrResult result = processInputFiles(file, outputFormat, ocrProcessContext.getOcrEventHelper());
        if (result == null) {
            return "";
        }
        if (outputFormat.equals(OutputFormat.TXT)) {
            return ((StringTesseractOcrResult) result).getData();
        }
        StringBuilder text = new StringBuilder();
        for (List<TextInfo> pageItems : ((TextInfoTesseractOcrResult) result).getTextInfos().values()) {
            // A page may be mapped to null when the parsed data had no entry for it; treat it as empty.
            if (pageItems != null) {
                for (TextInfo item : pageItems) {
                    text.append(item.getText()).append(System.lineSeparator());
                }
            }
            text.append(System.lineSeparator());
        }
        return text.toString();
    }

    /**
     * Recognizes the image and returns the result as a string, using a fresh
     * {@link Tesseract4EventHelper}.
     *
     * @param file input image
     * @param outputFormat requested output format
     * @return the recognized text, or an empty string when no result was produced
     */
    public final String doImageOcr(File file, OutputFormat outputFormat) {
        return doImageOcr(file, outputFormat, new OcrProcessContext(new Tesseract4EventHelper()));
    }

    /**
     * @return {@code true} when the value of {@link #identifyOsType()} contains {@code "win"}
     */
    public boolean isWindows() {
        // Locale.ROOT keeps the comparison independent of the JVM default locale.
        return identifyOsType().toLowerCase(Locale.ROOT).contains("win");
    }

    /**
     * Reads the operating system name from the {@code os.name} system property,
     * falling back to the {@code OS} system property.
     *
     * @return the lower-cased name, or an empty string when neither property is set
     */
    public String identifyOsType() {
        String osName = System.getProperty("os.name");
        if (osName == null) {
            osName = System.getProperty("OS");
        }
        return osName == null ? "" : osName.toLowerCase(Locale.ROOT);
    }

    /**
     * Checks that a {@code <language>.traineddata} file exists in the tess data
     * directory for every given language, or for the default language when the
     * list is empty.
     *
     * @param list languages to validate
     * @throws PdfOcrTesseract4Exception when a trained data file is missing or
     *         the tess data path is not set
     */
    public void validateLanguages(List<String> list) throws PdfOcrTesseract4Exception {
        List<String> languagesToCheck = list.isEmpty()
                ? Collections.singletonList(getTesseract4OcrEngineProperties().getDefaultLanguage())
                : list;
        for (String language : languagesToCheck) {
            String trainedDataName = language + ".traineddata";
            if (!new File(getTessData() + File.separatorChar + trainedDataName).exists()) {
                throw new PdfOcrInputTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_LANGUAGE)
                        .setMessageParams(new String[]{trainedDataName, getTessData()});
            }
        }
    }

    /**
     * @return a new container wrapping a new {@link Tesseract4MetaInfo}
     */
    public PdfOcrMetaInfoContainer getMetaInfoContainer() {
        return new PdfOcrMetaInfoContainer(new Tesseract4MetaInfo());
    }

    /**
     * @return the shared {@link PdfOcrTesseract4ProductData} instance
     */
    public ProductData getProductData() {
        return PdfOcrTesseract4ProductData.getInstance();
    }

    /**
     * Delegates to the six-argument overload with the boolean flag set to {@code true}.
     *
     * @param file input image
     * @param list output files
     * @param outputFormat requested output format
     * @param i page number (callers in this class start at 1)
     * @param abstractPdfOcrEventHelper event helper
     */
    void doTesseractOcr(File file, List<File> list, OutputFormat outputFormat, int i,
            AbstractPdfOcrEventHelper abstractPdfOcrEventHelper) {
        doTesseractOcr(file, list, outputFormat, i, true, abstractPdfOcrEventHelper);
    }

    /**
     * Performs the actual recognition and writes the result into the output files.
     *
     * @param file input image
     * @param list output files
     * @param outputFormat requested output format
     * @param i page number (callers in this class start at 1)
     * @param z {@code true} for the primary run, {@code false} for the supplementary
     *        TXT run; presumably controls event dispatching - confirm with implementations
     * @param abstractPdfOcrEventHelper event helper
     */
    abstract void doTesseractOcr(File file, List<File> list, OutputFormat outputFormat, int i, boolean z,
            AbstractPdfOcrEventHelper abstractPdfOcrEventHelper);

    /**
     * @return absolute path of the configured tess data directory
     * @throws PdfOcrTesseract4Exception when the path is not set
     */
    public String getTessData() {
        if (getTesseract4OcrEngineProperties().getPathToTessData() == null) {
            throw new PdfOcrTesseract4Exception(PdfOcrTesseract4ExceptionMessageConstant.PATH_TO_TESS_DATA_IS_NOT_SET);
        }
        return getTesseract4OcrEngineProperties().getPathToTessData().getAbsolutePath();
    }

    /**
     * Creates a process-image event from the helper's sequence id and
     * confirmation type and dispatches it through the helper.
     *
     * @param abstractPdfOcrEventHelper helper used to build and dispatch the event
     * @return the dispatched event
     */
    public PdfOcrTesseract4ProductEvent onEvent(AbstractPdfOcrEventHelper abstractPdfOcrEventHelper) {
        PdfOcrTesseract4ProductEvent event = PdfOcrTesseract4ProductEvent.createProcessImageEvent(
                abstractPdfOcrEventHelper.getSequenceId(), null, abstractPdfOcrEventHelper.getConfirmationType());
        abstractPdfOcrEventHelper.onEvent(event);
        return event;
    }

    /**
     * Dispatches an output-type statistics event with {@link PdfOcrOutputType#DATA}.
     *
     * @param abstractPdfOcrEventHelper helper used to dispatch the event
     */
    public void onEventStatistics(AbstractPdfOcrEventHelper abstractPdfOcrEventHelper) {
        abstractPdfOcrEventHelper.onEvent(
                new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.DATA, PdfOcrTesseract4ProductData.getInstance()));
    }

    /**
     * Runs the engine for every page of the input and collects the result.
     *
     * <p>With image preprocessing enabled the engine is invoked once per TIFF page
     * with one new output file per invocation; otherwise it is invoked once with
     * one output file per TIFF page. All temporary files are deleted before returning.
     *
     * @param file input image
     * @param outputFormat requested output format
     * @param abstractPdfOcrEventHelper event helper passed to the engine
     * @return the OCR result, or {@code null} when an {@link IOException}
     *         occurred before any result was produced
     */
    private ITesseractOcrResult processInputFiles(File file, OutputFormat outputFormat,
            AbstractPdfOcrEventHelper abstractPdfOcrEventHelper) {
        Map<Integer, List<TextInfo>> imageData = new LinkedHashMap<>();
        StringBuilder text = new StringBuilder();
        List<File> tempFiles = new ArrayList<>();
        // Supplementary .txt files are tracked separately so that they are cleaned up as well.
        List<File> tempTxtFilesToDelete = new ArrayList<>();
        ITesseractOcrResult result = null;
        try {
            int realNumOfPages = ImagePreprocessingUtil.isTiffImage(file)
                    ? ImagePreprocessingUtil.getNumberOfPageTiff(file)
                    : 1;
            boolean preprocessingImages = getTesseract4OcrEngineProperties().isPreprocessingImages();
            int numOfPages = preprocessingImages ? realNumOfPages : 1;
            int numOfFiles = preprocessingImages ? 1 : realNumOfPages;

            for (int page = 1; page <= numOfPages; page++) {
                String extension = outputFormat.equals(OutputFormat.HOCR) ? ".hocr" : ".txt";
                // The list deliberately keeps the files of previous pages; the full list is
                // handed to the engine on every iteration, exactly as before.
                for (int k = 0; k < numOfFiles; k++) {
                    tempFiles.add(createTempFile(extension));
                }
                doTesseractOcr(file, tempFiles, outputFormat, page, true, abstractPdfOcrEventHelper);

                if (outputFormat.equals(OutputFormat.HOCR)) {
                    List<File> tempTxtFiles = null;
                    if (getTesseract4OcrEngineProperties().isUseTxtToImproveHocrParsing()) {
                        tempTxtFiles = new ArrayList<>();
                        for (int k = 0; k < numOfFiles; k++) {
                            File txtFile = createTempFile(".txt");
                            tempTxtFiles.add(txtFile);
                            tempTxtFilesToDelete.add(txtFile);
                        }
                        doTesseractOcr(file, tempTxtFiles, OutputFormat.TXT, page, false, abstractPdfOcrEventHelper);
                    }
                    Map<Integer, List<TextInfo>> pageData = TesseractHelper.parseHocrFile(
                            tempFiles, tempTxtFiles, getTesseract4OcrEngineProperties());
                    if (preprocessingImages) {
                        imageData.put(page, pageData.get(1));
                    } else {
                        imageData = pageData;
                    }
                    result = new TextInfoTesseractOcrResult(imageData);
                } else {
                    for (File tempFile : tempFiles) {
                        if (Files.exists(Paths.get(tempFile.getAbsolutePath()))) {
                            text.append(TesseractHelper.readTxtFile(tempFile));
                        }
                    }
                    result = new StringTesseractOcrResult(text.toString());
                }
            }
        } catch (IOException e) {
            // Best effort: the failure is logged and whatever result exists so far is returned.
            LoggerFactory.getLogger(getClass()).error(MessageFormatUtil.format(
                    Tesseract4LogMessageConstant.CANNOT_OCR_INPUT_FILE, new Object[]{e.getMessage()}));
        } finally {
            for (File tempFile : tempFiles) {
                TesseractHelper.deleteFile(tempFile.getAbsolutePath());
            }
            for (File tempTxtFile : tempTxtFilesToDelete) {
                TesseractHelper.deleteFile(tempTxtFile.getAbsolutePath());
            }
        }
        return result;
    }

    /**
     * Builds a temporary file handle with a random UUID based name.
     *
     * @param str file extension, including the leading dot
     * @return file object for the temporary path (nothing is created on disk here)
     */
    private File createTempFile(String str) {
        return new File(TesseractOcrUtil.getTempFilePath(UUID.randomUUID().toString(), str));
    }

    /**
     * Ensures that the image type of the input is one of the supported formats.
     *
     * @param file input image
     * @throws PdfOcrTesseract4Exception when the image type is not supported
     */
    private void verifyImageFormatValidity(File file) throws PdfOcrTesseract4Exception {
        if (!SUPPORTED_IMAGE_FORMATS.contains(ImagePreprocessingUtil.getImageType(file))) {
            LoggerFactory.getLogger(getClass()).error(
                    MessageFormatUtil.format("Cannot read input image {0}", new Object[]{file.getAbsolutePath()}));
            throw new PdfOcrInputTesseract4Exception(
                    PdfOcrTesseract4ExceptionMessageConstant.INCORRECT_INPUT_IMAGE_FORMAT)
                    .setMessageParams(new String[]{file.getName()});
        }
    }
}
