package com.itextpdf.pdfocr.tesseract4;

import com.itextpdf.io.util.MessageFormatUtil;
import com.itextpdf.kernel.counter.EventCounterHandler;
import com.itextpdf.kernel.counter.event.IMetaInfo;
import com.itextpdf.pdfocr.IOcrEngine;
import com.itextpdf.pdfocr.OcrPdfCreatorMetaInfo;
import com.itextpdf.pdfocr.TextInfo;
import com.itextpdf.pdfocr.events.IThreadLocalMetaInfoAware;
import com.itextpdf.pdfocr.tesseract4.events.PdfOcrTesseract4Event;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine.class */
public abstract class AbstractTesseract4OcrEngine implements IOcrEngine, IThreadLocalMetaInfoAware {
    private static final Set<String> SUPPORTED_IMAGE_FORMATS = Collections.unmodifiableSet(new HashSet(Arrays.asList("bmp", "png", "tiff", "tif", "jpeg", "jpg", "jpe", "jfif")));
    private Tesseract4OcrEngineProperties tesseract4OcrEngineProperties;
    Set<UUID> processedUUID = new HashSet();
    private ThreadLocal<IMetaInfo> threadLocalMetaInfo = new ThreadLocal<>();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine$ITesseractOcrResult.class */
    public interface ITesseractOcrResult {
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine$StringTesseractOcrResult.class */
    public static class StringTesseractOcrResult implements ITesseractOcrResult {
        private String data;

        StringTesseractOcrResult(String str) {
            this.data = str;
        }

        String getData() {
            return this.data;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine$TextInfoTesseractOcrResult.class */
    public static class TextInfoTesseractOcrResult implements ITesseractOcrResult {
        private Map<Integer, List<TextInfo>> textInfos;

        TextInfoTesseractOcrResult(Map<Integer, List<TextInfo>> map) {
            this.textInfos = map;
        }

        Map<Integer, List<TextInfo>> getTextInfos() {
            return this.textInfos;
        }
    }

    public AbstractTesseract4OcrEngine(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
        this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
    }

    public void doTesseractOcr(File file, File file2, OutputFormat outputFormat) {
        doTesseractOcr(file, Collections.singletonList(file2), outputFormat, 1);
    }

    public void createTxtFile(List<File> list, File file) {
        LoggerFactory.getLogger(getClass()).info(MessageFormatUtil.format(Tesseract4LogMessageConstant.START_OCR_FOR_IMAGES, new Object[]{Integer.valueOf(list.size())}));
        StringBuilder sb = new StringBuilder();
        Iterator<File> it = list.iterator();
        while (it.hasNext()) {
            sb.append(doImageOcr(it.next(), OutputFormat.TXT));
        }
        TesseractHelper.writeToTextFile(file.getAbsolutePath(), sb.toString());
    }

    public final Tesseract4OcrEngineProperties getTesseract4OcrEngineProperties() {
        return this.tesseract4OcrEngineProperties;
    }

    public final void setTesseract4OcrEngineProperties(Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
        this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
    }

    public final String getLanguagesAsString() {
        return getTesseract4OcrEngineProperties().getLanguages().size() > 0 ? String.join("+", getTesseract4OcrEngineProperties().getLanguages()) : getTesseract4OcrEngineProperties().getDefaultLanguage();
    }

    public final Map<Integer, List<TextInfo>> doImageOcr(File file) {
        verifyImageFormatValidity(file);
        return ((TextInfoTesseractOcrResult) processInputFiles(file, OutputFormat.HOCR)).getTextInfos();
    }

    public final String doImageOcr(File file, OutputFormat outputFormat) {
        String str = "";
        verifyImageFormatValidity(file);
        ITesseractOcrResult processInputFiles = processInputFiles(file, outputFormat);
        if (processInputFiles != null) {
            if (outputFormat.equals(OutputFormat.TXT)) {
                str = ((StringTesseractOcrResult) processInputFiles).getData();
            } else {
                StringBuilder sb = new StringBuilder();
                Map<Integer, List<TextInfo>> textInfos = ((TextInfoTesseractOcrResult) processInputFiles).getTextInfos();
                Iterator<Integer> it = textInfos.keySet().iterator();
                while (it.hasNext()) {
                    int intValue = it.next().intValue();
                    StringBuilder sb2 = new StringBuilder();
                    Iterator<TextInfo> it2 = textInfos.get(Integer.valueOf(intValue)).iterator();
                    while (it2.hasNext()) {
                        sb2.append(it2.next().getText());
                        sb2.append(System.lineSeparator());
                    }
                    sb.append((CharSequence) sb2);
                    sb.append(System.lineSeparator());
                }
                str = sb.toString();
            }
        }
        return str;
    }

    public boolean isWindows() {
        return identifyOsType().toLowerCase().contains("win");
    }

    public String identifyOsType() {
        return (System.getProperty("os.name") == null ? System.getProperty("OS") : System.getProperty("os.name")).toLowerCase();
    }

    public void validateLanguages(List<String> list) throws Tesseract4OcrException {
        if (list.size() == 0) {
            if (!new File(getTessData() + File.separatorChar + getTesseract4OcrEngineProperties().getDefaultLanguage() + ".traineddata").exists()) {
                throw new Tesseract4OcrException(Tesseract4OcrException.INCORRECT_LANGUAGE).setMessageParams(new String[]{getTesseract4OcrEngineProperties().getDefaultLanguage() + ".traineddata", getTessData()});
            }
            return;
        }
        for (String str : list) {
            if (!new File(getTessData() + File.separatorChar + str + ".traineddata").exists()) {
                throw new Tesseract4OcrException(Tesseract4OcrException.INCORRECT_LANGUAGE).setMessageParams(new String[]{str + ".traineddata", getTessData()});
            }
        }
    }

    public IMetaInfo getThreadLocalMetaInfo() {
        return this.threadLocalMetaInfo.get();
    }

    public IThreadLocalMetaInfoAware setThreadLocalMetaInfo(IMetaInfo iMetaInfo) {
        this.threadLocalMetaInfo.set(iMetaInfo);
        return this;
    }

    abstract void doTesseractOcr(File file, List<File> list, OutputFormat outputFormat, int i);

    /* JADX INFO: Access modifiers changed from: package-private */
    public String getTessData() {
        if (getTesseract4OcrEngineProperties().getPathToTessData() == null) {
            throw new Tesseract4OcrException(Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET);
        }
        return getTesseract4OcrEngineProperties().getPathToTessData().getAbsolutePath();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public void scheduledCheck() {
        ReflectionUtils.scheduledCheck();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public void onEvent() {
        OcrPdfCreatorMetaInfo threadLocalMetaInfo = getThreadLocalMetaInfo();
        if (!(threadLocalMetaInfo instanceof OcrPdfCreatorMetaInfo)) {
            EventCounterHandler.getInstance().onEvent(PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR, getThreadLocalMetaInfo(), getClass());
            return;
        }
        UUID documentId = threadLocalMetaInfo.getDocumentId();
        if (this.processedUUID.contains(documentId)) {
            return;
        }
        this.processedUUID.add(documentId);
        EventCounterHandler.getInstance().onEvent(OcrPdfCreatorMetaInfo.PdfDocumentType.PDFA.equals(threadLocalMetaInfo.getPdfDocumentType()) ? PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA : PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, threadLocalMetaInfo.getWrappedMetaInfo(), getClass());
    }

    /* JADX WARN: Multi-variable type inference failed */
    private ITesseractOcrResult processInputFiles(File file, OutputFormat outputFormat) {
        Map linkedHashMap = new LinkedHashMap();
        StringBuilder sb = new StringBuilder();
        ArrayList arrayList = new ArrayList();
        ITesseractOcrResult iTesseractOcrResult = null;
        try {
            try {
                int numberOfPageTiff = !ImagePreprocessingUtil.isTiffImage(file) ? 1 : ImagePreprocessingUtil.getNumberOfPageTiff(file);
                int i = getTesseract4OcrEngineProperties().isPreprocessingImages() ? numberOfPageTiff : 1;
                int i2 = getTesseract4OcrEngineProperties().isPreprocessingImages() ? 1 : numberOfPageTiff;
                for (int i3 = 1; i3 <= i; i3++) {
                    String str = outputFormat.equals(OutputFormat.HOCR) ? ".hocr" : ".txt";
                    for (int i4 = 0; i4 < i2; i4++) {
                        arrayList.add(createTempFile(str));
                    }
                    doTesseractOcr(file, arrayList, outputFormat, i3);
                    if (outputFormat.equals(OutputFormat.HOCR)) {
                        Map parseHocrFile = TesseractHelper.parseHocrFile(arrayList, getTesseract4OcrEngineProperties().getTextPositioning());
                        if (getTesseract4OcrEngineProperties().isPreprocessingImages()) {
                            linkedHashMap.put(Integer.valueOf(i3), parseHocrFile.get(1));
                        } else {
                            linkedHashMap = parseHocrFile;
                        }
                        iTesseractOcrResult = new TextInfoTesseractOcrResult(linkedHashMap);
                    } else {
                        for (File file2 : arrayList) {
                            if (Files.exists(Paths.get(file2.getAbsolutePath(), new String[0]), new LinkOption[0])) {
                                sb.append(TesseractHelper.readTxtFile(file2));
                            }
                        }
                        iTesseractOcrResult = new StringTesseractOcrResult(sb.toString());
                    }
                }
            } catch (IOException e) {
                LoggerFactory.getLogger(getClass()).error(MessageFormatUtil.format(Tesseract4LogMessageConstant.CANNOT_OCR_INPUT_FILE, new Object[]{e.getMessage()}));
                Iterator<File> it = arrayList.iterator();
                while (it.hasNext()) {
                    TesseractHelper.deleteFile(it.next().getAbsolutePath());
                }
            }
            return iTesseractOcrResult;
        } finally {
            Iterator<File> it2 = arrayList.iterator();
            while (it2.hasNext()) {
                TesseractHelper.deleteFile(it2.next().getAbsolutePath());
            }
        }
    }

    private File createTempFile(String str) {
        return new File(TesseractOcrUtil.getTempFilePath(UUID.randomUUID().toString(), str));
    }

    private void verifyImageFormatValidity(File file) throws Tesseract4OcrException {
        boolean z = false;
        String str = "incorrect extension";
        int lastIndexOf = file.getAbsolutePath().lastIndexOf(46);
        if (lastIndexOf > 0) {
            str = new String(file.getAbsolutePath().toCharArray(), lastIndexOf + 1, (file.getAbsolutePath().length() - lastIndexOf) - 1);
            Iterator<String> it = SUPPORTED_IMAGE_FORMATS.iterator();
            while (true) {
                if (!it.hasNext()) {
                    break;
                } else if (it.next().equals(str.toLowerCase())) {
                    z = true;
                    break;
                }
            }
        }
        if (z) {
            return;
        }
        LoggerFactory.getLogger(getClass()).error(MessageFormatUtil.format("Cannot read input image {0}", new Object[]{file.getAbsolutePath()}));
        throw new Tesseract4OcrException(Tesseract4OcrException.INCORRECT_INPUT_IMAGE_FORMAT).setMessageParams(new String[]{str});
    }
}
