package org.apache.crunch.io.text.xml;

import com.google.common.base.Charsets;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:lib/crunch-core-0.13.0.jar:org/apache/crunch/io/text/xml/XmlInputFormat.class */
/**
 * Input format that cuts a text file into records delimited by a configured start tag and end
 * tag. Each record is the text from the start tag through the end tag (both inclusive),
 * re-encoded as UTF-8.
 *
 * <p>Configuration keys: {@link #START_TAG_KEY}, {@link #END_TAG_KEY} and {@link #ENCODING}.
 */
public class XmlInputFormat extends TextInputFormat {
    private static final Logger log = LoggerFactory.getLogger(XmlInputFormat.class);

    /** Configuration key holding the literal text that opens a record. Required, non-empty. */
    public static final String START_TAG_KEY = "xmlinput.start";

    /** Configuration key holding the literal text that closes a record. Required, non-empty. */
    public static final String END_TAG_KEY = "xmlinput.end";

    /** Configuration key holding the charset name of the input file; defaults to UTF-8. */
    public static final String ENCODING = "xml.encoding";

    /* loaded from: input_file:lib/crunch-core-0.13.0.jar:org/apache/crunch/io/text/xml/XmlInputFormat$XmlRecordReader.class */
    /**
     * Record reader that scans its split for start-tag ... end-tag spans.
     *
     * <p>All setup happens in the constructor; {@link #initialize} is a no-op. The reader keeps
     * an estimated byte position ({@code readByteCounter}) by re-encoding every character it
     * reads, and uses that estimate to decide when the split has been exhausted.
     */
    public static class XmlRecordReader extends RecordReader<LongWritable, Text> {
        private static final String DEFAULT_ENCODING = Charsets.UTF_8.name();
        private final char[] startTag;
        private final char[] endTag;
        private final long start;
        private final long end;
        private LongWritable currentKey;
        private Text currentValue;
        private final DataOutputBuffer outBuffer;
        private final BufferedReader inReader;
        private final OutputStreamWriter outWriter;
        private final String inputEncoding;
        private long readByteCounter;
        private final CharsetEncoder charsetEncoder;
        // A high surrogate is held here until its low surrogate arrives, so that the pair can be
        // measured as one code point (a lone surrogate cannot be encoded).
        private char pendingHighSurrogate;
        private boolean hasPendingHighSurrogate;

        /**
         * Opens the split's file and positions the stream at the split start.
         *
         * @param fileSplit the split to read
         * @param configuration job configuration supplying the tags and the encoding
         * @throws IOException if the encoding name is unsupported or the file cannot be
         *     opened or positioned
         * @throws IllegalArgumentException if the start or end tag is missing or empty
         */
        public XmlRecordReader(FileSplit fileSplit, Configuration configuration) throws IOException {
            this.inputEncoding = configuration.get(XmlInputFormat.ENCODING, DEFAULT_ENCODING);
            this.startTag = readTag(configuration, XmlInputFormat.START_TAG_KEY, this.inputEncoding);
            this.endTag = readTag(configuration, XmlInputFormat.END_TAG_KEY, this.inputEncoding);
            this.start = fileSplit.getStart();
            this.end = this.start + fileSplit.getLength();
            this.readByteCounter = this.start;

            // Build everything that does not hold a file handle before opening the file, so a
            // failure here cannot leak the stream.
            Charset charset = Charset.forName(this.inputEncoding);
            this.charsetEncoder = charset.newEncoder();
            this.outBuffer = new DataOutputBuffer();
            this.outWriter = new OutputStreamWriter(this.outBuffer, this.inputEncoding);

            FSDataInputStream open = fileSplit.getPath().getFileSystem(configuration).open(fileSplit.getPath());
            boolean positioned = false;
            try {
                open.seek(this.start);
                positioned = true;
            } finally {
                if (!positioned) {
                    open.close();
                }
            }
            this.inReader = new BufferedReader(new InputStreamReader(open, charset));
        }

        /**
         * Reads a tag from the configuration and round-trips it through the input encoding,
         * so it is compared in the same form the decoded input will have.
         */
        private static char[] readTag(Configuration configuration, String key, String encoding)
                throws UnsupportedEncodingException {
            String tag = configuration.get(key);
            if (tag == null || tag.isEmpty()) {
                throw new IllegalArgumentException("Missing or empty XML tag configuration: " + key);
            }
            return new String(tag.getBytes(encoding), encoding).toCharArray();
        }

        /**
         * Reads the next start-tag ... end-tag span into {@code text}.
         *
         * @param longWritable receives the estimated byte position just past the record
         * @param text receives the record, encoded as UTF-8
         * @return {@code true} if a complete record was read; {@code false} if the split is
         *     exhausted or the input ended before a complete record was found
         */
        private boolean next(LongWritable longWritable, Text text) throws IOException {
            if (this.readByteCounter >= this.end || !readUntilMatch(this.startTag, false)) {
                return false;
            }
            try {
                this.outWriter.write(this.startTag);
                if (!readUntilMatch(this.endTag, true)) {
                    return false;
                }
                longWritable.set(this.readByteCounter);
                this.outWriter.flush();
                // Only the first getLength() bytes of the backing array are valid, and the
                // UTF-8 form may differ in length from the input-encoded form.
                byte[] utf8 = toUTF8(this.outBuffer.getData(), this.outBuffer.getLength());
                text.set(utf8, 0, utf8.length);
                return true;
            } finally {
                // Always leave the buffer empty for the next record.
                this.outWriter.flush();
                this.outBuffer.reset();
            }
        }

        /** Transcodes the first {@code length} bytes of {@code bArr} from the input encoding to UTF-8. */
        private byte[] toUTF8(byte[] bArr, int length) throws UnsupportedEncodingException {
            return new String(bArr, 0, length, this.inputEncoding).getBytes(Charsets.UTF_8);
        }

        /** Closes the underlying input reader. */
        @Override // org.apache.hadoop.mapreduce.RecordReader, java.io.Closeable, java.lang.AutoCloseable
        public void close() throws IOException {
            this.inReader.close();
        }

        /**
         * Returns the fraction of the split consumed, in the range {@code [0, 1]}.
         *
         * <p>An empty split reports {@code 0}. The value is capped at {@code 1} because the
         * reader may run past the split end to finish a record.
         */
        @Override // org.apache.hadoop.mapreduce.RecordReader
        public float getProgress() throws IOException {
            long length = this.end - this.start;
            if (length <= 0) {
                return 0.0f;
            }
            return Math.min(1.0f, ((float) (this.readByteCounter - this.start)) / ((float) length));
        }

        /**
         * Consumes characters until {@code cArr} has been seen in full.
         *
         * @param cArr the non-empty character sequence to look for
         * @param z {@code true} when inside a record: every character read is copied to the
         *     output buffer and the split end is ignored; {@code false} when searching for a
         *     record start: nothing is copied and the search stops at the split end
         * @return {@code true} if the sequence was found; {@code false} on end of input, or,
         *     when {@code z} is {@code false}, on passing the split end outside a partial match
         */
        private boolean readUntilMatch(char[] cArr, boolean z) throws IOException {
            int matched = 0;
            while (true) {
                int read = this.inReader.read();
                // Check for end of input first: -1 is not a character and must not be counted.
                if (read == -1) {
                    return false;
                }
                this.readByteCounter += calculateCharacterByteLength((char) read);
                if (z) {
                    this.outWriter.write(read);
                }
                if (read == cArr[matched]) {
                    matched++;
                    if (matched >= cArr.length) {
                        return true;
                    }
                } else {
                    // The mismatching character may itself begin a new match (e.g. "<<tag>").
                    matched = (read == cArr[0]) ? 1 : 0;
                }
                if (!z && matched == 0 && this.readByteCounter >= this.end) {
                    return false;
                }
            }
        }

        /** Returns the key set by the most recent {@link #nextKeyValue()} call. */
        @Override // org.apache.hadoop.mapreduce.RecordReader
        public LongWritable getCurrentKey() throws IOException, InterruptedException {
            return this.currentKey;
        }

        /** Returns the value set by the most recent {@link #nextKeyValue()} call. */
        @Override // org.apache.hadoop.mapreduce.RecordReader
        public Text getCurrentValue() throws IOException, InterruptedException {
            return this.currentValue;
        }

        /** No-op: the constructor already performs all initialization. */
        @Override // org.apache.hadoop.mapreduce.RecordReader
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        }

        /**
         * Advances to the next record, allocating a fresh key and value for it.
         *
         * @return {@code true} if a record was read
         */
        @Override // org.apache.hadoop.mapreduce.RecordReader
        public boolean nextKeyValue() throws IOException, InterruptedException {
            this.currentKey = new LongWritable();
            this.currentValue = new Text();
            return next(this.currentKey, this.currentValue);
        }

        /**
         * Returns how many bytes {@code c} contributes when encoded in the input encoding.
         *
         * <p>A high surrogate contributes {@code 0} and is remembered; the following low
         * surrogate contributes the size of the whole pair.
         *
         * @throws RuntimeException if the character cannot be encoded in the input encoding
         */
        private int calculateCharacterByteLength(char c) {
            if (Character.isHighSurrogate(c)) {
                this.pendingHighSurrogate = c;
                this.hasPendingHighSurrogate = true;
                return 0;
            }
            char[] chars;
            if (this.hasPendingHighSurrogate && Character.isLowSurrogate(c)) {
                chars = new char[]{this.pendingHighSurrogate, c};
            } else {
                chars = new char[]{c};
            }
            this.hasPendingHighSurrogate = false;
            try {
                return this.charsetEncoder.encode(CharBuffer.wrap(chars)).limit();
            } catch (CharacterCodingException e) {
                throw new RuntimeException("The character attempting to be read (" + c + ") could not be encoded with " + this.inputEncoding, e);
            }
        }
    }

    /**
     * Creates an {@link XmlRecordReader} for the given split.
     *
     * @param inputSplit the split to read; must be a {@link FileSplit}
     * @param taskAttemptContext supplies the job configuration
     * @return a reader positioned at the start of the split, never {@code null}
     * @throws RuntimeException wrapping the {@link IOException} if the reader cannot be created
     */
    @Override // org.apache.hadoop.mapreduce.lib.input.TextInputFormat, org.apache.hadoop.mapreduce.InputFormat
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
        try {
            return new XmlRecordReader((FileSplit) inputSplit, taskAttemptContext.getConfiguration());
        } catch (IOException e) {
            // This override declares no checked exceptions, so fail with an unchecked one that
            // keeps the cause; returning null would only defer the failure to the caller.
            log.error("Error while creating XmlRecordReader", e);
            throw new RuntimeException("Error while creating XmlRecordReader", e);
        }
    }
}
