001/*
002 * Copyright (C) 2014 Konik.io
003 *
004 * This file is part of Konik library.
005 *
006 * Konik library is free software: you can redistribute it and/or modify
007 * it under the terms of the GNU Affero General Public License as published by
008 * the Free Software Foundation, either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * Konik library is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014 * GNU Affero General Public License for more details.
015 *
016 * You should have received a copy of the GNU Affero General Public License
017 * along with Konik library.  If not, see <http://www.gnu.org/licenses/>.
018 */
019package io.konik.itext.extractor;
020
021import static com.itextpdf.text.pdf.PdfName.EF;
022import static com.itextpdf.text.pdf.PdfName.F;
023import static com.itextpdf.text.pdf.PdfReader.getStreamBytes;
024import static javax.xml.bind.JAXBContext.newInstance;
025import io.konik.harness.InvoiceExtractionError;
026import io.konik.harness.InvoiceExtractor;
027import io.konik.zugferd.Invoice;
028
029import java.io.ByteArrayInputStream;
030import java.io.IOException;
031import java.io.InputStream;
032
033import javax.inject.Named;
034import javax.inject.Singleton;
035import javax.xml.bind.JAXBElement;
036import javax.xml.bind.JAXBException;
037import javax.xml.bind.Unmarshaller;
038import javax.xml.transform.Source;
039import javax.xml.transform.stream.StreamSource;
040
041import com.itextpdf.text.pdf.PRStream;
042import com.itextpdf.text.pdf.PdfArray;
043import com.itextpdf.text.pdf.PdfDictionary;
044import com.itextpdf.text.pdf.PdfName;
045import com.itextpdf.text.pdf.PdfReader;
046import com.itextpdf.text.pdf.PdfStream;
047
048/**
049 * The Class iText Pdf Invoice Extractor.
050 * 
051 */
052@Named
053@Singleton
054public class ITextPdfInvoiceExtractor implements InvoiceExtractor {
055
056   private final static PdfName AF = new PdfName("AF");
057
058   @Override
059   public Invoice extract(byte[] pdfIn) {
060      return extract(new ByteArrayInputStream(pdfIn));
061   }
062   
063   @Override
064   public Invoice extract(InputStream pdfStream) {
065         PdfReader reader = getPdfReader(pdfStream);
066         PdfArray af = getValidAf(reader.getCatalog());
067         PdfDictionary fileSpec = getValidFileSpec(af); 
068         PdfDictionary ef = getValidEf(fileSpec);
069         byte[] invoiceXmlContent = getFStream(ef);
070         return covertToObjectModel(invoiceXmlContent);
071   }
072
073   private PdfReader getPdfReader(InputStream pdfStream) {
074      try {
075         return new PdfReader(pdfStream);
076      } catch (IOException e) {
077         throw new InvoiceExtractionError("Could not read or open pdf.",e);
078      }
079   }
080
081   private PdfArray getValidAf(PdfDictionary catalog) {
082      if (catalog.contains(AF)) {
083         PdfArray af = catalog.getAsArray(AF);
084         if (!af.isEmpty() && af.getDirectObject(0).isDictionary())
085            return af;
086      }
087      throw new InvoiceExtractionError("Pdf catalog does not contain Valid AF Entry");
088   }
089   
090   private PdfDictionary getValidFileSpec(PdfArray af) {
091      if (af.isEmpty() || af.getAsDict(0) == null) {
092         throw new InvoiceExtractionError("Pdf does not contain a FileSpec Entry");
093      }
094      return af.getAsDict(0);
095   }
096   
097   private PdfDictionary getValidEf(PdfDictionary fileSpec) {
098      if (fileSpec.contains(EF)) {
099         return fileSpec.getAsDict(EF);
100      }
101      throw new InvoiceExtractionError("Pdf catalog does not contain Valid EF Entry");
102   }
103
104   private byte[] getFStream(PdfDictionary ef){
105      if (ef.contains(F)) {
106         PdfStream xmlStream = ef.getAsStream(F);
107         try {
108            return getStreamBytes((PRStream) xmlStream);
109         } catch (IOException e) {
110            throw new InvoiceExtractionError("Could not extrac xml content form pdf.",e);
111         }
112      }
113      throw new InvoiceExtractionError("Pdf catalog does not contain Valid F Entry");
114   }
115   
116   static Invoice covertToObjectModel(byte[] xmlContent){
117      try {
118         Unmarshaller unmarshaller = newInstance("io.konik.zugferd").createUnmarshaller();
119         Source s = new StreamSource(new ByteArrayInputStream(xmlContent));
120         JAXBElement<Invoice> invoice = unmarshaller.unmarshal(s, Invoice.class);
121         return invoice.getValue();
122      } catch (JAXBException e) {
123         throw new InvoiceExtractionError("Could not read parse xml content",e);
124      }
125   }
126}