import { useEffect } from "react";
import app from "nystem";

// Cached PDF library module: stays false until the first PDF upload triggers
// the dynamic import inside parsePdf, then is reused for every later upload.
let pdfLib = false;

/**
 * Left-pads a non-negative integer with zeros to three digits.
 *
 * Values below 100 come back as a zero-padded string; values of 100 and above
 * are returned untouched (i.e. still as whatever was passed in).
 *
 * @param {number} int - Number to pad.
 * @returns {string|number} Padded string, or the input itself when >= 100.
 */
const add00 = (int) => {
  if (int < 10) return `00${int}`;
  if (int < 100) return `0${int}`;
  return int;
};

/**
 * Headless component (renders nothing) that listens to the "fileUpload" event
 * and replaces an uploaded PDF with the JPEG image(s) embedded in it.
 *
 * Behaviour of the registered handler:
 * - Uploads that do not target this model/content type, carry no data, or are
 *   not PDFs are ignored (handler returns undefined).
 * - If any embedded image is not JPEG-encoded (filter other than /DCTDecode),
 *   or the PDF contains no embedded images at all, the upload is left untouched.
 * - A PDF with exactly one JPEG is rewritten into a single image upload.
 * - A PDF with several JPEGs emits one new "fileUpload" event per image, all
 *   sharing a freshly generated mediaId.
 *
 * @param {object} props
 * @param {object} props.model - Model whose `id` must match `query.modelId`.
 * @param {object} props.view - View whose `contentType` must match `query.contentType`.
 * @returns {null}
 */
const MediaPdfExtractor = ({ model, view }) => {
  useEffect(() => {
    const parsePdf = async (query) => {
      if (
        query.contentType !== view.contentType ||
        query.modelId !== model.id ||
        !query.data ||
        query.ext !== "pdf"
      )
        return;

      await app().event("fileUploadLog", { log: "PDF split started" });
      // Load the PDF library on first use only; later uploads reuse the cache.
      if (!pdfLib) pdfLib = (await import("../../client/pdfLib")).default;

      const { PDFDocument, PDFName, PDFRawStream } = pdfLib;

      const pdfDoc = await PDFDocument.load(query.data);

      // Set when at least one embedded image is not a raw JPEG stream.
      let tiffs = false;
      const images = pdfDoc.context
        .enumerateIndirectObjects()
        .filter(([, pdfObject]) => {
          if (!(pdfObject instanceof PDFRawStream)) return false;

          const subtype = pdfObject.dict.get(PDFName.of("Subtype"));
          if (!subtype || subtype.encodedName !== "/Image") return false;

          let filter = pdfObject.dict.get(PDFName.of("Filter"));
          // The Filter entry may be missing (unfiltered stream) or an array of
          // filters; guard before unwrapping so a missing entry cannot throw.
          // eslint-disable-next-line prefer-destructuring
          if (filter && filter.array) filter = filter.array[0];

          if (!filter || filter.encodedName !== "/DCTDecode") {
            console.log("Tiff files", pdfObject.dict);
            tiffs = true;
          }

          return true;
        });

      // Non-JPEG image data cannot be passed through as-is; keep the PDF.
      if (tiffs) return;

      // A PDF without embedded images (e.g. text only) has nothing to extract;
      // leave the upload untouched instead of failing on images[0] below.
      if (images.length === 0) return;

      if (images.length > 1) {
        console.log("Pdf image count", images.length);

        const mediaId = app().uuid();
        for (let index = 0; index < images.length; index++) {
          const [, pdfObject] = images[index];

          // NOTE(review): these events are dispatched without being awaited, so
          // "PDF split done" below is logged once they are fired, not finished.
          app().event("fileUpload", {
            ...query,
            id: app().uuid(),
            type: "image",
            ext: "jpeg",
            name: `${query.name}.${add00(index)}.jpeg`,
            data: pdfObject.contents,
            pos: index,
            mediaId,
          });
        }
        await app().event("fileUploadLog", { log: "PDF split done" });

        // NOTE(review): this returns the incoming query.mediaId, not the
        // mediaId generated above for the pages — confirm this is intended.
        return { mediaId: query.mediaId };
      }

      // Exactly one JPEG: turn the PDF upload itself into an image upload.
      const [, pdfObject] = images[0];
      return {
        ...query,
        data: pdfObject.contents,
        type: "image",
        ext: "jpeg",
        name: `${query.name}.jpeg`,
      };
    };

    // 990 is presumably the handler priority/order — verify against nystem.
    app().on("fileUpload", 990, parsePdf);
    return () => {
      app().off("fileUpload", parsePdf);
    };
    // Re-register only when the props the handler closes over change, rather
    // than removing and re-adding the listener on every render.
  }, [model, view]);

  return null;
};
export default MediaPdfExtractor;

/*
Jpg
key: PDFName {encodedName: "/Filter"}
value: PDFName {encodedName: "/DCTDecode"}

Tiff
key: PDFName {encodedName: "/Filter"}
value: PDFName {encodedName: "/CCITTFaxDecode"}

https://github.com/paulcuth/netpbm-viewer/blob/master/js/site.js


https://github.com/ronanpaixao/PyPDFTK/blob/master/pdf_images.py
import PyPDF2
import struct

"""
Links:
PDF format: http://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
CCITT Group 4: https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-T.6-198811-I!!PDF-E&type=items
Extract images from pdf: http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
Extract images coded with CCITTFaxDecode in .net: http://stackoverflow.com/questions/2641770/extracting-image-from-pdf-with-ccittfaxdecode-filter
TIFF format and tags: http://www.awaresystems.be/imaging/tiff/faq.html
"""


def tiff_header_for_CCITT(width, height, img_size, CCITT_group=4):
    tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h'
    return struct.pack(tiff_header_struct,
         b'II',  # Byte order indication: Little endian
         42,  # Version number (always 42)
         8,  # Offset to first IFD
         8,  # Number of tags in IFD
         256, 4, 1, width,  # ImageWidth, LONG, 1, width
         257, 4, 1, height,  # ImageLength, LONG, 1, length
         258, 3, 1, 1,  # BitsPerSample, SHORT, 1, 1
         259, 3, 1, CCITT_group,  # Compression, SHORT, 1, 4 = CCITT Group 4 fax encoding
         262, 3, 1, 0,  # PhotometricInterpretation, SHORT, 1, 0 = WhiteIsZero
         273, 4, 1, struct.calcsize(tiff_header_struct),  # StripOffsets, LONG, 1, len of header
         278, 4, 1, height,  # RowsPerStrip, LONG, 1, length
         279, 4, 1, img_size,  # StripByteCounts, LONG, 1, size of image
         0  # last IFD
         )

pdf_filename = 'scan.pdf'
pdf_file = open(pdf_filename, 'rb')
cond_scan_reader = PyPDF2.PdfFileReader(pdf_file)
for i in range(0, cond_scan_reader.getNumPages()):
    page = cond_scan_reader.getPage(i)
    xObject = page['/Resources']['/XObject'].getObject()
    for obj in xObject:
        if xObject[obj]['/Subtype'] == '/Image':
            """
            The  CCITTFaxDecode filter decodes image data that has been encoded using
            either Group 3 or Group 4 CCITT facsimile (fax) encoding. CCITT encoding is
            designed to achieve efficient compression of monochrome (1 bit per pixel) image
            data at relatively low resolutions, and so is useful only for bitmap image data, not
            for color images, grayscale images, or general data.

            K < 0 --- Pure two-dimensional encoding (Group 4)
            K = 0 --- Pure one-dimensional encoding (Group 3, 1-D)
            K > 0 --- Mixed one- and two-dimensional encoding (Group 3, 2-D)
            """
            if xObject[obj]['/Filter'] == '/CCITTFaxDecode':
                if xObject[obj]['/DecodeParms']['/K'] == -1:
                    CCITT_group = 4
                else:
                    CCITT_group = 3
                width = xObject[obj]['/Width']
                height = xObject[obj]['/Height']
                data = xObject[obj]._data  # sorry, getData() does not work for CCITTFaxDecode
                img_size = len(data)
                tiff_header = tiff_header_for_CCITT(width, height, img_size, CCITT_group)
                img_name = obj[1:] + '.tiff'
                with open(img_name, 'wb') as img_file:
                    img_file.write(tiff_header + data)
                #
                # import io
                # from PIL import Image
                # im = Image.open(io.BytesIO(tiff_header + data))
pdf_file.close()

*/
