Monday, 6 August 2012

Use Apache PDFBox to convert PDF to image (supports BMP, bmp, jpeg, wbmp, gif, png, JPG, jpg, JPEG, WBMP)

  • dependency
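  • <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <version>1.7.0</version>
    </dependency>
    <dependency>
        <groupId>org.bouncycastle</groupId>
        <artifactId>bcprov-jdk15</artifactId>
        <version>1.46</version>
    </dependency>
    <dependency>
        <groupId>org.bouncycastle</groupId>
        <artifactId>bcmail-jdk15</artifactId>
        <version>1.46</version>
    </dependency>
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>fontbox</artifactId>
        <version>1.7.0</version>
    </dependency>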
    
  • Code
  • import au.gov.nsw.police.nodi.common.CustomProperties;
    import org.apache.pdfbox.exceptions.CryptographyException;
    import org.apache.pdfbox.exceptions.InvalidPasswordException;
    import org.apache.pdfbox.pdmodel.PDDocument;
    import org.apache.pdfbox.pdmodel.PDPage;
    import org.apache.pdfbox.util.PDFImageWriter;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.w3c.dom.Node;
    import org.w3c.dom.NodeList;
    
    import javax.imageio.*;
    import javax.imageio.metadata.IIOInvalidTreeException;
    import javax.imageio.metadata.IIOMetadata;
    import javax.imageio.metadata.IIOMetadataNode;
    import java.awt.*;
    import java.awt.image.BufferedImage;
    import java.awt.image.RenderedImage;
    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.util.Iterator;
    import java.util.List;
    
    import static com.google.common.base.Throwables.getStackTraceAsString;
    
    public class PDFToImage {
        /** Logger shared by all static helpers in this class. */
        private static final Logger log = LoggerFactory.getLogger(PDFToImage.class);
        /** Name of the metadata tree format requested from and merged back into the image metadata. */
        private static final String STANDARD_METADATA_FORMAT = "javax_imageio_1.0";
        /** Password handed to the document when it reports itself as encrypted; read once from the "pdf.from.esb.encrypted.password" property. */
        private static final String PDF_ENCRYPTED_PASSWORD = CustomProperties.getInstance().getProperty("pdf.from.esb.encrypted.password");
        /** Format name used to look up an image writer and passed to the PDF image writer in main. */
        private static final String IMAGE_FORMAT = "png";
        /** Rendering resolution used unless the screen reports a higher one (presumably dots per inch - TODO confirm). */
        private static final int DEFAULT_IMAGE_RESOLUTION = 256;
    
        /**
         * Renders a single page of the given PDF document to an image.
         *
         * <p>The document is decrypted first when it reports itself as encrypted. The rendering
         * resolution is the larger of {@code DEFAULT_IMAGE_RESOLUTION} and the screen resolution;
         * when no screen is available the default is used.
         *
         * <p><b>The document is always closed before this method returns</b>, whether rendering
         * succeeded or not, so the same {@code PDDocument} instance cannot be passed in twice.
         *
         * @param document   the PDF to render; closed by this method
         * @param pageNumber position of the page in the list returned by the document catalog
         *                   (used directly as a {@code List.get} index, i.e. the first page is 0)
         * @return the rendered page, or {@code null} when no image writer exists for the image format
         * @throws IOException if decrypting, rendering or closing the document fails
         */
        public static RenderedImage convertPdfOfGivenPageNumberToRenderImage(PDDocument document, int pageNumber) throws IOException {
            try {
                decryptDocument(PDF_ENCRYPTED_PASSWORD, document);

                int renderResolution = DEFAULT_IMAGE_RESOLUTION;
                try {
                    // Only ever raise the resolution above the default, never lower it.
                    renderResolution = Math.max(renderResolution, Toolkit.getDefaultToolkit().getScreenResolution());
                } catch (HeadlessException headless) {
                    log.debug("As it can't get the screen resolution. Use default resolution: {}", renderResolution);
                }

                PDPage requestedPage = (PDPage) document.getDocumentCatalog().getAllPages().get(pageNumber);
                BufferedImage renderedPage = requestedPage.convertToImage(BufferedImage.TYPE_INT_RGB, renderResolution);
                return covertBufferedImageToRenderImage(renderedPage, IMAGE_FORMAT, renderResolution);
            } finally {
                if (document != null) {
                    document.close();
                }
            }
        }
    
        /**
         * Decrypts the document with the given password when the document reports itself as encrypted.
         *
         * <p>A wrong password or a cryptography failure is logged at error level and NOT rethrown,
         * so the caller continues with the document in whatever state the failed attempt left it.
         *
         * @param password password to try
         * @param document document to decrypt in place
         * @throws IOException if the decrypt call itself fails with an I/O error
         */
        private static void decryptDocument(String password, PDDocument document) throws IOException {
            if (!document.isEncrypted()) {
                return;
            }

            try {
                document.decrypt(password);
            } catch (InvalidPasswordException wrongPassword) {
                log.error("Error: The document is encrypted. Please provide correct PDF_ENCRYPTED_PASSWORD. Cause:\n{}", getStackTraceAsString(wrongPassword));
            } catch (CryptographyException cryptoFailure) {
                log.error(getStackTraceAsString(cryptoFailure));
            }
        }
    
        /**
         * Prepares writer parameters and resolution metadata for the image and returns it as a
         * {@link RenderedImage}.
         *
         * <p>The first image writer registered for {@code imageFormat} is used. When that writer
         * supports compression, explicit compression at quality 1.0 is selected on its parameters.
         * The writer is disposed before returning.
         *
         * <p>NOTE(review): the returned value comes from {@code IIOImage.getRenderedImage()}; the
         * metadata built here is attached to the intermediate {@code IIOImage} only - confirm
         * whether callers expect it to travel with the returned image.
         *
         * @param image       the rendered page
         * @param imageFormat informal format name used to look up an image writer (e.g. "png")
         * @param resolution  resolution forwarded to the metadata helper
         * @return the image, or {@code null} when no writer is registered for {@code imageFormat}
         * @throws IOException declared for callers; kept for interface compatibility
         */
        private static RenderedImage covertBufferedImageToRenderImage(BufferedImage image, String imageFormat, int resolution) throws IOException {
            // The element type is required: with a raw Iterator, next() yields Object and the
            // assignment to ImageWriter below does not compile.
            Iterator<ImageWriter> imageWriterIterator = ImageIO.getImageWritersByFormatName(imageFormat);
            if (!imageWriterIterator.hasNext()) {
                return null;
            }

            ImageWriter imageWriter = null;
            try {
                imageWriter = imageWriterIterator.next();
                ImageWriteParam writerParams = imageWriter.getDefaultWriteParam();
                if (writerParams.canWriteCompressed()) {
                    writerParams.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
                    // reset the compression type if overwritten by setCompressionMode
                    if (writerParams.getCompressionType() == null) {
                        writerParams.setCompressionType(writerParams.getCompressionTypes()[0]);
                    }
                    writerParams.setCompressionQuality(1.0f);
                }
                IIOMetadata meta = createMetadata(image, imageWriter, writerParams, resolution);
                IIOImage iioImage = new IIOImage(image, null, meta);
                return iioImage.getRenderedImage();
            } finally {
                if (imageWriter != null) {
                    imageWriter.dispose();
                }
            }
        }
    
        /**
         * Loads a PDF document from an in-memory byte array.
         *
         * @param imageOfPdf the raw bytes of the PDF file
         * @return the loaded document
         * @throws IOException if the bytes cannot be loaded as a PDF
         */
        private static PDDocument loadPdfDocumentFromBytes(byte[] imageOfPdf) throws IOException {
            return PDDocument.load(new ByteArrayInputStream(imageOfPdf));
        }
    
        /**
         * Loads a PDF document from a file.
         *
         * @param fileName name of the PDF file to load
         * @return the loaded document
         * @throws IOException if the file cannot be loaded as a PDF
         */
        private static PDDocument loadPdfDocumentFromFile(String fileName) throws IOException {
            PDDocument loadedDocument = PDDocument.load(fileName);
            return loadedDocument;
        }
    
        //----------------------Copy from (start): org.apache.pdfbox.util.ImageIOUtil-----------------------------------
        /**
         * Builds the writer's default image metadata for the image and stamps the resolution into it.
         *
         * <p>The image type comes from the writer parameters' destination type when one is set,
         * otherwise it is derived from the image itself.
         *
         * @param image        image the metadata describes
         * @param imageWriter  writer that supplies the default metadata
         * @param writerParams parameters passed to the writer when creating the metadata
         * @param resolution   resolution to record in the metadata
         * @return the metadata, or {@code null} when the resolution could not be recorded in it
         */
        private static IIOMetadata createMetadata(RenderedImage image, ImageWriter imageWriter, ImageWriteParam writerParams, int resolution) {
            ImageTypeSpecifier destinationType = writerParams.getDestinationType();
            ImageTypeSpecifier effectiveType = destinationType != null
                    ? destinationType
                    : ImageTypeSpecifier.createFromRenderedImage(image);

            IIOMetadata defaultMetadata = imageWriter.getDefaultImageMetadata(effectiveType, writerParams);
            if (addResolution(defaultMetadata, resolution)) {
                return defaultMetadata;
            }
            return null;
        }
    
        /**
         * Writes the resolution into the "Dimension" node of the metadata's standard tree.
         *
         * <p>Both {@code HorizontalPixelSize} and {@code VerticalPixelSize} are set to
         * {@code resolution / 25.4}; missing nodes are created on the way.
         *
         * <p>NOTE(review): the javax_imageio_1.0 format describes pixel size in millimetres per
         * pixel, while the value written here is per-millimetre. This matches what this class
         * needs for its single "png" use - confirm before reusing with another image format.
         *
         * @param meta       metadata to update in place
         * @param resolution resolution to record
         * @return {@code true} when the metadata was updated; {@code false} when it is read-only
         *         or does not support the standard metadata format
         * @throws RuntimeException if the updated tree is rejected by the metadata; the original
         *                          {@link IIOInvalidTreeException} is attached as the cause
         */
        private static boolean addResolution(IIOMetadata meta, int resolution) {
            if (meta.isReadOnly() || !meta.isStandardMetadataFormatSupported()) {
                return false;
            }

            IIOMetadataNode root = (IIOMetadataNode) meta.getAsTree(STANDARD_METADATA_FORMAT);
            IIOMetadataNode dimension = getChildNode(root, "Dimension");
            String pixelSize = Double.toString(resolution / 25.4);

            IIOMetadataNode horizontalPixelSize = getChildNode(dimension, "HorizontalPixelSize");
            horizontalPixelSize.setAttribute("value", pixelSize);

            IIOMetadataNode verticalPixelSize = getChildNode(dimension, "VerticalPixelSize");
            verticalPixelSize.setAttribute("value", pixelSize);

            try {
                meta.mergeTree(STANDARD_METADATA_FORMAT, root);
            } catch (IIOInvalidTreeException e) {
                // Chain the cause so the offending node and the original stack trace are not lost.
                throw new RuntimeException("Cannot update image metadata: " + e.getMessage(), e);
            }
            return true;
        }
    
    
        /**
         * Returns the first direct child of {@code parentNode} whose node name equals
         * {@code childNodeName}; when there is none, a new child with that name is appended
         * to the parent and returned.
         *
         * @param parentNode    node whose children are searched
         * @param childNodeName name of the child to find or create
         * @return the existing or newly created child node
         */
        private static IIOMetadataNode getChildNode(IIOMetadataNode parentNode, String childNodeName) {
            NodeList children = parentNode.getChildNodes();
            int index = 0;
            while (index < children.getLength()) {
                Node candidate = children.item(index);
                if (childNodeName.equals(candidate.getNodeName())) {
                    return (IIOMetadataNode) candidate;
                }
                index++;
            }

            // No match among the existing children: create the node on demand.
            return createChildNodeIfNotExist(parentNode, childNodeName);
        }
    
    
        /**
         * Creates a new node named {@code childNodeName} and appends it to {@code parentNode}.
         *
         * <p>Despite the name, this method does not check for an existing child; the caller
         * is expected to have done so.
         *
         * @param parentNode    node that receives the new child
         * @param childNodeName name of the node to create
         * @return the newly appended child node
         */
        private static IIOMetadataNode createChildNodeIfNotExist(IIOMetadataNode parentNode, String childNodeName) {
            IIOMetadataNode created = new IIOMetadataNode(childNodeName);
            parentNode.appendChild(created);
            return created;
        }
        //----------------------Copy from (end): org.apache.pdfbox.util.ImageIOUtil-----------------------------------
    
        /**
         * Demo entry point: renders every page of a sample PDF to an in-memory image, then
         * writes all pages to image files with {@code PDFImageWriter}.
         *
         * @param args unused
         * @throws IOException if the PDF cannot be loaded, rendered or written
         */
        public static void main(String[] args) throws IOException {
            String pdfFile = "c:/temp/test_avo.pdf";
            String outputPrefix = "c:/temp/";

            // Determine the page count with a short-lived document of its own.
            int numberOfPages;
            PDDocument countingDocument = loadPdfDocumentFromFile(pdfFile);
            try {
                numberOfPages = countingDocument.getNumberOfPages();
            } finally {
                countingDocument.close();
            }

            // The converter uses the page number as a zero-based list index and closes the
            // document it is given, so each page gets a freshly loaded document.
            for (int pageIndex = 0; pageIndex < numberOfPages; pageIndex++) {
                PDDocument pageDocument = loadPdfDocumentFromFile(pdfFile);
                RenderedImage renderedImage = convertPdfOfGivenPageNumberToRenderImage(pageDocument, pageIndex);
                // render image to ui or
            }

            // if you just want to convert pdf file to image file, it's much easier.
            // PDFImageWriter takes 1-based, inclusive start and end pages.
            boolean success;
            PDDocument document = loadPdfDocumentFromFile(pdfFile);
            try {
                PDFImageWriter imageWriter = new PDFImageWriter();
                success = imageWriter.writeImage(document, IMAGE_FORMAT, PDF_ENCRYPTED_PASSWORD, 1, numberOfPages, outputPrefix, BufferedImage.TYPE_INT_RGB, DEFAULT_IMAGE_RESOLUTION);
            } finally {
                document.close();
            }
            if (!success) {
                System.err.println("Error: no writer found for image format '" + IMAGE_FORMAT + "'");
                System.exit(1);
            }
        }
    
  • You can use the following code to detect which image formats are supported
  • public static String getImageFormats() {
            StringBuffer retval = new StringBuffer();
            String[] formats = ImageIO.getReaderFormatNames();
            for (int i = 0; i < formats.length; i++) {
                retval.append(formats[i]);
                if (i + 1 < formats.length) {
                    retval.append(",");
                }
            }
            return retval.toString();
        }
    
  • Dependencies can be downloaded here
  • Others
  • TIFF images need a native library. You can find more information on java.net

3 comments:

  1. I believe you have covered everything in detail. Hey! why don't you try this JPG to PDF converter. It is free and is compatible with multiple image formats.

    ReplyDelete