Monday, 6 August 2012

Use Apache PDFBox to convert PDF to image (supports BMP,bmp,jpeg,wbmp,gif,png,JPG,jpg,JPEG,WBMP)

  • dependency
  • 
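    <dependency>
       <groupId>org.apache.pdfbox</groupId>
       <artifactId>pdfbox</artifactId>
       <version>1.7.0</version>
    </dependency>
    <dependency>
       <groupId>org.bouncycastle</groupId>
       <artifactId>bcprov-jdk15</artifactId>
       <version>1.46</version>
    </dependency>
    <dependency>
       <groupId>org.bouncycastle</groupId>
       <artifactId>bcmail-jdk15</artifactId>
       <version>1.46</version>
    </dependency>
    <dependency>
       <groupId>org.apache.pdfbox</groupId>
       <artifactId>fontbox</artifactId>
       <version>1.7.0</version>
    </dependency>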
    
  • Code
  • import au.gov.nsw.police.nodi.common.CustomProperties;
    import org.apache.pdfbox.exceptions.CryptographyException;
    import org.apache.pdfbox.exceptions.InvalidPasswordException;
    import org.apache.pdfbox.pdmodel.PDDocument;
    import org.apache.pdfbox.pdmodel.PDPage;
    import org.apache.pdfbox.util.PDFImageWriter;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.w3c.dom.Node;
    import org.w3c.dom.NodeList;
    
    import javax.imageio.*;
    import javax.imageio.metadata.IIOInvalidTreeException;
    import javax.imageio.metadata.IIOMetadata;
    import javax.imageio.metadata.IIOMetadataNode;
    import java.awt.*;
    import java.awt.image.BufferedImage;
    import java.awt.image.RenderedImage;
    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.util.Iterator;
    import java.util.List;
    
    import static com.google.common.base.Throwables.getStackTraceAsString;
    
    public class PDFToImage {
        private static final Logger log = LoggerFactory.getLogger(PDFToImage.class);
        private static final String STANDARD_METADATA_FORMAT = "javax_imageio_1.0";
        private static final String PDF_ENCRYPTED_PASSWORD = CustomProperties.getInstance().getProperty("pdf.from.esb.encrypted.password");
        private static final String IMAGE_FORMAT = "png";
        private static final int DEFAULT_IMAGE_RESOLUTION = 256;
    
        /**
         * Renders a single page of the given PDF document to an image.
         *
         * <p>The document is first decrypted (when encrypted) with the configured
         * {@code PDF_ENCRYPTED_PASSWORD}. The page is rendered as {@code TYPE_INT_RGB} at
         * {@code DEFAULT_IMAGE_RESOLUTION}, or at the screen resolution when that is higher
         * and a display is available.
         *
         * <p>Note: the document is always closed before this method returns, whether it
         * succeeds or fails, so the same {@code PDDocument} instance cannot be passed twice.
         *
         * @param document   the PDF document to render; closed by this method
         * @param pageNumber index into the document's page list; it is used directly as a
         *                   list index, i.e. the first page is {@code 0}
         * @return the rendered page, or {@code null} when no image writer exists for the
         *         image format
         * @throws IOException if rendering or closing the document fails
         */
        public static RenderedImage convertPdfOfGivenPageNumberToRenderImage(PDDocument document, int pageNumber) throws IOException {
            try {
                decryptDocument(PDF_ENCRYPTED_PASSWORD, document);

                int dpi = DEFAULT_IMAGE_RESOLUTION;
                try {
                    // Never go below the default; only a sharper screen raises the resolution.
                    dpi = Math.max(dpi, Toolkit.getDefaultToolkit().getScreenResolution());
                } catch (HeadlessException e) {
                    // No display available: dpi still holds the default here.
                    log.debug("As it can't get the screen resolution. Use default resolution: {}", dpi);
                }

                PDPage selectedPage = (PDPage) document.getDocumentCatalog().getAllPages().get(pageNumber);
                BufferedImage rendered = selectedPage.convertToImage(BufferedImage.TYPE_INT_RGB, dpi);
                return covertBufferedImageToRenderImage(rendered, IMAGE_FORMAT, dpi);
            } finally {
                if (document != null) {
                    document.close();
                }
            }
        }
    
        /**
         * Decrypts the document in place when it is encrypted; does nothing otherwise.
         *
         * <p>A failed decryption is logged and then reported to the caller as an
         * {@link IOException} (with the original exception as its cause) so that callers do
         * not continue working with a document that is still encrypted.
         *
         * @param password the password used to decrypt the document
         * @param document the document to decrypt
         * @throws IOException if the password is rejected, decryption fails, or the
         *                     underlying decrypt call reports an I/O problem
         */
        private static void decryptDocument(String password, PDDocument document) throws IOException {
            if (!document.isEncrypted()) {
                return;
            }
            try {
                document.decrypt(password);
            } catch (InvalidPasswordException e) {
                log.error("Error: The document is encrypted. Please provide correct PDF_ENCRYPTED_PASSWORD. Cause:\n{}", getStackTraceAsString(e));
                throw new IOException("Cannot decrypt the PDF document: the configured password was rejected", e);
            } catch (CryptographyException e) {
                log.error(getStackTraceAsString(e));
                throw new IOException("Cannot decrypt the PDF document", e);
            }
        }
    
        /**
         * Wraps a rendered page in an {@link IIOImage} together with resolution metadata for
         * the requested image format and returns its rendered image.
         *
         * <p>NOTE(review): {@code IIOImage.getRenderedImage()} hands back the image the
         * {@code IIOImage} was constructed with, so the metadata built here does not appear
         * to travel with the return value - confirm whether callers need it.
         *
         * @param image       the rendered page
         * @param imageFormat informal format name used to look up an {@link ImageWriter}, e.g. {@code "png"}
         * @param resolution  resolution passed on to the metadata
         * @return the rendered image, or {@code null} when no writer is registered for {@code imageFormat}
         * @throws IOException declared for callers; not thrown directly by this method
         */
        private static RenderedImage covertBufferedImageToRenderImage(BufferedImage image, String imageFormat, int resolution) throws IOException {
            // Typed iterator: next() on a raw Iterator yields Object, which cannot be assigned to ImageWriter.
            Iterator<ImageWriter> imageWriterIterator = ImageIO.getImageWritersByFormatName(imageFormat);
            if (!imageWriterIterator.hasNext()) {
                return null;
            }

            ImageWriter imageWriter = imageWriterIterator.next();
            try {
                ImageWriteParam writerParams = imageWriter.getDefaultWriteParam();
                if (writerParams.canWriteCompressed()) {
                    writerParams.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
                    // reset the compression type if overwritten by setCompressionMode
                    if (writerParams.getCompressionType() == null) {
                        writerParams.setCompressionType(writerParams.getCompressionTypes()[0]);
                    }
                    writerParams.setCompressionQuality(1.0f);
                }
                IIOMetadata meta = createMetadata(image, imageWriter, writerParams, resolution);
                IIOImage iioImage = new IIOImage(image, null, meta);
                return iioImage.getRenderedImage();
            } finally {
                // Release the writer's resources on every exit path.
                imageWriter.dispose();
            }
        }
    
        /**
         * Loads a PDF document from its raw bytes.
         *
         * @param imageOfPdf the bytes of the PDF file
         * @return the loaded document
         * @throws IOException if the bytes cannot be read as a PDF document
         */
        private static PDDocument loadPdfDocumentFromBytes(byte[] imageOfPdf) throws IOException {
            return PDDocument.load(new ByteArrayInputStream(imageOfPdf));
        }
    
        /**
         * Loads a PDF document from a file.
         *
         * @param fileName the name of the PDF file to open
         * @return the loaded document
         * @throws IOException if the file cannot be read as a PDF document
         */
        private static PDDocument loadPdfDocumentFromFile(String fileName) throws IOException {
            final PDDocument loaded = PDDocument.load(fileName);
            return loaded;
        }
    
        //----------------------Copy from (start): org.apache.pdfbox.util.ImageIOUtil-----------------------------------
        /**
         * Builds the writer's default image metadata for the image and stores the resolution in it.
         *
         * @param image        the image the metadata describes; used to derive the image type
         *                     when the write parameters carry no destination type
         * @param imageWriter  the writer that supplies the default metadata
         * @param writerParams the write parameters
         * @param resolution   the resolution to record
         * @return the metadata, or {@code null} when the resolution could not be added to it
         */
        private static IIOMetadata createMetadata(RenderedImage image, ImageWriter imageWriter, ImageWriteParam writerParams, int resolution) {
            // Prefer the explicit destination type; fall back to the image's own type.
            ImageTypeSpecifier type = writerParams.getDestinationType();
            if (type == null) {
                type = ImageTypeSpecifier.createFromRenderedImage(image);
            }

            IIOMetadata meta = imageWriter.getDefaultImageMetadata(type, writerParams);
            if (addResolution(meta, resolution)) {
                return meta;
            }
            return null;
        }
    
        /**
         * Writes the resolution into the {@code Dimension} node of the standard metadata
         * tree ({@code HorizontalPixelSize} and {@code VerticalPixelSize}) and merges the
         * tree back into the metadata.
         *
         * @param meta       the metadata to update
         * @param resolution the resolution; stored as {@code resolution / 25.4}
         * @return {@code true} when the metadata was updated; {@code false} when it is
         *         read-only or does not support the standard metadata format
         * @throws RuntimeException if the modified tree cannot be merged back; the
         *                          {@link IIOInvalidTreeException} is kept as the cause
         */
        private static boolean addResolution(IIOMetadata meta, int resolution) {
            if (meta.isReadOnly() || !meta.isStandardMetadataFormatSupported()) {
                return false;
            }

            IIOMetadataNode root = (IIOMetadataNode) meta.getAsTree(STANDARD_METADATA_FORMAT);
            IIOMetadataNode dimension = getChildNode(root, "Dimension");

            // NOTE(review): this stores pixels per millimetre (resolution / 25.4), presumably
            // to match how the PNG writer reads the value - confirm before using other formats.
            String pixelSize = Double.toString(resolution / 25.4);
            IIOMetadataNode horizontalPixelSize = getChildNode(dimension, "HorizontalPixelSize");
            horizontalPixelSize.setAttribute("value", pixelSize);

            IIOMetadataNode verticalPixelSize = getChildNode(dimension, "VerticalPixelSize");
            verticalPixelSize.setAttribute("value", pixelSize);

            try {
                meta.mergeTree(STANDARD_METADATA_FORMAT, root);
            } catch (IIOInvalidTreeException e) {
                // Keep the original exception as the cause so the stack trace is not lost.
                throw new RuntimeException("Cannot update image metadata: " + e.getMessage(), e);
            }
            return true;
        }
    
    
        /**
         * Returns the first direct child of {@code parentNode} whose node name equals
         * {@code childNodeName}; when there is none, a new child with that name is
         * appended to the parent and returned.
         *
         * @param parentNode    the node whose children are searched
         * @param childNodeName the node name to look for
         * @return the existing or newly created child node
         */
        private static IIOMetadataNode getChildNode(IIOMetadataNode parentNode, String childNodeName) {
            final NodeList children = parentNode.getChildNodes();
            final int childCount = children.getLength();
            for (int index = 0; index < childCount; index++) {
                final Node candidate = children.item(index);
                if (childNodeName.equals(candidate.getNodeName())) {
                    return (IIOMetadataNode) candidate;
                }
            }

            // No match among the existing children: create one.
            return createChildNodeIfNotExist(parentNode, childNodeName);
        }
    
    
        /**
         * Creates a new node named {@code childNodeName} and appends it to {@code parentNode}.
         *
         * <p>Despite its name, this method performs no existence check itself; it always
         * creates and appends a new node.
         *
         * @param parentNode    the node that receives the new child
         * @param childNodeName the name of the new child node
         * @return the newly appended child node
         */
        private static IIOMetadataNode createChildNodeIfNotExist(IIOMetadataNode parentNode, String childNodeName) {
            final IIOMetadataNode created = new IIOMetadataNode(childNodeName);
            parentNode.appendChild(created);
            return created;
        }
        //----------------------Copy from (end): org.apache.pdfbox.util.ImageIOUtil-----------------------------------
    
        /**
         * Demo entry point: renders every page of a sample PDF to a {@link RenderedImage},
         * then writes all pages as image files with {@code PDFImageWriter}.
         *
         * @param args unused
         * @throws IOException if the PDF cannot be loaded, decrypted or rendered
         */
        public static void main(String[] args) throws IOException {
            String pdfFile = "c:/temp/test_avo.pdf";
            String outputPrefix = "c:/temp/";

            // Open the document once just to learn how many pages it has.
            int numberOfPages;
            PDDocument pageCountDocument = loadPdfDocumentFromFile(pdfFile);
            try {
                numberOfPages = pageCountDocument.getNumberOfPages();
            } finally {
                pageCountDocument.close();
            }

            for (int pageIndex = 0; pageIndex < numberOfPages; pageIndex++) {
                // The conversion method takes a zero-based page index and closes the document
                // it is given, so a fresh document is loaded for every page.
                PDDocument pageDocument = loadPdfDocumentFromFile(pdfFile);
                RenderedImage renderedImage = convertPdfOfGivenPageNumberToRenderImage(pageDocument, pageIndex);
                // render image to ui or
            }

            // if you just want to convert pdf file to image file, it's much easier.
            boolean success;
            PDDocument document = loadPdfDocumentFromFile(pdfFile);
            try {
                decryptDocument(PDF_ENCRYPTED_PASSWORD, document);
                PDFImageWriter imageWriter = new PDFImageWriter();
                // Pages are passed as 1 .. numberOfPages (assumes writeImage uses 1-based page numbers - confirm).
                success = imageWriter.writeImage(document, IMAGE_FORMAT, PDF_ENCRYPTED_PASSWORD, 1, numberOfPages, outputPrefix, BufferedImage.TYPE_INT_RGB, DEFAULT_IMAGE_RESOLUTION);
            } finally {
                document.close();
            }

            // Checked after the document is closed: System.exit would skip the finally block.
            if (!success) {
                System.err.println("Error: no writer found for image format '" + IMAGE_FORMAT + "'");
                System.exit(1);
            }
        }
    
  • You can use the following code to detect which image formats are supported
  • public static String getImageFormats() {
            StringBuffer retval = new StringBuffer();
            String[] formats = ImageIO.getReaderFormatNames();
            for (int i = 0; i < formats.length; i++) {
                retval.append(formats[i]);
                if (i + 1 < formats.length) {
                    retval.append(",");
                }
            }
            return retval.toString();
        }
    
  • Dependencies can be downloaded here
  • Others
  • TIFF images need a native library. You can find more information on java.net

4 comments:

  1. I believe you have covered everything in detail. Hey! why don't you try this JPG to PDF converter. It is free and is compatible with multiple image formats.

    ReplyDelete
  2. This tool is great, it implements a lot of pdf to conversion between various formats, it can be said that it is very versatile, and the conversion of pdf to image is very easy to use.

    ReplyDelete