web-dev-qa-db-fra.com

Convertir un fichier PDF en image

Je voulais convertir le document PDF en image. J'utilisais Ghost4j.

Problème : Ghost4J a besoin du fichier gsdll32.dll au moment de l'exécution et je ne veux pas utiliser ce fichier DLL.

Question 1 : y a-t-il un moyen, dans Ghost4J, de convertir un PDF en image sans la DLL ?

Question 2 : J'ai trouvé une solution dans l'API de PDFBox. La classe org.apache.pdfbox.pdmodel.PDPage possède une méthode convertToImage() qui convertit la page PDF au format image.

// Load the PDF; the try/finally below makes sure the document is closed
// even when rendering throws.
PDDocument doc = PDDocument.load(new File("/document.pdf"));
try {
    List<PDPage> pages = doc.getDocumentCatalog().getAllPages();
    // Only the first page is rendered.
    PDPage page = pages.get(0);
    BufferedImage image = page.convertToImage();
    File outputfile = new File("/image.png");
    ImageIO.write(image, "png", outputfile);
} finally {
    doc.close();
}

Je n'ai que du texte dans le document PDF, et j'obtiens cette exception quand je lance ce code :

Aug 12, 2013 6:00:24 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: BDC
Exception in thread "main" Java.lang.ExceptionInInitializerError
    at org.Apache.pdfbox.pdmodel.font.PDTrueTypeFont.getawtFont(PDTrueTypeFont.Java:481)
    at org.Apache.pdfbox.pdmodel.font.PDSimpleFont.drawString(PDSimpleFont.Java:109)
    at org.Apache.pdfbox.pdfviewer.PageDrawer.processTextPosition(PageDrawer.Java:235)
    at org.Apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.Java:496)
    at org.Apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.Java:62)
    at org.Apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.Java:554)
    at org.Apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.Java:268)
    at org.Apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.Java:235)
    at org.Apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.Java:215)
    at org.Apache.pdfbox.pdfviewer.PageDrawer.drawPage(PageDrawer.Java:125)
    at org.Apache.pdfbox.pdmodel.PDPage.convertToImage(PDPage.Java:781)
    at org.Apache.pdfbox.pdmodel.PDPage.convertToImage(PDPage.Java:712)
    at ge.eid.esignature.adessa.pades.sign.PDFtoImage.main(PDFtoImage.Java:25)
Caused by: Java.lang.IllegalArgumentException
    at Java.nio.Buffer.position(Buffer.Java:216)
    at Sun.font.TrueTypeFont.lookupName(TrueTypeFont.Java:1153)
    at Sun.font.TrueTypeFont.getPostscriptName(TrueTypeFont.Java:1205)
    at Java.awt.Font.getPSName(Font.Java:1156)
    at org.Apache.pdfbox.pdmodel.font.FontManager.loadFonts(FontManager.Java:101)
    at org.Apache.pdfbox.pdmodel.font.FontManager.<clinit>(FontManager.Java:53)
    ... 13 more
18
grep

Vous pouvez facilement convertir les pages de fichiers 04-Request-Headers.pdf au format image.

Convertissez toutes les pages d'un PDF au format image en Java à l’aide de PDFBox.

Jar requis pdfbox-1.8.3.jar

ou la dépendance Maven :

<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>1.8.3</version>
</dependency>

Voici la solution: 

package com.pdf.pdfbox.examples;

import java.awt.image.BufferedImage;
import java.io.File;
import java.util.List;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;

/**
 * Converts every page of a PDF document into its own PNG image using
 * {@code PDPage.convertToImage()}.
 *
 * <p>Each image is written to the destination directory as
 * {@code <pdf name>_<page number>.png}, with page numbers starting at 1.
 */
public class ConvertPDFPagesToImages {

    /**
     * Renders all pages of the hard-coded source PDF to PNG files.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String sourceDir = "C:/Documents/04-Request-Headers.pdf"; // path of the PDF file to convert
        String destinationDir = "C:/Documents/Converted_PdfFiles_to_Image/"; // converted page images are saved here

        File sourceFile = new File(sourceDir);
        File destinationFile = new File(destinationDir);

        // mkdirs() also creates missing parent directories; only report
        // success when the directory really exists afterwards.
        if (!destinationFile.exists()) {
            if (!destinationFile.mkdirs() && !destinationFile.isDirectory()) {
                System.err.println("Could not create folder -> " + destinationFile.getAbsolutePath());
                return;
            }
            System.out.println("Folder Created -> " + destinationFile.getAbsolutePath());
        }

        if (!sourceFile.exists()) {
            System.err.println(sourceFile.getName() + " File not exists");
            return;
        }

        try {
            System.out.println("Images copied to Folder: " + destinationFile.getName());
            PDDocument document = PDDocument.load(sourceDir);
            try {
                // The unchecked warning is suppressed on this declaration
                // only, instead of on the whole class.
                @SuppressWarnings("unchecked")
                List<PDPage> list = document.getDocumentCatalog().getAllPages();
                System.out.println("Total files to be converted -> " + list.size());

                // Strip only a trailing ".pdf" so that names containing
                // ".pdf" elsewhere are left intact.
                String fileName = sourceFile.getName();
                if (fileName.endsWith(".pdf")) {
                    fileName = fileName.substring(0, fileName.length() - ".pdf".length());
                }

                int pageNumber = 1;
                for (PDPage page : list) {
                    BufferedImage image = page.convertToImage();
                    File outputfile = new File(destinationDir + fileName + "_" + pageNumber + ".png");
                    System.out.println("Image Created -> " + outputfile.getName());
                    // ImageIO.write returns false when no writer handles the format.
                    if (!ImageIO.write(image, "png", outputfile)) {
                        System.err.println("No image writer found for -> " + outputfile.getName());
                    }
                    pageNumber++;
                }
            } finally {
                // Close the document even when a page fails to render.
                document.close();
            }
            System.out.println("Converted Images are saved at -> " + destinationFile.getAbsolutePath());
        } catch (Exception e) {
            // Top-level boundary of a command-line example: report and exit.
            e.printStackTrace();
        }
    }
}

Conversions possibles de l'image au format jpg, jpeg, png, bmp, gif.

Remarque: J'ai mentionné les formats d'image principalement utilisés.

// Alternative output formats: each call writes the same rendered page under
// a different ImageIO format name, with the matching file extension.
// Pick the one you need rather than running all five.
ImageIO.write(image , "jpg", new File( destinationDir +fileName+"_"+pageNumber+".jpg" ));
ImageIO.write(image , "jpeg", new File( destinationDir +fileName+"_"+pageNumber+".jpeg" ));
ImageIO.write(image , "png", new File( destinationDir +fileName+"_"+pageNumber+".png" ));
ImageIO.write(image , "bmp", new File( destinationDir +fileName+"_"+pageNumber+".bmp" ));
ImageIO.write(image , "gif", new File( destinationDir +fileName+"_"+pageNumber+".gif" ));

Sortie de la console:

Images copied to Folder: Converted_PdfFiles_to_Image
Total files to be converted -> 13
Aug 06, 2014 1:35:49 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_1.png
Aug 06, 2014 1:35:50 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_2.png
Aug 06, 2014 1:35:51 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_3.png
Aug 06, 2014 1:35:51 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_4.png
Aug 06, 2014 1:35:52 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_5.png
Aug 06, 2014 1:35:52 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_6.png
Aug 06, 2014 1:35:53 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_7.png
Aug 06, 2014 1:35:53 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_8.png
Aug 06, 2014 1:35:54 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_9.png
Aug 06, 2014 1:35:54 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_10.png
Aug 06, 2014 1:35:54 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_11.png
Aug 06, 2014 1:35:55 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_12.png
Aug 06, 2014 1:35:55 PM org.Apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: i
Image Created -> 04-Request-Headers_13.png
Converted Images are saved at -> C:\Documents\Converted_PdfFiles_to_Image
27
pudaykiran

Vous pouvez essayer d'utiliser NonSequentialParser pour éviter les erreurs avec certains fichiers PDF (avec les mises à jour incrémentielles):

PDDocument doc = PDDocument.loadNonSeq (nouveau fichier ("/ document.pdf"));

6
stanlyF

Passer par PDFBox est un bon moyen d’éviter les bibliothèques natives. Essayez d’utiliser la classe PDFImageWriter de PDFBox. Utilisez-la ainsi :

// Start page is 1-based; Integer.MAX_VALUE as end page covers every page.
new PDFImageWriter().writeImage(doc, "png", null, 1, Integer.MAX_VALUE, "picture");

Pour toutes les pages.

PDFImageWriter.write(doc, "png", null, 0, 0, "picture");

Voir: PDFImageWriter Javadoc

4
Xondio

Vous avez probablement essayé de convertir un fichier PDF corrompu. J'obtiens les mêmes erreurs lorsque le fichier PDF contient des flux encodés en JPX (JPXDecode).

3
Malhotra

Vous pouvez facilement convertir un PDF en image en utilisant PDFBox. La méthode renderImageWithDPI de la classe PDFRenderer de PDFBox sert à convertir une page d'un fichier PDF en image.

PDDocument doc = PDDocument.load(new File("filepath/sample.pdf"));
try {
    PDFRenderer pdfRenderer = new PDFRenderer(doc);
    // pageNo is the zero-based index of the page to render; it must be
    // defined by the surrounding code.
    BufferedImage bffim = pdfRenderer.renderImageWithDPI(pageNo, 300, ImageType.RGB);
    String fileName = "image-" + pageNo + ".png";
    // Write the image that was just rendered, at the same 300 DPI.
    ImageIOUtil.writeImage(bffim, fileName, 300);
} finally {
    // Close the document even when rendering or writing fails.
    doc.close();
}
1
Bittu Choudhary

Pour l'erreur:

org.Apache.pdfbox.util.PDFStreamEngine processOperator INFO: opération non prise en charge/désactivée

Vous devez inclure le jar fontbox-1.7.1 dans le chemin d'accès aux classes (classpath), en plus du jar Apache PDFBox ; cela résoudra votre problème, car PDFBox utilise en interne fontbox-1.7.1.

0
user2605874
 try {
                // Load the PDF from the configured path. The inner try/finally
                // closes the document even when decryption or rendering throws.
                PDDocument document = PDDocument.load(PdfInfo.getPDFWAY());
                try {
                    if (document.isEncrypted()) {
                        document.decrypt(PdfInfo.getPASSWORD());
                    }
                    // Translate the configured colour-mode name into the matching
                    // BufferedImage type; an unknown name aborts with exit code 2.
                    if ("bilevel".equalsIgnoreCase(PdfInfo.getCOLOR())) {
                        PdfInfo.setIMAGETYPE(BufferedImage.TYPE_BYTE_BINARY);
                    } else if ("indexed".equalsIgnoreCase(PdfInfo.getCOLOR())) {
                        PdfInfo.setIMAGETYPE(BufferedImage.TYPE_BYTE_INDEXED);
                    } else if ("gray".equalsIgnoreCase(PdfInfo.getCOLOR())) {
                        PdfInfo.setIMAGETYPE(BufferedImage.TYPE_BYTE_GRAY);
                    } else if ("rgb".equalsIgnoreCase(PdfInfo.getCOLOR())) {
                        PdfInfo.setIMAGETYPE(BufferedImage.TYPE_INT_RGB);
                    } else if ("rgba".equalsIgnoreCase(PdfInfo.getCOLOR())) {
                        PdfInfo.setIMAGETYPE(BufferedImage.TYPE_INT_ARGB);
                    } else {
                        System.exit(2);
                    }
                    PDFImageWriter imageWriter = new PDFImageWriter();
                    boolean success = imageWriter.writeImage(document, PdfInfo.getIMAGE_FORMAT(), PdfInfo.getPASSWORD(),
                            PdfInfo.getSTART_PAGE(), PdfInfo.getEND_PAGE(), PdfInfo.getOUTPUT_PREFIX(),
                            PdfInfo.getIMAGETYPE(), PdfInfo.getRESOLUTION());
                    // A false result from the writer aborts with exit code 1.
                    if (!success) {
                        System.exit(1);
                    }
                } finally {
                    document.close();
                }

        } catch (IOException | CryptographyException | InvalidPasswordException ex) {
            Logger.getLogger(PdfToImae.class.getName()).log(Level.SEVERE, null, ex);
        }
/**
 * Static holder for the settings of a PDF-to-image conversion: source path,
 * password, page range, output format, colour mode, resolution and image type.
 *
 * <p>All state is static, so the settings are shared process-wide. The
 * accessor names follow the upper-case field names because that is how the
 * conversion code calls them (for example {@code PdfInfo.getPDFWAY()}).
 */
public class PdfInfo {
    private static String PDFWAY;
    private static String OUTPUT_PREFIX;
    private static String PASSWORD;
    private static int START_PAGE = 1;
    private static int END_PAGE = Integer.MAX_VALUE;
    private static String IMAGE_FORMAT = "jpg";
    private static String COLOR = "rgb";
    private static int RESOLUTION = 256;
    // NOTE(review): 24 does not appear to be a BufferedImage.TYPE_* constant;
    // the conversion code overwrites this value from COLOR before using it.
    private static int IMAGETYPE = 24;
    // NOTE(review): filename and filePath are not used by any visible code.
    private static String filename;
    private static String filePath = "";

    /** @return the path of the PDF file to convert, or {@code null} if not set */
    public static String getPDFWAY() {
        return PDFWAY;
    }

    /** @param pdfWay the path of the PDF file to convert */
    public static void setPDFWAY(String pdfWay) {
        PDFWAY = pdfWay;
    }

    /** @return the prefix for generated image files, or {@code null} if not set */
    public static String getOUTPUT_PREFIX() {
        return OUTPUT_PREFIX;
    }

    /** @param outputPrefix the prefix for generated image files */
    public static void setOUTPUT_PREFIX(String outputPrefix) {
        OUTPUT_PREFIX = outputPrefix;
    }

    /** @return the document password, or {@code null} if not set */
    public static String getPASSWORD() {
        return PASSWORD;
    }

    /** @param password the document password */
    public static void setPASSWORD(String password) {
        PASSWORD = password;
    }

    /** @return the first page to convert (defaults to 1) */
    public static int getSTART_PAGE() {
        return START_PAGE;
    }

    /** @param startPage the first page to convert */
    public static void setSTART_PAGE(int startPage) {
        START_PAGE = startPage;
    }

    /** @return the last page to convert (defaults to {@code Integer.MAX_VALUE}) */
    public static int getEND_PAGE() {
        return END_PAGE;
    }

    /** @param endPage the last page to convert */
    public static void setEND_PAGE(int endPage) {
        END_PAGE = endPage;
    }

    /** @return the output image format name (defaults to {@code "jpg"}) */
    public static String getIMAGE_FORMAT() {
        return IMAGE_FORMAT;
    }

    /** @param imageFormat the output image format name */
    public static void setIMAGE_FORMAT(String imageFormat) {
        IMAGE_FORMAT = imageFormat;
    }

    /** @return the colour mode name (defaults to {@code "rgb"}) */
    public static String getCOLOR() {
        return COLOR;
    }

    /** @param color the colour mode name */
    public static void setCOLOR(String color) {
        COLOR = color;
    }

    /** @return the rendering resolution (defaults to 256) */
    public static int getRESOLUTION() {
        return RESOLUTION;
    }

    /** @param resolution the rendering resolution */
    public static void setRESOLUTION(int resolution) {
        RESOLUTION = resolution;
    }

    /** @return the image type value handed to the image writer */
    public static int getIMAGETYPE() {
        return IMAGETYPE;
    }

    /** @param imageType the image type value handed to the image writer */
    public static void setIMAGETYPE(int imageType) {
        IMAGETYPE = imageType;
    }
}
0
Vahap Gençdal