利用jpedal进行pdf转换成jpeg jpg png tiff tif等格式的图片

项目中运用到pdf文件转换成image图片,开始时使用pdfbox开源库进行图片转换,但是转换出来的文件中含有部分乱码的情况.下面是pdfBox 的pdf转换图片的代码示例.

// PDFBox example: render every page of a PDF to image files.
PDDocument document = null;
try {
    String password = null;
    int startPage = 1;
    String imageType = "jpg";
    File imageFile = new File("E:\\upload\\pdf\\0424\\Servlet." + imageType);
    File pdfFile = new File("E:\\upload\\pdf\\0424\\Servlet.pdf");

    document = PDDocument.load(pdfFile);
    // The original listing assigned endPage without declaring it.
    int endPage = document.getPageCount();

    PDFImageWriter imageWriter = new PDFImageWriter();
    // NOTE(review): the last argument looks like an output-name prefix rather than a
    // complete file name - confirm against the PDFBox version in use.
    imageWriter.writeImage(document, imageType, password, startPage, endPage,
            imageFile.getAbsolutePath());
} catch (IOException e) {
    e.printStackTrace();
} finally {
    // Close the document even when loading or rendering failed part-way through.
    if (document != null) {
        try {
            document.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

比较了其他的开源库之后，准备采用jpedal。但是jpedal的资料非常少，除了官方网站外，即使是英文资料也很少。而且官方提供的代码示例中的一些方法在lgpl授权的jpedal代码库中不存在。下面是收集到的一些资料

1、jpedal文档：/org/jpedal/PdfDecoder.html

2、简单调用示例：/java-pdf-code-faq/#pdf2img

3、lgpl授权的jpedal库的下载地址：/projects/jpedal/

4、转换示例地址：/samplecode/org/jpedal/examples/images/ConvertPagesToImages.java.html

5、高清图片转换示例地址：/samplecode/org/jpedal/examples/images/ConvertPagesToHiResImages.java.html

于是稍微修改了官方的转换示例，下面是经过测试可以使用的转换代码

import .mon;import com.sun.imageio.plugins.jpeg.JPEGImageWriter;import org.jpedal.*;import org.jpedal.color.ColorSpaces;import org.jpedal.constants.PageInfo;import org.jpedal.exception.PdfException;import org.jpedal.external.Options;import org.jpedal.fonts.FontMappings;import org.jpedal.objects.PdfFileInformation;import org.jpedal.utils.LogWriter;import org.w3c.dom.Element;import javax.imageio.IIOImage;import javax.imageio.ImageIO;import javax.imageio.ImageTypeSpecifier;import javax.imageio.metadata.IIOMetadata;import javax.imageio.plugins.jpeg.JPEGImageWriteParam;import javax.imageio.stream.ImageOutputStream;import java.awt.*;import java.awt.image.BufferedImage;import java.io.*;import java.util.Iterator;public class ConvertPagesToImages{/*** show if image transparent */boolean isTransparent=false;/**output where we put files */private String user_dir = System.getProperty("user.dir");/**use 96 dpi as default so pages correct size (72 will be smaller) */private float pageScaling =1.33f;/**flag to show if we print messages */public static boolean outputMessages = false;String output_dir=null;/**correct separator for OS */String separator = System.getProperty("file.separator");/**the decoder object which decodes the pdf and returns a data object */PdfDecoder decode_pdf = null;//type of image to save thumbnailsprivate String format = "png";/** holding all creators that produce OCR pdf's ocr*/private String[] ocr = {"TeleForm"};/**scaling to use - default is 100 percent */private int scaling=100;/**file password or null */private String password=null;//only used if between 0 and 1 private float JPEGcompression=-1f;private int pageCount = 0;public ConvertPagesToImages() { }public void init(String file_name, int scaling, String format, String output_dir, String password, int pageCount){/*缩小比率*/this.scaling = scaling;/*图片格式*/this.format = format;/*输出目录*/this.output_dir = output_dir;/*pdf密码*/this.password = password;/*输出图片数*/this.pageCount = pageCount;/*判断文件是否存在*/File 
pdf_file = new File(file_name);if (!pdf_file.exists()) {System.out.println("File " + pdf_file + " not found");System.out.println("May need full path");return;}extraction(file_name, output_dir); }private void extraction(String file_name, String output_dir) { this.output_dir=output_dir;if (!user_dir.endsWith(separator)){user_dir = user_dir + separator;}if (file_name.toLowerCase().endsWith(".pdf")) {if(output_dir==null){output_dir=user_dir + "thumbnails" + separator;}decodeFile(file_name,output_dir);} else {String[] files = null;File inputFiles;if (!file_name.endsWith(separator)){file_name = file_name + separator;}try {inputFiles = new File(file_name);if (!inputFiles.isDirectory()) {System.err.println(file_name + " is not a directory. Exiting program");}else{files = inputFiles.list();}} catch (Exception ee) {LogWriter.writeLog("Exception trying to access file " + ee.getMessage());}if(files!=null){for (String file : files) {if (file.toLowerCase().endsWith(".pdf")) {if (outputMessages){System.out.println(file_name + file);}decodeFile(file_name + file, output_dir);}}}}if(outputMessages){System.out.println("Thumbnails created");}}/*** routine to decode a file */private void decodeFile(String file_name,String output_dir) {String name = "demo"; //set a default just in caseint pointer = file_name.lastIndexOf(separator);if(pointer==-1){pointer = file_name.lastIndexOf('/');}if (pointer != -1){name = file_name.substring(pointer + 1, file_name.length() - 4);}else if((file_name.toLowerCase().endsWith(".pdf"))){name=file_name.substring(0,file_name.length()-4);}//fix for odd files on Linux created when you view pagesif(name.startsWith(".")){return;}//create output dir for imagesif(output_dir==null){output_dir = user_dir + "thumbnails" + separator ;}//PdfDecoder returns a PdfException if there is a problemtry {if(decode_pdf==null){decode_pdf = new PdfDecoder(true);}/**optional JAI code for faster rendering*/org.jpedal.external.ImageHandler myExampleImageHandler=new 
org.jpedal.examples.handlers.ExampleImageDrawOnScreenHandler();decode_pdf.addExternalHandler(myExampleImageHandler, Options.ImageHandler);//mappings for non-embedded fonts to useFontMappings.setFontReplacements();//true as we are rendering pagedecode_pdf.setExtractionMode(0, pageScaling);//don't bother to extract text and images/*** open the file (and read metadata including pages in file)*/if (outputMessages){System.out.println("Opening file :" + file_name);}if(password != null && password != ""){decode_pdf.openPdfFile(file_name,password);}else{decode_pdf.openPdfFile(file_name);}} catch (Exception e) {System.err.println("8.Exception " + e + " in pdf code in "+file_name);}/*** extract data from pdf (if allowed).*/if(decode_pdf.isEncrypted() && !decode_pdf.isFileViewable()){throw new RuntimeException("Wrong password password used=>"+password+ '<');}else if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied())) && (!decode_pdf.isExtractionAllowed())) {throw new RuntimeException("Extraction not allowed");} else {extractPageAsImage(file_name, output_dir, name, isTransparent);}/**close the pdf file */decode_pdf.closePdfFile(); }private void extractPageAsImage(String file_name, String output_dir, String name, boolean isTransparent) { //create a directory if it doesn't existFile output_path = new File(output_dir);if (!output_path.exists()){output_path.mkdirs();}boolean isSingleOutputFile=false;boolean compressTiffs = false;String rawJPEGComp = null; String jpgFlag = "96"; //page rangeint start = 1, end = decode_pdf.getPageCount();end = (pageCount == 0) ? 
end : pageCount;if (outputMessages){System.out.println("Thumbnails will be in " + output_dir);}try {BufferedImage[] multiPages = new BufferedImage[1 + (end - start)];for (int page = start; page < end + 1; page++){getPage(output_dir, name, isTransparent, isSingleOutputFile,rawJPEGComp, jpgFlag, compressTiffs, start, end,multiPages, page);}} catch (Exception e) {decode_pdf.closePdfFile();throw new RuntimeException("Exception " + e.getMessage()+" with thumbnails on File="+file_name);}}private void getPage(String output_dir, String name, boolean isTransparent,boolean isSingleOutputFile, String rawJPEGComp, String jpgFlag,boolean compressTiffs, int start, int end,BufferedImage[] multiPages, int page) throws PdfException, IOException, FileNotFoundException {if (outputMessages ){System.out.println("Page " + page);}/*** 补0操作*/String pageAsString= String.valueOf(page);String maxPageSize= String.valueOf(end);int padding= maxPageSize.length()-pageAsString.length();for(int ii = 0; ii < padding; ii++){pageAsString = '0' + pageAsString;}String image_name;if(isSingleOutputFile){image_name =name;}else{image_name =name+"_page_" + pageAsString;}/*** get PRODUCER and if OCR disable text printing*/PdfFileInformation currentFileInformation = decode_pdf.getFileInformationData();String[] values=currentFileInformation.getFieldValues();String[] fields=PdfFileInformation.getFieldNames();for(int i=0;i<fields.length;i++){if(fields[i].equals("Creator")){for (String anOcr : ocr) { if (values[i].equals(anOcr)) { decode_pdf.setRenderMode(PdfDecoder.RENDERIMAGES); }}}}BufferedImage image_to_save;if(!isTransparent){image_to_save=decode_pdf.getPageAsImage(page);}else{ //use this if you want a transparent image image_to_save =decode_pdf.getPageAsTransparentImage(page);//java adds odd tint if you save this as JPEG which does not have transparency// so put as RGB on white background// (or save as PNG or TIFF which has transparency)// or just call decode_pdf.getPageAsImage(page)if(image_to_save!=null && 
format.toLowerCase().startsWith("jp")){BufferedImage rawVersion=image_to_save;int w=rawVersion.getWidth(), h=rawVersion.getHeight();//blank canvasimage_to_save = new BufferedImage(w,h , BufferedImage.TYPE_INT_RGB);//Graphics2D g2 = image_to_save.createGraphics();//white backgroundg2.setPaint(Color.WHITE);g2.fillRect(0,0,w,h);//paint on imageg2.drawImage(rawVersion, 0, 0,null);}}/*if just gray we can reduce memory usage by converting image to Grayscale@SuppressWarnings("rawtypes")Iterator colorspacesUsed = decode_pdf.getPageInfo(PageInfo.COLORSPACES);int nextID;boolean isGrayOnly=colorspacesUsed!=null; //assume true and disprovewhile(colorspacesUsed!=null && colorspacesUsed.hasNext()){nextID= (Integer) (colorspacesUsed.next());if(nextID!= ColorSpaces.DeviceGray && nextID!=ColorSpaces.CalGray){isGrayOnly=false;}}//draw onto GRAY image to reduce colour depthif(isGrayOnly){BufferedImage image_to_save2=new BufferedImage(image_to_save.getWidth(),image_to_save.getHeight(), BufferedImage.TYPE_BYTE_GRAY);image_to_save2.getGraphics().drawImage(image_to_save,0,0,null);image_to_save = image_to_save2;}//put image in array if multi-imagesif(isSingleOutputFile){multiPages[page-start] = image_to_save;}if (image_to_save != null) {/**BufferedImage does not support any dpi concept. 
A higher dpi can be created* using JAI to convert to a higher dpi image*///shrink the page to 50% with graphics2D transformation//- add your own parameters as needed//you may want to replace null with a hints object if you//want to fine tune quality./** example 1 biliniear scalingAffineTransform scale = new AffineTransform();scale.scale(.5, .5); //50% as a decimalAffineTransformOp scalingOp =new AffineTransformOp(scale, null);image_to_save =scalingOp.filter(image_to_save, null);*//** example 2 bicubic scaling - better quality but slowerto preserve aspect ratio set newWidth or newHeight to -1*//**allow user to specify maximum dimension for thumbnail*/int maxDimension = -1;if(scaling!=100 || maxDimension != -1){int newWidth=image_to_save.getWidth()*scaling/100;int newHeight=image_to_save.getHeight()*scaling/100;Image scaledImage;if(maxDimension != -1 && (newWidth > maxDimension || newHeight > maxDimension)){if(newWidth > newHeight){newWidth = maxDimension;scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);} else {newHeight = maxDimension;scaledImage= image_to_save.getScaledInstance(-1,newHeight,BufferedImage.SCALE_SMOOTH);}} else {scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);}if(format.toLowerCase().startsWith("jp")){image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_RGB);}else{image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_ARGB);}Graphics2D g2 = image_to_save.createGraphics();g2.drawImage(scaledImage, 0, 0,null);}if (format.startsWith("jp")) {saveAsJPEG(jpgFlag, image_to_save, JPEGcompression, new BufferedOutputStream(new FileOutputStream(output_dir + pageAsString + image_name + '.' 
+ format)));} else {//save imagedecode_pdf.getObjectStore().saveStoredImage(output_dir + pageAsString + image_name,image_to_save,true,false,format);} }//flush images in case we do more than 1 page so only contains//images from current pagedecode_pdf.flushObjectValues(true); }private static void saveAsJPEG(String jpgFlag,BufferedImage image_to_save, float JPEGcompression, BufferedOutputStream fos) throws IOException {JPEGImageWriter imageWriter = (JPEGImageWriter) ImageIO.getImageWritersBySuffix("jpeg").next();ImageOutputStream ios = ImageIO.createImageOutputStream(fos);imageWriter.setOutput(ios);IIOMetadata imageMetaData = imageWriter.getDefaultImageMetadata(new ImageTypeSpecifier(image_to_save), null);if (Common.isInteger(jpgFlag)){int dpi = 96;try {dpi = Integer.parseInt(jpgFlag);} catch (Exception e) {e.printStackTrace();}Element tree = (Element) imageMetaData.getAsTree("javax_imageio_jpeg_image_1.0");Element jfif = (Element)tree.getElementsByTagName("app0JFIF").item(0);jfif.setAttribute("Xdensity", Integer.toString(dpi));jfif.setAttribute("Ydensity", Integer.toString(dpi));}JPEGImageWriteParam jpegParams = (JPEGImageWriteParam) imageWriter.getDefaultWriteParam();if(JPEGcompression>=0 && JPEGcompression<=1f){jpegParams.setCompressionMode(JPEGImageWriteParam.MODE_EXPLICIT);jpegParams.setCompressionQuality(JPEGcompression);}imageWriter.write(imageMetaData, new IIOImage(image_to_save, null, null), jpegParams);ios.close();imageWriter.dispose();}public static void main(String[] args) { long start=System.currentTimeMillis();String pdfPath = "E:\\upload\\pdf\\0424\\Servlet.pdf";int scaling = -1;String format = "jpg";String output_dir = "E:\\upload\\pdf\\0424\\jpg\\";String password = null;int pageCount = 10;ConvertPagesToImages convertPagesToImages = new ConvertPagesToImages();convertPagesToImages.init(pdfPath, scaling, format, output_dir, password, pageCount);System.out.println("花费时间为="+(System.currentTimeMillis()-start)/1000 + "秒");} }

功能说明：

1、支持对文件夹下的所有pdf转换成图片，同时也支持对单个pdf进行转换操作。

2、支持转换成jpg，jpeg，tiff，tif，png格式的图片

3、支持指定转换的图片数。

4、支持指定图片的存储位置

传入参数说明

1、pdfPath pdf文件绝对路径，可以是pdf所在的目录也可以是pdf文件路径

2、format 图片格式 (支持jpg,jpeg,tiff,png) ，传参时不能带有点号

3、scaling 图片比率从1到100(100 = 全尺寸) 支持设置为-1 将保持高质量

4、output_dir 输出路径，输出路径为绝对路径

5、password 文件密码，若没有密码则传入null值