EMMA Coverage Report

EMMA Coverage Report (generated Sat Oct 08 11:41:37 CEST 2011)
[all classes][net.sf.jomic.tools]

COVERAGE SUMMARY FOR SOURCE FILE [ExtractPdfImagesTask.java]

name	class, %	method, %	block, %	line, %
ExtractPdfImagesTask.java	100% (1/1)	100% (4/4)	79% (270/342)	86% (66.6/77)

COVERAGE BREAKDOWN BY CLASS AND METHOD

name	class, %	method, %	block, %	line, %

class ExtractPdfImagesTask	100% (1/1)	100% (4/4)	79% (270/342)	86% (66.6/77)
start (): void		100% (1/1)	68% (153/224)	77% (35.6/46)
ExtractPdfImagesTask (): void		100% (1/1)	95% (19/20)	99% (5/5)
ExtractPdfImagesTask (File, File): void		100% (1/1)	100% (16/16)	100% (5/5)
getTargetImageNames (List): List		100% (1/1)	100% (82/82)	100% (21/21)

1	// Jomic - a viewer for comic book archives.
2	// Copyright (C) 2004-2011 Thomas Aglassinger
3	//
4	// This program is free software: you can redistribute it and/or modify
5	// it under the terms of the GNU General Public License as published by
6	// the Free Software Foundation, either version 3 of the License, or
7	// (at your option) any later version.
8	//
9	// This program is distributed in the hope that it will be useful,
10	// but WITHOUT ANY WARRANTY; without even the implied warranty of
11	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	// GNU General Public License for more details.
13	//
14	// You should have received a copy of the GNU General Public License
15	// along with this program. If not, see <http://www.gnu.org/licenses/>.
16	package net.sf.jomic.tools;
17
18	import java.io.File;
19	import java.io.IOException;
20	import java.text.DecimalFormat;
21	import java.util.ArrayList;
22	import java.util.Iterator;
23	import java.util.List;
24	import java.util.Map;
25
26	import org.apache.commons.logging.Log;
27	import org.apache.commons.logging.LogFactory;
28	import org.apache.pdfbox.pdmodel.PDDocument;
29	import org.apache.pdfbox.pdmodel.PDPage;
30	import org.apache.pdfbox.pdmodel.PDResources;
31	import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
32
33	/**
34	* Task to extract all images from a PDF file to a specified target folder. The images will be
35	* named "00.jpg", "01.jpg" and so on, with the actual number of digits depending on the number of
36	* images in the PDF.
37	*
38	* @author Thomas Aglassinger
39	*/
40	public class ExtractPdfImagesTask extends AbstractTask
41	{
42	private static final double WEIGHT_IMAGE_NAMES = 0.10;
43	private static final double WEIGHT_LOAD = 0.10;
44	private FileTools fileTools;
45
46	private Log logger;
47	private File pdfFile;
48	private StringTools stringTools;
49	private File targetFolder;
50
51	public ExtractPdfImagesTask(File newPdfFile, File newTargetFolder) {
52	this();
53	pdfFile = newPdfFile;
54	targetFolder = newTargetFolder;
55	setMaxProgress(pdfFile.length() + 1);
56	}
57
58	private ExtractPdfImagesTask() {
59	super();
60	logger = LogFactory.getLog(ExtractPdfImagesTask.class);
61	fileTools = FileTools.instance();
62	stringTools = StringTools.instance();
63	}
64
65	private List getTargetImageNames(List pages)
66	throws IOException {
67	List result = new ArrayList();
68	Iterator pageRider = pages.iterator();
69
70	while (pageRider.hasNext()) {
71	PDPage page = (PDPage) pageRider.next();
72	PDResources resources = page.getResources();
73	Map images = resources.getImages();
74
75	if (images != null) {
76	Iterator imageRider = images.values().iterator();
77
78	while (imageRider.hasNext()) {
79	PDXObjectImage image = (PDXObjectImage) imageRider.next();
80	String suffix = image.getSuffix();
81
82	result.add(suffix);
83	}
84	}
85	}
86
87	// Assign numeric names to all images, but preserve the original suffix.
88	int imageCount = result.size();
89
90	if (imageCount > 0) {
91	DecimalFormat format = stringTools.getLeadingZeroFormat(imageCount);
92
93	for (int i = 0; i < imageCount; i += 1) {
94	String name = format.format(i) + "." + result.get(i);
95
96	result.set(i, name);
97	}
98	}
99	return result;
100	}
101
102	public void start()
103	throws Exception {
104	setProgress(0);
105
106	boolean allFilesExtracted = false;
107	List targetImageNames = null;
108	PDDocument pdf = PDDocument.load(pdfFile);
109
110	setProgress(Math.round(WEIGHT_LOAD * getMaxProgress()));
111
112	try {
113	List pages = pdf.getDocumentCatalog().getAllPages();
114
115	targetImageNames = getTargetImageNames(pages);
116
117	int imageCount = targetImageNames.size();
118
119	setProgress(Math.round((WEIGHT_LOAD + WEIGHT_IMAGE_NAMES) * getMaxProgress()));
120
121	long progressWhenStartingToExtract = getProgress();
122	long maxExtractProgress = (getMaxProgress() - progressWhenStartingToExtract);
123	int imageIndex = 0;
124	Iterator pageRider = pages.iterator();
125
126	fileTools.mkdirs(targetFolder);
127	while (pageRider.hasNext()) {
128	PDPage page = (PDPage) pageRider.next();
129	PDResources resources = page.getResources();
130	Map images = resources.getImages();
131
132	if (images != null) {
133	Iterator imageRider = images.values().iterator();
134
135	while (imageRider.hasNext()) {
136	PDXObjectImage image = (PDXObjectImage) imageRider.next();
137	File imageFile = new File(targetFolder, (String) targetImageNames.get(imageIndex));
138	String name = imageFile.getAbsolutePath();
139
140	if (logger.isInfoEnabled()) {
141	logger.info("extracting image: " + stringTools.sourced(name));
142	}
143	image.write2file(fileTools.getWithoutLastSuffix(name));
144	imageIndex += 1;
145
146	double imagesProcessedRatio = ((double) imageIndex) / imageCount;
147
148	setProgress(progressWhenStartingToExtract
149	+ Math.round(imagesProcessedRatio * maxExtractProgress));
150	}
151	}
152	allFilesExtracted = !isInterrupted();
153	}
154	} finally {
155	pdf.close();
156	if (!allFilesExtracted && (targetImageNames != null)) {
157	if (logger.isInfoEnabled()) {
158	logger.info("removing files extracted so far");
159	}
160	Iterator fileRider = targetImageNames.iterator();
161
162	// TODO: Also remove folder structure generated by extracted files.
163	while (fileRider.hasNext()) {
164	String fileNameToDelete = (String) fileRider.next();
165	File fileToDelete = new File(targetFolder, fileNameToDelete);
166
167	fileToDelete.delete();
168	}
169
170	}
171	}
172	setProgress(getMaxProgress());
173	}
174	}

[all classes][net.sf.jomic.tools]

EMMA 2.0.4217 (C) Vladimir Roubtsov