1 | // Jomic - a viewer for comic book archives. |
2 | // Copyright (C) 2004-2011 Thomas Aglassinger |
3 | // |
4 | // This program is free software: you can redistribute it and/or modify |
5 | // it under the terms of the GNU General Public License as published by |
6 | // the Free Software Foundation, either version 3 of the License, or |
7 | // (at your option) any later version. |
8 | // |
9 | // This program is distributed in the hope that it will be useful, |
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | // GNU General Public License for more details. |
13 | // |
14 | // You should have received a copy of the GNU General Public License |
15 | // along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | package net.sf.jomic.tools; |
17 | |
18 | import java.io.BufferedInputStream; |
19 | import java.io.BufferedOutputStream; |
20 | import java.io.File; |
21 | import java.io.FileOutputStream; |
22 | import java.io.IOException; |
23 | import java.io.InputStream; |
24 | import java.io.OutputStream; |
25 | import java.text.DecimalFormat; |
26 | import java.util.ArrayList; |
27 | import java.util.Arrays; |
28 | import java.util.Enumeration; |
29 | import java.util.HashSet; |
30 | import java.util.Iterator; |
31 | import java.util.List; |
32 | import java.util.Map; |
33 | import java.util.Set; |
34 | import java.util.zip.ZipException; |
35 | |
36 | import org.apache.commons.logging.Log; |
37 | import org.apache.commons.logging.LogFactory; |
38 | import org.apache.pdfbox.pdmodel.PDDocument; |
39 | import org.apache.pdfbox.pdmodel.PDPage; |
40 | import org.apache.pdfbox.pdmodel.PDResources; |
41 | import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage; |
42 | import org.apache.tools.zip.ZipEntry; |
43 | import org.apache.tools.zip.ZipFile; |
44 | |
45 | import de.innosystec.unrar.Archive; |
46 | import de.innosystec.unrar.exception.RarException; |
47 | import de.innosystec.unrar.rarfile.FileHeader; |
48 | |
49 | /** |
50 | * An archive containing files. Currently supported formats are ZIP, RAR and PDF.<p> |
51 | * |
52 | * For hints and implementation details on various archive formats, see for example Marco Schmidt's |
53 | * list of <a href="http://www.geocities.com/marcoschmidt.geo/archive-file-formats.html">archive |
54 | * file formats and archivers</a> . |
55 | * |
56 | * @author Thomas Aglassinger |
57 | */ |
58 | public class FileArchive |
59 | { |
60 | private static final int BUFFER_SIZE = 4096; |
61 | |
62 | private File archiveFile; |
63 | private String baseName; |
64 | private FileTools fileTools; |
65 | private String fileType; |
66 | private LocaleTools localeTools; |
67 | private Log logger; |
68 | private long progress; |
69 | private StringTools stringTools; |
70 | |
71 | public FileArchive(File newFile) |
72 | throws IOException { |
73 | this(); |
74 | assert newFile != null; |
75 | |
76 | archiveFile = newFile; |
77 | |
78 | String suffix = fileTools.getSuffix(archiveFile); |
79 | String archiveName = archiveFile.getName(); |
80 | |
81 | baseName = archiveName.substring(0, archiveName.length() - suffix.length() - 1); |
82 | if (logger.isDebugEnabled()) { |
83 | logger.debug("baseName = \"" + baseName + "\""); |
84 | } |
85 | |
86 | fileType = fileTools.obtainComicFormat(archiveFile); |
87 | } |
88 | |
/**
 * Set up the logger and the shared tool instances every other constructor relies on.
 */
private FileArchive() {
    this.logger = LogFactory.getLog(FileArchive.class);
    this.fileTools = FileTools.instance();
    this.localeTools = LocaleTools.instance();
    this.stringTools = StringTools.instance();
}
96 | |
97 | /** |
98 | * Get the plain name of the archive file, without directory and suffix. Example: |
99 | * "/Users/me/Comics/blah.cbz" yields "blah". |
100 | */ |
101 | public String getBaseName() { |
102 | return baseName; |
103 | } |
104 | |
105 | /** |
106 | * Get the file type of the archive. |
107 | * |
108 | * @return one of: FileTools.FORMAT_ZIP, FileTools.FORMAT_RAR, FileTools.FORMAT_PDF |
109 | */ |
110 | public String getFileType() { |
111 | return fileType; |
112 | } |
113 | |
114 | /** |
115 | * The referenced file as <code>File</code> using a relative path. |
116 | */ |
117 | private File getFile(FileHeader fileHeader) { |
118 | // TODO: Move to FileHeader. |
119 | File result; |
120 | String path; |
121 | |
122 | if (fileHeader.isUnicode()) { |
123 | path = fileHeader.getFileNameW(); |
124 | } else { |
125 | path = fileHeader.getFileNameString(); |
126 | } |
127 | |
128 | String[] pathParts = path.split("\\\\"); |
129 | |
130 | if (pathParts.length == 0) { |
131 | result = new File(""); |
132 | } else { |
133 | result = new File(pathParts[0]); |
134 | for (int indexOfPathPartToAdd = 1; indexOfPathPartToAdd < pathParts.length; indexOfPathPartToAdd += 1) { |
135 | result = new File(result, pathParts[indexOfPathPartToAdd]); |
136 | } |
137 | } |
138 | assert result != null; |
139 | return result; |
140 | } |
141 | |
142 | private PDDocument getPdfDocument() |
143 | throws IOException { |
144 | PDDocument result = PDDocument.load(archiveFile); |
145 | |
146 | return result; |
147 | } |
148 | |
149 | /** |
150 | * Extract to <code>targetDir</code> all files in <code>fileNamesToExtract</code>. If <code>progressFrame</code> |
151 | * is not null, advance its progress bar by <code>progressIncrement</code> for every file that |
152 | * is extracted. |
153 | */ |
154 | public void extract( |
155 | File targetDir, |
156 | String[] fileNamesToExtract, |
157 | ProgressFrame progressFrame, |
158 | long progressIncrement) |
159 | throws IOException { |
160 | assert targetDir != null; |
161 | assert fileNamesToExtract != null; |
162 | |
163 | // TODO: Proportional progress bar depending on archive size in bytes. |
164 | if (progressFrame != null) { |
165 | String message = localeTools.getMessage("progress.extracting"); |
166 | |
167 | progressFrame.setNote(message); |
168 | } |
169 | boolean extracted = false; |
170 | |
171 | try { |
172 | if (fileType.equals(FileTools.FORMAT_PDF)) { |
173 | extractPdf(targetDir, fileNamesToExtract, progressFrame, progressIncrement); |
174 | } else if (fileType.equals(FileTools.FORMAT_RAR)) { |
175 | extractRar(targetDir, fileNamesToExtract, progressFrame, progressIncrement); |
176 | } else if (fileType.equals(FileTools.FORMAT_ZIP)) { |
177 | extractZip(targetDir, fileNamesToExtract, progressFrame, progressIncrement); |
178 | } else { |
179 | assert false : "fileType = " + fileType; |
180 | } |
181 | extracted = true; |
182 | } finally { |
183 | if (!extracted) { |
184 | logger.warn("extracting failed; removing possibly existing files"); |
185 | if (progressFrame != null) { |
186 | String message = localeTools.getMessage("progress.cleaningUp"); |
187 | |
188 | progressFrame.setNote(message); |
189 | } |
190 | removeExtracted(targetDir); |
191 | } |
192 | } |
193 | } |
194 | |
195 | public void extract(File targetDir, String[] fileNamesToExtract) |
196 | throws IOException { |
197 | extract(targetDir, fileNamesToExtract, null, -1); |
198 | } |
199 | |
200 | public String[] list() |
201 | throws IOException { |
202 | String[] result; |
203 | List archiveContents; |
204 | |
205 | // TODO: ProgressFrame.setNote("Examining...");? |
206 | if (fileType.equals(FileTools.FORMAT_RAR)) { |
207 | archiveContents = listRar(); |
208 | } else if (fileType.equals(FileTools.FORMAT_ZIP)) { |
209 | archiveContents = listZip(); |
210 | } else if (fileType.equals(FileTools.FORMAT_PDF)) { |
211 | archiveContents = listPdf(); |
212 | } else { |
213 | assert false : "fileType = " + fileType; |
214 | archiveContents = new ArrayList(); |
215 | } |
216 | result = (String[]) archiveContents.toArray(new String[0]); |
217 | return result; |
218 | } |
219 | |
220 | /** |
221 | * Attempt to remove all files that could have been extracted from the archive to <code>targetDir</code> |
222 | * . If a file cannot be removed, log a warning and continue. |
223 | */ |
224 | public void removeExtracted(File targetDir) |
225 | throws IOException { |
226 | assert targetDir != null; |
227 | // TODO: Remove need to re-read the file list from the archive. |
228 | String[] files = list(); |
229 | |
230 | for (int i = 0; i < files.length; i += 1) { |
231 | File file = new File(targetDir, files[i]); |
232 | |
233 | if (!file.delete()) { |
234 | logger.warn("cannot delete \"" + file + "\""); |
235 | } else { |
236 | if (logger.isDebugEnabled()) { |
237 | logger.debug("deleted: \"" + file + "\""); |
238 | } |
239 | } |
240 | |
241 | File dir = file.getParentFile(); |
242 | |
243 | while (!dir.equals(targetDir) && dir.delete()) { |
244 | if (logger.isDebugEnabled()) { |
245 | logger.debug("deleted: \"" + dir + "\""); |
246 | } |
247 | dir = dir.getParentFile(); |
248 | } |
249 | } |
250 | if (targetDir.delete()) { |
251 | if (logger.isDebugEnabled()) { |
252 | logger.debug("deleted: \"" + targetDir + "\""); |
253 | } |
254 | } |
255 | } |
256 | |
257 | private void advanceProgress(ProgressFrame progressFrame, long progressIncrement) { |
258 | assert (progressFrame == null) || (progressIncrement > 0); |
259 | |
260 | if (progressFrame != null) { |
261 | progress += progressIncrement; |
262 | if (logger.isDebugEnabled()) { |
263 | logger.debug("progress = " + progress); |
264 | } |
265 | progressFrame.setProgress(progress); |
266 | } else { |
267 | if (logger.isDebugEnabled()) { |
268 | logger.debug("no progress"); |
269 | } |
270 | } |
271 | } |
272 | |
273 | private void extractPdf( |
274 | File targetDir, |
275 | String[] imageNames, |
276 | ProgressFrame progressFrame, |
277 | long progressIncrement) |
278 | throws IOException { |
279 | assert targetDir != null; |
280 | assert imageNames.length > 0; |
281 | PDDocument pdf = getPdfDocument(); |
282 | |
283 | try { |
284 | List pages = pdf.getDocumentCatalog().getAllPages(); |
285 | int imageIndex = 0; |
286 | Iterator pageRider = pages.iterator(); |
287 | |
288 | progress = 0; |
289 | while (pageRider.hasNext()) { |
290 | PDPage page = (PDPage) pageRider.next(); |
291 | PDResources resources = page.getResources(); |
292 | Map images = resources.getImages(); |
293 | |
294 | if (images != null) { |
295 | Iterator imageRider = images.values().iterator(); |
296 | |
297 | while (imageRider.hasNext()) { |
298 | PDXObjectImage image = (PDXObjectImage) imageRider.next(); |
299 | File imageFile = new File(targetDir, imageNames[imageIndex]); |
300 | String name = imageFile.getAbsolutePath(); |
301 | |
302 | if (logger.isDebugEnabled()) { |
303 | logger.debug("extracting image: " + StringTools.instance().sourced(name)); |
304 | } |
305 | image.write2file(fileTools.getWithoutLastSuffix(name)); |
306 | advanceProgress(progressFrame, progressIncrement); |
307 | imageIndex += 1; |
308 | } |
309 | } |
310 | } |
311 | } finally { |
312 | pdf.close(); |
313 | } |
314 | } |
315 | |
316 | //@ requires unrarCommand != null; |
317 | private void extractRar( |
318 | File targetDir, |
319 | String[] fileNamesToExtract, |
320 | ProgressFrame progressFrame, |
321 | long progressIncrement) |
322 | throws IOException { |
323 | Archive rarArchive = null; |
324 | |
325 | try { |
326 | rarArchive = new Archive(archiveFile); |
327 | try { |
328 | Set fileNamesToExtractSet = new HashSet(Arrays.asList(fileNamesToExtract)); |
329 | FileHeader fileHeader = rarArchive.nextFileHeader(); |
330 | |
331 | progress = 0; |
332 | |
333 | while (fileHeader != null) { |
334 | if (!fileHeader.isDirectory()) { |
335 | String filePathToExtract = getFile(fileHeader).getPath(); |
336 | |
337 | if (fileNamesToExtractSet.contains(filePathToExtract)) { |
338 | advanceProgress(progressFrame, progressIncrement); |
339 | |
340 | File targetFile = new File(targetDir, filePathToExtract); |
341 | File targetParent = targetFile.getParentFile(); |
342 | |
343 | fileTools.mkdirs(targetParent); |
344 | if (logger.isInfoEnabled()) { |
345 | logger.info("extract \"" + filePathToExtract + "\" to \"" + targetFile.getAbsolutePath() |
346 | + "\""); |
347 | } |
348 | OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile), BUFFER_SIZE); |
349 | |
350 | try { |
351 | ifCanceledThrowException(progressFrame); |
352 | rarArchive.extractFile(fileHeader, out); |
353 | } finally { |
354 | out.close(); |
355 | } |
356 | } else { |
357 | if (logger.isDebugEnabled()) { |
358 | logger.debug("ignore \"" + filePathToExtract + "\""); |
359 | } |
360 | } |
361 | } |
362 | fileHeader = rarArchive.nextFileHeader(); |
363 | } |
364 | } finally { |
365 | if (rarArchive != null) { |
366 | rarArchive.close(); |
367 | } |
368 | } |
369 | } catch (RarException error) { |
370 | // Just change the RarException to an IOException. It is unlikely that the stack trace |
371 | // or error message are of any use. |
372 | throw new IOException(error.getMessage()); |
373 | } |
374 | } |
375 | |
376 | private void extractZip(File targetDir, String[] imageNames, ProgressFrame progressFrame, |
377 | long progressIncrement) |
378 | throws IOException { |
379 | assert targetDir != null; |
380 | assert imageNames.length > 0; |
381 | |
382 | ZipFile zipFile = new ZipFile(archiveFile); |
383 | |
384 | try { |
385 | Enumeration zipEntries = zipFile.getEntries(); |
386 | // Build a dictionary to quickly lookup image names for plain |
387 | // existence |
388 | String[] imageLookupMap = (String[]) imageNames.clone(); |
389 | |
390 | Arrays.sort(imageLookupMap); |
391 | |
392 | progress = 0; |
393 | while (zipEntries.hasMoreElements()) { |
394 | ZipEntry entry = (ZipEntry) zipEntries.nextElement(); |
395 | String name = entry.getName(); |
396 | |
397 | if (!entry.isDirectory() && (Arrays.binarySearch(imageLookupMap, name) >= 0)) { |
398 | advanceProgress(progressFrame, progressIncrement); |
399 | |
400 | File targetFile = new File(targetDir, name); |
401 | File targetParent = targetFile.getParentFile(); |
402 | |
403 | fileTools.mkdirs(targetParent); |
404 | if (logger.isInfoEnabled()) { |
405 | logger.info("extract \"" + name + "\" to \"" + targetFile.getAbsolutePath() |
406 | + "\""); |
407 | } |
408 | InputStream in = zipFile.getInputStream(entry); |
409 | |
410 | if (in == null) { |
411 | // At least turn Sun bug 4244499 in a clear error |
412 | // message. |
413 | String message = localeTools.getMessage( |
414 | "errors.nonAsciiCharactersMustBeRemoved", name); |
415 | |
416 | throw new IOException(message); |
417 | } |
418 | try { |
419 | BufferedInputStream inBuffered = new BufferedInputStream(in); |
420 | |
421 | try { |
422 | byte[] data = new byte[BUFFER_SIZE]; |
423 | FileOutputStream out = new FileOutputStream(targetFile); |
424 | |
425 | try { |
426 | BufferedOutputStream outBuffered = new BufferedOutputStream(out, BUFFER_SIZE); |
427 | |
428 | try { |
429 | int bytesRead; |
430 | |
431 | while ((bytesRead = inBuffered.read(data, 0, BUFFER_SIZE)) != -1) { |
432 | ifCanceledThrowException(progressFrame); |
433 | outBuffered.write(data, 0, bytesRead); |
434 | } |
435 | } finally { |
436 | outBuffered.close(); |
437 | } |
438 | } finally { |
439 | out.close(); |
440 | } |
441 | } finally { |
442 | inBuffered.close(); |
443 | } |
444 | } finally { |
445 | in.close(); |
446 | } |
447 | } else { |
448 | if (logger.isDebugEnabled()) { |
449 | logger.debug("ignore \"" + name + "\""); |
450 | } |
451 | } |
452 | } |
453 | } finally { |
454 | zipFile.close(); |
455 | } |
456 | } |
457 | |
458 | /** |
459 | * Check if progressFrame has been canceled, and if so, throw a OperationCanceledException. |
460 | * |
461 | * @see OperationCanceledException |
462 | */ |
463 | private void ifCanceledThrowException(ProgressFrame progressFrame) { |
464 | if ((progressFrame != null) && (progressFrame.isCanceled())) { |
465 | if (logger.isDebugEnabled()) { |
466 | logger.debug("throwing OperationCanceledException"); |
467 | } |
468 | throw new OperationCanceledException(); |
469 | } |
470 | } |
471 | |
472 | private List listPdf() |
473 | throws IOException { |
474 | List result = new ArrayList(); |
475 | PDDocument pdf = getPdfDocument(); |
476 | |
477 | try { |
478 | List pages = pdf.getDocumentCatalog().getAllPages(); |
479 | Iterator pageRider = pages.iterator(); |
480 | |
481 | while (pageRider.hasNext()) { |
482 | PDPage page = (PDPage) pageRider.next(); |
483 | PDResources resources = page.getResources(); |
484 | Map images = resources.getImages(); |
485 | |
486 | if (images != null) { |
487 | Iterator imageRider = images.values().iterator(); |
488 | |
489 | while (imageRider.hasNext()) { |
490 | PDXObjectImage image = (PDXObjectImage) imageRider.next(); |
491 | String suffix = image.getSuffix(); |
492 | |
493 | result.add(suffix); |
494 | } |
495 | } |
496 | } |
497 | |
498 | // Assign numeric names to all images, but preserve the original suffix. |
499 | int imageCount = result.size(); |
500 | |
501 | if (imageCount > 0) { |
502 | DecimalFormat format = stringTools.getLeadingZeroFormat(imageCount); |
503 | |
504 | for (int i = 0; i < imageCount; i += 1) { |
505 | String name = format.format(i) + "." + result.get(i); |
506 | |
507 | result.set(i, name); |
508 | } |
509 | } |
510 | } finally { |
511 | pdf.close(); |
512 | } |
513 | return result; |
514 | } |
515 | |
516 | //@ ensures \result.size() > 0; |
517 | private List listRar() |
518 | throws IOException { |
519 | List result = new ArrayList(); |
520 | |
521 | try { |
522 | Archive rarArchive = new Archive(archiveFile); |
523 | |
524 | try { |
525 | FileHeader fileHeader = rarArchive.nextFileHeader(); |
526 | |
527 | while (fileHeader != null) { |
528 | assert fileHeader.isFileHeader(); |
529 | if (!fileHeader.isDirectory()) { |
530 | String filePathToAdd = getFile(fileHeader).getPath(); |
531 | |
532 | result.add(filePathToAdd); |
533 | } |
534 | fileHeader = rarArchive.nextFileHeader(); |
535 | } |
536 | } finally { |
537 | if (rarArchive != null) { |
538 | rarArchive.close(); |
539 | } |
540 | } |
541 | } catch (RarException error) { |
542 | // Just change the RarException to an IOException. It is unlikely that the stack trace |
543 | // or error message are of any use. |
544 | throw new IOException(error.getMessage()); |
545 | } |
546 | return result; |
547 | } |
548 | |
549 | private List listZip() |
550 | throws IOException { |
551 | List result = new ArrayList(); |
552 | ZipFile zipFile = new ZipFile(archiveFile); |
553 | boolean foundAtLeastOneEntry = false; |
554 | |
555 | try { |
556 | Enumeration zipEntries = zipFile.getEntries(); |
557 | |
558 | while (zipEntries.hasMoreElements()) { |
559 | foundAtLeastOneEntry = true; |
560 | |
561 | ZipEntry entry = (ZipEntry) zipEntries.nextElement(); |
562 | |
563 | result.add(entry.getName()); |
564 | } |
565 | } finally { |
566 | zipFile.close(); |
567 | } |
568 | if (!foundAtLeastOneEntry) { |
569 | // This is the message java.util.ZipFile would cause in case the header is corrupted, |
570 | // although org.apache.tools.zip.ZipFile just returns no entries without any Exception. |
571 | String message = localeTools.getMessage("errors.cannotParseZipHeaderJzentry0"); |
572 | |
573 | throw new ZipException(message); |
574 | } |
575 | |
576 | return result; |
577 | } |
578 | } |