| 1 | // Jomic - a viewer for comic book archives. |
| 2 | // Copyright (C) 2004-2011 Thomas Aglassinger |
| 3 | // |
| 4 | // This program is free software: you can redistribute it and/or modify |
| 5 | // it under the terms of the GNU General Public License as published by |
| 6 | // the Free Software Foundation, either version 3 of the License, or |
| 7 | // (at your option) any later version. |
| 8 | // |
| 9 | // This program is distributed in the hope that it will be useful, |
| 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | // GNU General Public License for more details. |
| 13 | // |
| 14 | // You should have received a copy of the GNU General Public License |
| 15 | // along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 16 | package net.sf.jomic.tools; |
| 17 | |
| 18 | import java.io.BufferedInputStream; |
| 19 | import java.io.BufferedOutputStream; |
| 20 | import java.io.File; |
| 21 | import java.io.FileOutputStream; |
| 22 | import java.io.IOException; |
| 23 | import java.io.InputStream; |
| 24 | import java.io.OutputStream; |
| 25 | import java.text.DecimalFormat; |
| 26 | import java.util.ArrayList; |
| 27 | import java.util.Arrays; |
| 28 | import java.util.Enumeration; |
| 29 | import java.util.HashSet; |
| 30 | import java.util.Iterator; |
| 31 | import java.util.List; |
| 32 | import java.util.Map; |
| 33 | import java.util.Set; |
| 34 | import java.util.zip.ZipException; |
| 35 | |
| 36 | import org.apache.commons.logging.Log; |
| 37 | import org.apache.commons.logging.LogFactory; |
| 38 | import org.apache.pdfbox.pdmodel.PDDocument; |
| 39 | import org.apache.pdfbox.pdmodel.PDPage; |
| 40 | import org.apache.pdfbox.pdmodel.PDResources; |
| 41 | import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage; |
| 42 | import org.apache.tools.zip.ZipEntry; |
| 43 | import org.apache.tools.zip.ZipFile; |
| 44 | |
| 45 | import de.innosystec.unrar.Archive; |
| 46 | import de.innosystec.unrar.exception.RarException; |
| 47 | import de.innosystec.unrar.rarfile.FileHeader; |
| 48 | |
| 49 | /** |
| 50 | * An archive containing files. Currently supported formats are ZIP and RAR.<p> |
| 51 | * |
| 52 | * For hints and implementation details on various archive formats, see for example Marco Schmidt's |
| 53 | * list of <a href="http://www.geocities.com/marcoschmidt.geo/archive-file-formats.html">archive |
| 54 | * file formats and archivers</a> . |
| 55 | * |
| 56 | * @author Thomas Aglassinger |
| 57 | */ |
| 58 | public class FileArchive |
| 59 | { |
| 60 | private static final int BUFFER_SIZE = 4096; |
| 61 | |
| 62 | private File archiveFile; |
| 63 | private String baseName; |
| 64 | private FileTools fileTools; |
| 65 | private String fileType; |
| 66 | private LocaleTools localeTools; |
| 67 | private Log logger; |
| 68 | private long progress; |
| 69 | private StringTools stringTools; |
| 70 | |
| 71 | public FileArchive(File newFile) |
| 72 | throws IOException { |
| 73 | this(); |
| 74 | assert newFile != null; |
| 75 | |
| 76 | archiveFile = newFile; |
| 77 | |
| 78 | String suffix = fileTools.getSuffix(archiveFile); |
| 79 | String archiveName = archiveFile.getName(); |
| 80 | |
| 81 | baseName = archiveName.substring(0, archiveName.length() - suffix.length() - 1); |
| 82 | if (logger.isDebugEnabled()) { |
| 83 | logger.debug("baseName = \"" + baseName + "\""); |
| 84 | } |
| 85 | |
| 86 | fileType = fileTools.obtainComicFormat(archiveFile); |
| 87 | } |
| 88 | |
| 89 | private FileArchive() { |
| 90 | super(); |
| 91 | logger = LogFactory.getLog(FileArchive.class); |
| 92 | fileTools = FileTools.instance(); |
| 93 | localeTools = LocaleTools.instance(); |
| 94 | stringTools = StringTools.instance(); |
| 95 | } |
| 96 | |
| 97 | /** |
| 98 | * Get the plain name of the archive file, without directory and suffix. Example: |
| 99 | * "/Users/me/Comics/blah.cbz" yields "blah". |
| 100 | */ |
| 101 | public String getBaseName() { |
| 102 | return baseName; |
| 103 | } |
| 104 | |
| 105 | /** |
| 106 | * Get the file type of the archive. |
| 107 | * |
| 108 | * @return one of: FileTools.FORMAT_ZIP, FileTools.FORMAT_RAR, FileTools.FORMAT_PDF |
| 109 | */ |
| 110 | public String getFileType() { |
| 111 | return fileType; |
| 112 | } |
| 113 | |
| 114 | /** |
| 115 | * The referenced file as <code>File</code> using a relative path. |
| 116 | */ |
| 117 | private File getFile(FileHeader fileHeader) { |
| 118 | // TODO: Move to FileHeader. |
| 119 | File result; |
| 120 | String path; |
| 121 | |
| 122 | if (fileHeader.isUnicode()) { |
| 123 | path = fileHeader.getFileNameW(); |
| 124 | } else { |
| 125 | path = fileHeader.getFileNameString(); |
| 126 | } |
| 127 | |
| 128 | String[] pathParts = path.split("\\\\"); |
| 129 | |
| 130 | if (pathParts.length == 0) { |
| 131 | result = new File(""); |
| 132 | } else { |
| 133 | result = new File(pathParts[0]); |
| 134 | for (int indexOfPathPartToAdd = 1; indexOfPathPartToAdd < pathParts.length; indexOfPathPartToAdd += 1) { |
| 135 | result = new File(result, pathParts[indexOfPathPartToAdd]); |
| 136 | } |
| 137 | } |
| 138 | assert result != null; |
| 139 | return result; |
| 140 | } |
| 141 | |
| 142 | private PDDocument getPdfDocument() |
| 143 | throws IOException { |
| 144 | PDDocument result = PDDocument.load(archiveFile); |
| 145 | |
| 146 | return result; |
| 147 | } |
| 148 | |
| 149 | /** |
| 150 | * Extract to <code>targetDir</code> all files in <code>fileNamesToExtract</code>. If <code>progressFrame</code> |
| 151 | * is not null, advance its progress bar by <code>progressIncrement</code> for every file that |
| 152 | * is extracted. |
| 153 | */ |
| 154 | public void extract( |
| 155 | File targetDir, |
| 156 | String[] fileNamesToExtract, |
| 157 | ProgressFrame progressFrame, |
| 158 | long progressIncrement) |
| 159 | throws IOException { |
| 160 | assert targetDir != null; |
| 161 | assert fileNamesToExtract != null; |
| 162 | |
| 163 | // TODO: Proportional progress bar depending on archive size in bytes. |
| 164 | if (progressFrame != null) { |
| 165 | String message = localeTools.getMessage("progress.extracting"); |
| 166 | |
| 167 | progressFrame.setNote(message); |
| 168 | } |
| 169 | boolean extracted = false; |
| 170 | |
| 171 | try { |
| 172 | if (fileType.equals(FileTools.FORMAT_PDF)) { |
| 173 | extractPdf(targetDir, fileNamesToExtract, progressFrame, progressIncrement); |
| 174 | } else if (fileType.equals(FileTools.FORMAT_RAR)) { |
| 175 | extractRar(targetDir, fileNamesToExtract, progressFrame, progressIncrement); |
| 176 | } else if (fileType.equals(FileTools.FORMAT_ZIP)) { |
| 177 | extractZip(targetDir, fileNamesToExtract, progressFrame, progressIncrement); |
| 178 | } else { |
| 179 | assert false : "fileType = " + fileType; |
| 180 | } |
| 181 | extracted = true; |
| 182 | } finally { |
| 183 | if (!extracted) { |
| 184 | logger.warn("extracting failed; removing possibly existing files"); |
| 185 | if (progressFrame != null) { |
| 186 | String message = localeTools.getMessage("progress.cleaningUp"); |
| 187 | |
| 188 | progressFrame.setNote(message); |
| 189 | } |
| 190 | removeExtracted(targetDir); |
| 191 | } |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | public void extract(File targetDir, String[] fileNamesToExtract) |
| 196 | throws IOException { |
| 197 | extract(targetDir, fileNamesToExtract, null, -1); |
| 198 | } |
| 199 | |
| 200 | public String[] list() |
| 201 | throws IOException { |
| 202 | String[] result; |
| 203 | List archiveContents; |
| 204 | |
| 205 | // TODO: ProgressFrame.setNote("Examining...");? |
| 206 | if (fileType.equals(FileTools.FORMAT_RAR)) { |
| 207 | archiveContents = listRar(); |
| 208 | } else if (fileType.equals(FileTools.FORMAT_ZIP)) { |
| 209 | archiveContents = listZip(); |
| 210 | } else if (fileType.equals(FileTools.FORMAT_PDF)) { |
| 211 | archiveContents = listPdf(); |
| 212 | } else { |
| 213 | assert false : "fileType = " + fileType; |
| 214 | archiveContents = new ArrayList(); |
| 215 | } |
| 216 | result = (String[]) archiveContents.toArray(new String[0]); |
| 217 | return result; |
| 218 | } |
| 219 | |
| 220 | /** |
| 221 | * Attempt to remove all files that could have been extracted from the archive to <code>targetDir</code> |
| 222 | * . If a file cannot be removed, log a warning and continue. |
| 223 | */ |
| 224 | public void removeExtracted(File targetDir) |
| 225 | throws IOException { |
| 226 | assert targetDir != null; |
| 227 | // TODO: Remove need to re-read the file list from the archive. |
| 228 | String[] files = list(); |
| 229 | |
| 230 | for (int i = 0; i < files.length; i += 1) { |
| 231 | File file = new File(targetDir, files[i]); |
| 232 | |
| 233 | if (!file.delete()) { |
| 234 | logger.warn("cannot delete \"" + file + "\""); |
| 235 | } else { |
| 236 | if (logger.isDebugEnabled()) { |
| 237 | logger.debug("deleted: \"" + file + "\""); |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | File dir = file.getParentFile(); |
| 242 | |
| 243 | while (!dir.equals(targetDir) && dir.delete()) { |
| 244 | if (logger.isDebugEnabled()) { |
| 245 | logger.debug("deleted: \"" + dir + "\""); |
| 246 | } |
| 247 | dir = dir.getParentFile(); |
| 248 | } |
| 249 | } |
| 250 | if (targetDir.delete()) { |
| 251 | if (logger.isDebugEnabled()) { |
| 252 | logger.debug("deleted: \"" + targetDir + "\""); |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | private void advanceProgress(ProgressFrame progressFrame, long progressIncrement) { |
| 258 | assert (progressFrame == null) || (progressIncrement > 0); |
| 259 | |
| 260 | if (progressFrame != null) { |
| 261 | progress += progressIncrement; |
| 262 | if (logger.isDebugEnabled()) { |
| 263 | logger.debug("progress = " + progress); |
| 264 | } |
| 265 | progressFrame.setProgress(progress); |
| 266 | } else { |
| 267 | if (logger.isDebugEnabled()) { |
| 268 | logger.debug("no progress"); |
| 269 | } |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | private void extractPdf( |
| 274 | File targetDir, |
| 275 | String[] imageNames, |
| 276 | ProgressFrame progressFrame, |
| 277 | long progressIncrement) |
| 278 | throws IOException { |
| 279 | assert targetDir != null; |
| 280 | assert imageNames.length > 0; |
| 281 | PDDocument pdf = getPdfDocument(); |
| 282 | |
| 283 | try { |
| 284 | List pages = pdf.getDocumentCatalog().getAllPages(); |
| 285 | int imageIndex = 0; |
| 286 | Iterator pageRider = pages.iterator(); |
| 287 | |
| 288 | progress = 0; |
| 289 | while (pageRider.hasNext()) { |
| 290 | PDPage page = (PDPage) pageRider.next(); |
| 291 | PDResources resources = page.getResources(); |
| 292 | Map images = resources.getImages(); |
| 293 | |
| 294 | if (images != null) { |
| 295 | Iterator imageRider = images.values().iterator(); |
| 296 | |
| 297 | while (imageRider.hasNext()) { |
| 298 | PDXObjectImage image = (PDXObjectImage) imageRider.next(); |
| 299 | File imageFile = new File(targetDir, imageNames[imageIndex]); |
| 300 | String name = imageFile.getAbsolutePath(); |
| 301 | |
| 302 | if (logger.isDebugEnabled()) { |
| 303 | logger.debug("extracting image: " + StringTools.instance().sourced(name)); |
| 304 | } |
| 305 | image.write2file(fileTools.getWithoutLastSuffix(name)); |
| 306 | advanceProgress(progressFrame, progressIncrement); |
| 307 | imageIndex += 1; |
| 308 | } |
| 309 | } |
| 310 | } |
| 311 | } finally { |
| 312 | pdf.close(); |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | //@ requires unrarCommand != null; |
| 317 | private void extractRar( |
| 318 | File targetDir, |
| 319 | String[] fileNamesToExtract, |
| 320 | ProgressFrame progressFrame, |
| 321 | long progressIncrement) |
| 322 | throws IOException { |
| 323 | Archive rarArchive = null; |
| 324 | |
| 325 | try { |
| 326 | rarArchive = new Archive(archiveFile); |
| 327 | try { |
| 328 | Set fileNamesToExtractSet = new HashSet(Arrays.asList(fileNamesToExtract)); |
| 329 | FileHeader fileHeader = rarArchive.nextFileHeader(); |
| 330 | |
| 331 | progress = 0; |
| 332 | |
| 333 | while (fileHeader != null) { |
| 334 | if (!fileHeader.isDirectory()) { |
| 335 | String filePathToExtract = getFile(fileHeader).getPath(); |
| 336 | |
| 337 | if (fileNamesToExtractSet.contains(filePathToExtract)) { |
| 338 | advanceProgress(progressFrame, progressIncrement); |
| 339 | |
| 340 | File targetFile = new File(targetDir, filePathToExtract); |
| 341 | File targetParent = targetFile.getParentFile(); |
| 342 | |
| 343 | fileTools.mkdirs(targetParent); |
| 344 | if (logger.isInfoEnabled()) { |
| 345 | logger.info("extract \"" + filePathToExtract + "\" to \"" + targetFile.getAbsolutePath() |
| 346 | + "\""); |
| 347 | } |
| 348 | OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile), BUFFER_SIZE); |
| 349 | |
| 350 | try { |
| 351 | ifCanceledThrowException(progressFrame); |
| 352 | rarArchive.extractFile(fileHeader, out); |
| 353 | } finally { |
| 354 | out.close(); |
| 355 | } |
| 356 | } else { |
| 357 | if (logger.isDebugEnabled()) { |
| 358 | logger.debug("ignore \"" + filePathToExtract + "\""); |
| 359 | } |
| 360 | } |
| 361 | } |
| 362 | fileHeader = rarArchive.nextFileHeader(); |
| 363 | } |
| 364 | } finally { |
| 365 | if (rarArchive != null) { |
| 366 | rarArchive.close(); |
| 367 | } |
| 368 | } |
| 369 | } catch (RarException error) { |
| 370 | // Just change the RarException to an IOException. It is unlikely that the stack trace |
| 371 | // or error message are of any use. |
| 372 | throw new IOException(error.getMessage()); |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | private void extractZip(File targetDir, String[] imageNames, ProgressFrame progressFrame, |
| 377 | long progressIncrement) |
| 378 | throws IOException { |
| 379 | assert targetDir != null; |
| 380 | assert imageNames.length > 0; |
| 381 | |
| 382 | ZipFile zipFile = new ZipFile(archiveFile); |
| 383 | |
| 384 | try { |
| 385 | Enumeration zipEntries = zipFile.getEntries(); |
| 386 | // Build a dictionary to quickly lookup image names for plain |
| 387 | // existence |
| 388 | String[] imageLookupMap = (String[]) imageNames.clone(); |
| 389 | |
| 390 | Arrays.sort(imageLookupMap); |
| 391 | |
| 392 | progress = 0; |
| 393 | while (zipEntries.hasMoreElements()) { |
| 394 | ZipEntry entry = (ZipEntry) zipEntries.nextElement(); |
| 395 | String name = entry.getName(); |
| 396 | |
| 397 | if (!entry.isDirectory() && (Arrays.binarySearch(imageLookupMap, name) >= 0)) { |
| 398 | advanceProgress(progressFrame, progressIncrement); |
| 399 | |
| 400 | File targetFile = new File(targetDir, name); |
| 401 | File targetParent = targetFile.getParentFile(); |
| 402 | |
| 403 | fileTools.mkdirs(targetParent); |
| 404 | if (logger.isInfoEnabled()) { |
| 405 | logger.info("extract \"" + name + "\" to \"" + targetFile.getAbsolutePath() |
| 406 | + "\""); |
| 407 | } |
| 408 | InputStream in = zipFile.getInputStream(entry); |
| 409 | |
| 410 | if (in == null) { |
| 411 | // At least turn Sun bug 4244499 in a clear error |
| 412 | // message. |
| 413 | String message = localeTools.getMessage( |
| 414 | "errors.nonAsciiCharactersMustBeRemoved", name); |
| 415 | |
| 416 | throw new IOException(message); |
| 417 | } |
| 418 | try { |
| 419 | BufferedInputStream inBuffered = new BufferedInputStream(in); |
| 420 | |
| 421 | try { |
| 422 | byte[] data = new byte[BUFFER_SIZE]; |
| 423 | FileOutputStream out = new FileOutputStream(targetFile); |
| 424 | |
| 425 | try { |
| 426 | BufferedOutputStream outBuffered = new BufferedOutputStream(out, BUFFER_SIZE); |
| 427 | |
| 428 | try { |
| 429 | int bytesRead; |
| 430 | |
| 431 | while ((bytesRead = inBuffered.read(data, 0, BUFFER_SIZE)) != -1) { |
| 432 | ifCanceledThrowException(progressFrame); |
| 433 | outBuffered.write(data, 0, bytesRead); |
| 434 | } |
| 435 | } finally { |
| 436 | outBuffered.close(); |
| 437 | } |
| 438 | } finally { |
| 439 | out.close(); |
| 440 | } |
| 441 | } finally { |
| 442 | inBuffered.close(); |
| 443 | } |
| 444 | } finally { |
| 445 | in.close(); |
| 446 | } |
| 447 | } else { |
| 448 | if (logger.isDebugEnabled()) { |
| 449 | logger.debug("ignore \"" + name + "\""); |
| 450 | } |
| 451 | } |
| 452 | } |
| 453 | } finally { |
| 454 | zipFile.close(); |
| 455 | } |
| 456 | } |
| 457 | |
| 458 | /** |
| 459 | * Check if progressFrame has been canceled, and if so, throw a OperationCanceledException. |
| 460 | * |
| 461 | * @see OperationCanceledException |
| 462 | */ |
| 463 | private void ifCanceledThrowException(ProgressFrame progressFrame) { |
| 464 | if ((progressFrame != null) && (progressFrame.isCanceled())) { |
| 465 | if (logger.isDebugEnabled()) { |
| 466 | logger.debug("throwing OperationCanceledException"); |
| 467 | } |
| 468 | throw new OperationCanceledException(); |
| 469 | } |
| 470 | } |
| 471 | |
| 472 | private List listPdf() |
| 473 | throws IOException { |
| 474 | List result = new ArrayList(); |
| 475 | PDDocument pdf = getPdfDocument(); |
| 476 | |
| 477 | try { |
| 478 | List pages = pdf.getDocumentCatalog().getAllPages(); |
| 479 | Iterator pageRider = pages.iterator(); |
| 480 | |
| 481 | while (pageRider.hasNext()) { |
| 482 | PDPage page = (PDPage) pageRider.next(); |
| 483 | PDResources resources = page.getResources(); |
| 484 | Map images = resources.getImages(); |
| 485 | |
| 486 | if (images != null) { |
| 487 | Iterator imageRider = images.values().iterator(); |
| 488 | |
| 489 | while (imageRider.hasNext()) { |
| 490 | PDXObjectImage image = (PDXObjectImage) imageRider.next(); |
| 491 | String suffix = image.getSuffix(); |
| 492 | |
| 493 | result.add(suffix); |
| 494 | } |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | // Assign numeric names to all images, but preserve the original suffix. |
| 499 | int imageCount = result.size(); |
| 500 | |
| 501 | if (imageCount > 0) { |
| 502 | DecimalFormat format = stringTools.getLeadingZeroFormat(imageCount); |
| 503 | |
| 504 | for (int i = 0; i < imageCount; i += 1) { |
| 505 | String name = format.format(i) + "." + result.get(i); |
| 506 | |
| 507 | result.set(i, name); |
| 508 | } |
| 509 | } |
| 510 | } finally { |
| 511 | pdf.close(); |
| 512 | } |
| 513 | return result; |
| 514 | } |
| 515 | |
| 516 | //@ ensures \result.size() > 0; |
| 517 | private List listRar() |
| 518 | throws IOException { |
| 519 | List result = new ArrayList(); |
| 520 | |
| 521 | try { |
| 522 | Archive rarArchive = new Archive(archiveFile); |
| 523 | |
| 524 | try { |
| 525 | FileHeader fileHeader = rarArchive.nextFileHeader(); |
| 526 | |
| 527 | while (fileHeader != null) { |
| 528 | assert fileHeader.isFileHeader(); |
| 529 | if (!fileHeader.isDirectory()) { |
| 530 | String filePathToAdd = getFile(fileHeader).getPath(); |
| 531 | |
| 532 | result.add(filePathToAdd); |
| 533 | } |
| 534 | fileHeader = rarArchive.nextFileHeader(); |
| 535 | } |
| 536 | } finally { |
| 537 | if (rarArchive != null) { |
| 538 | rarArchive.close(); |
| 539 | } |
| 540 | } |
| 541 | } catch (RarException error) { |
| 542 | // Just change the RarException to an IOException. It is unlikely that the stack trace |
| 543 | // or error message are of any use. |
| 544 | throw new IOException(error.getMessage()); |
| 545 | } |
| 546 | return result; |
| 547 | } |
| 548 | |
| 549 | private List listZip() |
| 550 | throws IOException { |
| 551 | List result = new ArrayList(); |
| 552 | ZipFile zipFile = new ZipFile(archiveFile); |
| 553 | boolean foundAtLeastOneEntry = false; |
| 554 | |
| 555 | try { |
| 556 | Enumeration zipEntries = zipFile.getEntries(); |
| 557 | |
| 558 | while (zipEntries.hasMoreElements()) { |
| 559 | foundAtLeastOneEntry = true; |
| 560 | |
| 561 | ZipEntry entry = (ZipEntry) zipEntries.nextElement(); |
| 562 | |
| 563 | result.add(entry.getName()); |
| 564 | } |
| 565 | } finally { |
| 566 | zipFile.close(); |
| 567 | } |
| 568 | if (!foundAtLeastOneEntry) { |
| 569 | // This is the message java.util.ZipFile would cause in case the header is corrupted, |
| 570 | // although org.apache.tools.zip.ZipFile just returns no entries without any Exception. |
| 571 | String message = localeTools.getMessage("errors.cannotParseZipHeaderJzentry0"); |
| 572 | |
| 573 | throw new ZipException(message); |
| 574 | } |
| 575 | |
| 576 | return result; |
| 577 | } |
| 578 | } |