PDFFile.java 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557
  1. /*
  2. * $Id: PDFFile.java,v 1.15 2009/03/12 12:25:25 tomoke Exp $
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
  5. * Santa Clara, California 95054, U.S.A. All rights reserved.
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. package com.sun.pdfview;
  22. import java.io.IOException;
  23. import java.util.ArrayList;
  24. import java.util.Collections;
  25. import java.util.HashMap;
  26. import java.util.Iterator;
  27. import java.util.Map;
  28. import java.util.StringTokenizer;
  29. import net.sf.andpdf.pdfviewer.ByteBuffer;
  30. import android.graphics.RectF;
  31. import com.sun.pdfview.action.GoToAction;
  32. import com.sun.pdfview.action.PDFAction;
  33. import com.sun.pdfview.decrypt.EncryptionUnsupportedByPlatformException;
  34. import com.sun.pdfview.decrypt.EncryptionUnsupportedByProductException;
  35. import com.sun.pdfview.decrypt.IdentityDecrypter;
  36. import com.sun.pdfview.decrypt.PDFAuthenticationFailureException;
  37. import com.sun.pdfview.decrypt.PDFDecrypter;
  38. import com.sun.pdfview.decrypt.PDFDecrypterFactory;
  39. import com.sun.pdfview.decrypt.PDFPassword;
  40. import com.sun.pdfview.decrypt.UnsupportedEncryptionException;
  41. /**
  42. * An encapsulation of a .pdf file. The methods of this class
  43. * can parse the contents of a PDF file, but those methods are
  44. * hidden. Instead, the public methods of this class allow
  45. * access to the pages in the PDF file. Typically, you create
  46. * a new PDFFile, ask it for the number of pages, and then
  47. * request one or more PDFPages.
  48. * @author Mike Wessler
  49. */
  50. public class PDFFile {
  51. public final static int NUL_CHAR = 0;
  52. public final static int FF_CHAR = 12;
  53. private String versionString = "1.1";
  54. private int majorVersion = 1;
  55. private int minorVersion = 1;
  56. /** the end of line character */
  57. /** the comment text to begin the file to determine it's version */
  58. private final static String VERSION_COMMENT = "%PDF-";
  59. /**
  60. * A ByteBuffer containing the file data
  61. */
  62. ByteBuffer buf;
  63. /**
  64. * the cross reference table mapping object numbers to locations
  65. * in the PDF file
  66. */
  67. PDFXref[] objIdx;
  68. /** the root PDFObject, as specified in the PDF file */
  69. PDFObject root = null;
  70. /** the Encrypt PDFObject, from the trailer */
  71. PDFObject encrypt = null;
  72. /** The Info PDFPbject, from the trailer, for simple metadata */
  73. PDFObject info = null;
  74. /** a mapping of page numbers to parsed PDF commands */
  75. Cache cache;
  76. /**
  77. * whether the file is printable or not (trailer -> Encrypt -> P & 0x4)
  78. */
  79. private boolean printable = true;
  80. /**
  81. * whether the file is saveable or not (trailer -> Encrypt -> P & 0x10)
  82. */
  83. private boolean saveable = true;
  84. /**
  85. * The default decrypter for streams and strings. By default, no
  86. * encryption is expected, and thus the IdentityDecrypter is used.
  87. */
  88. private PDFDecrypter defaultDecrypter = IdentityDecrypter.getInstance();
  /**
   * Creates a PDFFile from a buffer holding the bytes of a .pdf document,
   * without supplying a password. Delegates to
   * {@link #PDFFile(ByteBuffer, PDFPassword)} with a null password.
   * <p>
   * Use the getPage(...) methods to get a page from the PDF file.
   * @param buf the ByteBuffer containing the PDF data
   * @throws IOException if there's a problem reading from the buffer
   * @throws PDFParseException if the document appears to be malformed, or
   * its features are unsupported. If the file is encrypted in a manner that
   * the product or platform does not support then the exception's {@link
   * PDFParseException#getCause() cause} will be an instance of {@link
   * UnsupportedEncryptionException}.
   * @throws PDFAuthenticationFailureException if the file is password
   * protected and requires a password
   */
  public PDFFile(ByteBuffer buf) throws IOException {
    this(buf, (PDFPassword) null);
  }
  /**
   * Creates a PDFFile from a buffer holding the bytes of a .pdf document.
   * The buffer is stored, a fresh Cache is created, and the file is then
   * parsed with the given password.
   * <p>
   * Use the getPage(...) methods to get a page from the PDF file.
   * @param buf the ByteBuffer containing the PDF data
   * @param password the user or owner password
   * @throws IOException if there's a problem reading from the buffer
   * @throws PDFParseException if the document appears to be malformed, or
   * its features are unsupported. If the file is encrypted in a manner that
   * the product or platform does not support then the exception's {@link
   * PDFParseException#getCause() cause} will be an instance of {@link
   * UnsupportedEncryptionException}.
   * @throws PDFAuthenticationFailureException if the file is password
   * protected and the supplied password does not decrypt the document
   */
  public PDFFile(ByteBuffer buf, PDFPassword password) throws IOException {
    this.cache = new Cache();
    this.buf = buf;
    // parsing needs both the buffer and the cache to be in place
    this.parseFile(password);
  }
  128. /**
  129. * Gets whether the owner of the file has given permission to print
  130. * the file.
  131. * @return true if it is okay to print the file
  132. */
  133. public boolean isPrintable() {
  134. return printable;
  135. }
  136. /**
  137. * Gets whether the owner of the file has given permission to save
  138. * a copy of the file.
  139. * @return true if it is okay to save the file
  140. */
  141. public boolean isSaveable() {
  142. return saveable;
  143. }
  144. /**
  145. * get the root PDFObject of this PDFFile. You generally shouldn't need
  146. * this, but we've left it open in case you want to go spelunking.
  147. */
  148. public PDFObject getRoot() {
  149. return root;
  150. }
  151. /**
  152. * return the number of pages in this PDFFile. The pages will be
  153. * numbered from 1 to getNumPages(), inclusive.
  154. */
  155. public int getNumPages() {
  156. try {
  157. return root.getDictRef("Pages").getDictRef("Count").getIntValue();
  158. } catch (Exception ioe) {
  159. return 0;
  160. }
  161. }
  162. /**
  163. * Get metadata (e.g., Author, Title, Creator) from the Info dictionary
  164. * as a string.
  165. * @param name the name of the metadata key (e.g., Author)
  166. * @return the info
  167. * @throws IOException if the metadata cannot be read
  168. */
  169. public String getStringMetadata(String name)
  170. throws IOException {
  171. if (info != null) {
  172. final PDFObject meta = info.getDictRef(name);
  173. return meta != null ? meta.getTextStringValue() : null;
  174. } else {
  175. return null;
  176. }
  177. }
  178. /**
  179. * Get the keys into the Info metadata, for use with
  180. * {@link #getStringMetadata(String)}
  181. * @return the keys present into the Info dictionary
  182. * @throws IOException if the keys cannot be read
  183. */
  184. public Iterator<String> getMetadataKeys()
  185. throws IOException {
  186. if (info != null) {
  187. return info.getDictKeys();
  188. } else {
  189. return Collections.<String>emptyList().iterator();
  190. }
  191. }
  192. /**
  193. * Used internally to track down PDFObject references. You should never
  194. * need to call this.
  195. * <p>
  196. * Since this is the only public method for tracking down PDF objects,
  197. * it is synchronized. This means that the PDFFile can only hunt down
  198. * one object at a time, preventing the file's location from getting
  199. * messed around.
  200. * <p>
  201. * This call stores the current buffer position before any changes are made
  202. * and restores it afterwards, so callers need not know that the position
  203. * has changed.
  204. *
  205. */
  206. public synchronized PDFObject dereference(PDFXref ref, PDFDecrypter decrypter)
  207. throws IOException {
  208. int id = ref.getID();
  209. // make sure the id is valid and has been read
  210. if (id >= objIdx.length || objIdx[id] == null) {
  211. return PDFObject.nullObj;
  212. }
  213. // check to see if this is already dereferenced
  214. PDFObject obj = objIdx[id].getObject();
  215. if (obj != null) {
  216. return obj;
  217. }
  218. int loc = objIdx[id].getFilePos();
  219. if (loc < 0) {
  220. return PDFObject.nullObj;
  221. }
  222. // store the current position in the buffer
  223. int startPos = buf.position();
  224. // move to where this object is
  225. buf.position(loc);
  226. // read the object and cache the reference
  227. obj= readObject(ref.getID(), ref.getGeneration(), decrypter);
  228. if (obj == null) {
  229. obj = PDFObject.nullObj;
  230. }
  231. objIdx[id].setObject(obj);
  232. // reset to the previous position
  233. buf.position(startPos);
  234. return obj;
  235. }
  236. /**
  237. * Is the argument a white space character according to the PDF spec?.
  238. * ISO Spec 32000-1:2008 - Table 1
  239. */
  240. public static boolean isWhiteSpace(int c) {
  241. switch (c) {
  242. case NUL_CHAR: // Null (NULL)
  243. case '\t': // Horizontal Tab (HT)
  244. case '\n': // Line Feed (LF)
  245. case FF_CHAR: // Form Feed (FF)
  246. case '\r': // Carriage Return (CR)
  247. case ' ': // Space (SP)
  248. return true;
  249. default:
  250. return false;
  251. }
  252. }
  253. /**
  254. * Is the argument a delimiter according to the PDF spec?<p>
  255. *
  256. * ISO 32000-1:2008 - Table 2
  257. *
  258. * @param c the character to test
  259. */
  260. public static boolean isDelimiter(int c) {
  261. switch (c) {
  262. case '(': // LEFT PARENTHESIS
  263. case ')': // RIGHT PARENTHESIS
  264. case '<': // LESS-THAN-SIGN
  265. case '>': // GREATER-THAN-SIGN
  266. case '[': // LEFT SQUARE BRACKET
  267. case ']': // RIGHT SQUARE BRACKET
  268. case '{': // LEFT CURLY BRACKET
  269. case '}': // RIGHT CURLY BRACKET
  270. case '/': // SOLIDUS
  271. case '%': // PERCENT SIGN
  272. return true;
  273. default:
  274. return false;
  275. }
  276. }
  277. /**
  278. * return true if the character is neither a whitespace or a delimiter.
  279. *
  280. * @param c the character to test
  281. * @return boolean
  282. */
  283. public static boolean isRegularCharacter (int c) {
  284. return !(isWhiteSpace(c) || isDelimiter(c));
  285. }
  286. /**
  287. * read the next object from the file
  288. * @param objNum the object number of the object containing the object
  289. * being read; negative only if the object number is unavailable (e.g., if
  290. * reading from the trailer, or reading at the top level, in which
  291. * case we can expect to be reading an object description)
  292. * @param objGen the object generation of the object containing the object
  293. * being read; negative only if the objNum is unavailable
  294. * @param decrypter the decrypter to use
  295. */
  296. private PDFObject readObject(
  297. int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
  298. return readObject(objNum, objGen, false, decrypter);
  299. }
  300. /**
  301. * read the next object with a special catch for numbers
  302. * @param numscan if true, don't bother trying to see if a number is
  303. * an object reference (used when already in the middle of testing for
  304. * an object reference, and not otherwise)
  305. * @param objNum the object number of the object containing the object
  306. * being read; negative only if the object number is unavailable (e.g., if
  307. * reading from the trailer, or reading at the top level, in which
  308. * case we can expect to be reading an object description)
  309. * @param objGen the object generation of the object containing the object
  310. * being read; negative only if the objNum is unavailable
  311. * @param decrypter the decrypter to use
  312. */
  313. private PDFObject readObject(
  314. int objNum, int objGen,
  315. boolean numscan, PDFDecrypter decrypter) throws IOException {
  316. // skip whitespace
  317. int c;
  318. PDFObject obj = null;
  319. while (obj == null) {
  320. while (isWhiteSpace(c = buf.get())) {
  321. }
  322. // check character for special punctuation:
  323. if (c == '<') {
  324. // could be start of <hex data>, or start of <<dictionary>>
  325. c = buf.get();
  326. if (c == '<') {
  327. // it's a dictionary
  328. obj= readDictionary(objNum, objGen, decrypter);
  329. } else {
  330. buf.position(buf.position() - 1);
  331. obj= readHexString(objNum, objGen, decrypter);
  332. }
  333. } else if (c == '(') {
  334. obj= readLiteralString(objNum, objGen, decrypter);
  335. } else if (c == '[') {
  336. // it's an array
  337. obj= readArray(objNum, objGen, decrypter);
  338. } else if (c == '/') {
  339. // it's a name
  340. obj = readName();
  341. } else if (c == '%') {
  342. // it's a comment
  343. readLine();
  344. } else if ((c >= '0' && c <= '9') || c == '-' || c == '+' || c == '.') {
  345. // it's a number
  346. obj = readNumber((char) c);
  347. if (!numscan) {
  348. // It could be the start of a reference.
  349. // Check to see if there's another number, then "R".
  350. //
  351. // We can't use mark/reset, since this could be called
  352. // from dereference, which already is using a mark
  353. int startPos = buf.position();
  354. PDFObject testnum= readObject(-1, -1, true, decrypter);
  355. if (testnum != null &&
  356. testnum.getType() == PDFObject.NUMBER) {
  357. PDFObject testR= readObject(-1, -1, true, decrypter);
  358. if (testR != null &&
  359. testR.getType() == PDFObject.KEYWORD &&
  360. testR.getStringValue().equals("R")) {
  361. // yup. it's a reference.
  362. PDFXref xref = new PDFXref(obj.getIntValue(),
  363. testnum.getIntValue());
  364. // Create a placeholder that will be dereferenced
  365. // as needed
  366. obj = new PDFObject(this, xref);
  367. } else if (testR != null &&
  368. testR.getType() == PDFObject.KEYWORD &&
  369. testR.getStringValue().equals("obj")) {
  370. // it's an object description
  371. obj= readObjectDescription(
  372. obj.getIntValue(),
  373. testnum.getIntValue(),
  374. decrypter);
  375. } else {
  376. buf.position(startPos);
  377. }
  378. } else {
  379. buf.position(startPos);
  380. }
  381. }
  382. } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
  383. // it's a keyword
  384. obj = readKeyword((char) c);
  385. } else {
  386. // it's probably a closing character.
  387. // throwback
  388. buf.position(buf.position() - 1);
  389. break;
  390. }
  391. }
  392. return obj;
  393. }
  394. /**
  395. * requires the next few characters (after whitespace) to match the
  396. * argument.
  397. * @param match the next few characters after any whitespace that
  398. * must be in the file
  399. * @return true if the next characters match; false otherwise.
  400. */
  401. private boolean nextItemIs(String match) throws IOException {
  402. // skip whitespace
  403. int c;
  404. while (isWhiteSpace(c = buf.get())) {
  405. }
  406. for (int i = 0; i < match.length(); i++) {
  407. if (i > 0) {
  408. c = buf.get();
  409. }
  410. if (c != match.charAt(i)) {
  411. return false;
  412. }
  413. }
  414. return true;
  415. }
  416. /**
  417. * process a version string, to determine the major and minor versions
  418. * of the file.
  419. *
  420. * @param versionString
  421. */
  422. private void processVersion(String versionString) {
  423. try {
  424. StringTokenizer tokens = new StringTokenizer(versionString, ".");
  425. majorVersion = Integer.parseInt(tokens.nextToken());
  426. minorVersion = Integer.parseInt(tokens.nextToken());
  427. this.versionString = versionString;
  428. } catch (Exception e) {
  429. // ignore
  430. }
  431. }
  432. /**
  433. * return the major version of the PDF header.
  434. *
  435. * @return int
  436. */
  437. public int getMajorVersion() {
  438. return majorVersion;
  439. }
  440. /**
  441. * return the minor version of the PDF header.
  442. *
  443. * @return int
  444. */
  445. public int getMinorVersion() {
  446. return minorVersion;
  447. }
  448. /**
  449. * return the version string from the PDF header.
  450. *
  451. * @return String
  452. */
  453. public String getVersionString() {
  454. return versionString;
  455. }
  456. /**
  457. * read an entire &lt;&lt; dictionary &gt;&gt;. The initial
  458. * &lt;&lt; has already been read.
  459. * @param objNum the object number of the object containing the dictionary
  460. * being read; negative only if the object number is unavailable, which
  461. * should only happen if we're reading a dictionary placed directly
  462. * in the trailer
  463. * @param objGen the object generation of the object containing the object
  464. * being read; negative only if the objNum is unavailable
  465. * @param decrypter the decrypter to use
  466. * @return the Dictionary as a PDFObject.
  467. */
  468. private PDFObject readDictionary(
  469. int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
  470. HashMap<String,PDFObject> hm = new HashMap<String,PDFObject>();
  471. // we've already read the <<. Now get /Name obj pairs until >>
  472. PDFObject name;
  473. while ((name= readObject(objNum, objGen, decrypter))!=null) {
  474. // make sure first item is a NAME
  475. if (name.getType() != PDFObject.NAME) {
  476. throw new PDFParseException("First item in dictionary must be a /Name. (Was " + name + ")");
  477. }
  478. PDFObject value= readObject(objNum, objGen, decrypter);
  479. if (value != null) {
  480. hm.put(name.getStringValue(), value);
  481. }
  482. }
  483. // System.out.println("End of dictionary at location "+raf.getFilePointer());
  484. if (!nextItemIs(">>")) {
  485. throw new PDFParseException("End of dictionary wasn't '>>'");
  486. }
  487. // System.out.println("Dictionary closed at location "+raf.getFilePointer());
  488. return new PDFObject(this, PDFObject.DICTIONARY, hm);
  489. }
  490. /**
  491. * read a character, and return its value as if it were a hexidecimal
  492. * digit.
  493. * @return a number between 0 and 15 whose value matches the next
  494. * hexidecimal character. Returns -1 if the next character isn't in
  495. * [0-9a-fA-F]
  496. */
  497. private int readHexDigit() throws IOException {
  498. int a;
  499. while (isWhiteSpace(a = buf.get())) {
  500. }
  501. switch (a) {
  502. case '0': case '1': case '2': case '3': case '4':
  503. case '5': case '6': case '7': case '8': case '9':
  504. a -= '0';
  505. break;
  506. case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
  507. a -= 'a' - 10;
  508. break;
  509. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
  510. a -= 'A' - 10;
  511. break;
  512. default:
  513. a = -1;
  514. break;
  515. }
  516. return a;
  517. }
  518. /**
  519. * return the 8-bit value represented by the next two hex characters.
  520. * If the next two characters don't represent a hex value, return -1
  521. * and reset the read head. If there is only one hex character,
  522. * return its value as if there were an implicit 0 after it.
  523. */
  524. private int readHexPair() throws IOException {
  525. int first = readHexDigit();
  526. if (first < 0) {
  527. buf.position(buf.position() - 1);
  528. return -1;
  529. }
  530. int second = readHexDigit();
  531. if (second < 0) {
  532. buf.position(buf.position() - 1);
  533. return (first << 4);
  534. } else {
  535. return (first << 4) + second;
  536. }
  537. }
  538. /**
  539. * read a < hex string >. The initial < has already been read.
  540. * @param objNum the object number of the object containing the dictionary
  541. * being read; negative only if the object number is unavailable, which
  542. * should only happen if we're reading a string placed directly
  543. * in the trailer
  544. * @param objGen the object generation of the object containing the object
  545. * being read; negative only if the objNum is unavailable
  546. * @param decrypter the decrypter to use
  547. */
  548. private PDFObject readHexString(
  549. int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
  550. // we've already read the <. Now get the hex bytes until >
  551. int val;
  552. StringBuffer sb = new StringBuffer();
  553. while ((val = readHexPair()) >= 0) {
  554. sb.append((char) val);
  555. }
  556. if (buf.get() != '>') {
  557. throw new PDFParseException("Bad character in Hex String");
  558. }
  559. String unicodeString = unicode(sb.toString());
  560. return new PDFObject(this, PDFObject.STRING,
  561. decrypter.decryptString(objNum, objGen, unicodeString));
  562. }
  563. /**
  564. * take a string and determine if it is unicode by looking at the lead
  565. * characters, and that the string must be a multiple of 2 chars long.
  566. * Convert a unicoded string's characters into the true unicode.
  567. *
  568. * @param input
  569. * @return
  570. */
  571. private String unicode(String input) {
  572. // determine if we have unicode, if so, translate it
  573. if (input.length() < 2 || (input.length() % 2) != 0) {
  574. return input;
  575. }
  576. int c0 = input.charAt(0) & 0xFF;
  577. int c1 = input.charAt(1) & 0xFF;
  578. if ((c0 == 0xFE && c1 == 0xFF) ||
  579. (c0 == 0xFF && c1 == 0xFE)) {
  580. // we have unicode
  581. boolean bigEndian = (input.charAt(1) == 0xFFFF);
  582. StringBuffer out = new StringBuffer();
  583. for (int i = 2; i < input.length(); i += 2) {
  584. if (bigEndian) {
  585. out.append((char) (((input.charAt(i + 1) & 0xFF) << 8) +
  586. (input.charAt(i) & 0xFF)));
  587. } else {
  588. out.append((char) (((input.charAt(i) & 0xFF) << 8) +
  589. (input.charAt(i + 1) & 0xFF)));
  590. }
  591. }
  592. return out.toString();
  593. } else {
  594. return input;
  595. }
  596. }
  597. /**
  598. * <p>read a ( character string ). The initial ( has already been read.
  599. * Read until a *balanced* ) appears.</p>
  600. *
  601. * <p>PDF Reference Section 3.8.1, Table 3.31 "PDF Data Types" defines
  602. * String data as:<pre>
  603. * "text string Bytes that represent characters encoded
  604. * using either PDFDocEncoding or UTF-16BE with a
  605. * leading byte-order marker (as defined in
  606. * "Text String Type" on page 158.)
  607. * </pre></p>
  608. *
  609. * <p>Section 5.3.2 defines character sequences and escapes.<br>
  610. * "The strings must conform to the syntax for string objects.
  611. * When a string is written by enclosing the data in parentheses,
  612. * bytes whose values are the same as those of the ASCII characters
  613. * left parenthesis (40), right parenthesis (41), and backslash (92)
  614. * must be preceded by a backslash character. All other byte values
  615. * between 0 and 255 may be used in a string object. <br>
  616. * These rules apply to each individual byte in a string object,
  617. * whether the string is interpreted by the text-showing operators
  618. * as single-byte or multiple-byte character codes."</p>
  619. *
  620. * <p>This only reads 8 bit basic 'strings' so as to avoid a text string
  621. * interpretation when one is not desired (e.g., for byte strings).
  622. * For a text string interpretation of a string, use
  623. * {@link PDFStringUtil#asTextString} ()} or
  624. * {@link PDFObject#getTextStringValue()} </p>
  625. * @param objNum the object number of the object containing the dictionary
  626. * being read; negative only if the object number is unavailable, which
  627. * should only happen if we're reading a dictionary placed directly
  628. * in the trailer
  629. * @param objGen the object generation of the object containing the object
  630. * being read; negative only if the objNum is unavailable
  631. * @param decrypter the decrypter to use
  632. */
  633. private PDFObject readLiteralString(
  634. int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
  635. int c;
  636. // we've already read the (. now get the characters until a
  637. // *balanced* ) appears. Translate \r \n \t \b \f \( \) \\ \ddd
  638. // if a cr/lf follows a backslash, ignore the cr/lf
  639. int parencount = 1;
  640. StringBuffer sb = new StringBuffer();
  641. while (parencount > 0) {
  642. c = buf.get() & 0xFF;
  643. // process unescaped parenthesis
  644. if (c == '(') {
  645. parencount++;
  646. } else if (c == ')') {
  647. parencount--;
  648. if (parencount == 0) {
  649. c = -1;
  650. break;
  651. }
  652. } else if (c == '\\') {
  653. // time to do some work
  654. c = buf.get() & 0xFF;
  655. if (c >= '0' && c <= '9') {
  656. // \ddd form - three OCTAL digits
  657. int count = 0;
  658. int val = 0;
  659. while (c >= '0' && c <= '8' && count < 3) {
  660. val = val * 8 + c - '0';
  661. c = buf.get() & 0xFF;
  662. count++;
  663. }
  664. buf.position(buf.position() - 1);
  665. c = val;
  666. } else if (c == 'r') {
  667. c = '\n'; // translate to 0Ah
  668. } else if (c == 'n') {
  669. c = '\n';
  670. } else if (c == 't') {
  671. c = '\t';
  672. } else if (c == 'b') {
  673. c = '\b';
  674. } else if (c == 'f') {
  675. c = '\f';
  676. } else
  677. // ignore escaped EOL
  678. if (c == '\r') {
  679. // check for following \n
  680. c = buf.get() & 0xFF;
  681. if (c != '\n') {
  682. buf.position(buf.position() - 1);
  683. }
  684. c = -1;
  685. } else if (c == '\n') {
  686. c = -1;
  687. }
  688. }
  689. if (c >= 0) {
  690. sb.append((char) c);
  691. }
  692. }
  693. String unicodeString = unicode(sb.toString());
  694. return new PDFObject(this, PDFObject.STRING,
  695. decrypter.decryptString(objNum, objGen, unicodeString));
  696. }
  697. /**
  698. * Read a line of text. This follows the semantics of readLine() in
  699. * DataInput -- it reads character by character until a '/n' is
  700. * encountered. If a '/r' is encountered, it is discarded.
  701. */
  702. private String readLine() {
  703. StringBuffer sb = new StringBuffer();
  704. while (buf.remaining() > 0) {
  705. char c = (char) buf.get();
  706. if (c == '\r') {
  707. if (buf.remaining() > 0) {
  708. char n = (char) buf.get(buf.position());
  709. if (n == '\n') {
  710. buf.get();
  711. }
  712. }
  713. break;
  714. } else if (c == '\n') {
  715. break;
  716. }
  717. sb.append(c);
  718. }
  719. return sb.toString();
  720. }
  721. /**
  722. * read an [ array ]. The initial [ has already been read. PDFObjects
  723. * are read until ].
  724. * @param objNum the object number of the object containing the dictionary
  725. * being read; negative only if the object number is unavailable, which
  726. * should only happen if we're reading an array placed directly
  727. * in the trailer
  728. * @param objGen the object generation of the object containing the object
  729. * being read; negative only if the objNum is unavailable
  730. * @param decrypter the decrypter to use
  731. */
  732. private PDFObject readArray(
  733. int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
  734. // we've already read the [. Now read objects until ]
  735. ArrayList<PDFObject> ary = new ArrayList<PDFObject>();
  736. PDFObject obj;
  737. while((obj= readObject(objNum, objGen, decrypter))!=null) {
  738. ary.add(obj);
  739. }
  740. if (buf.get() != ']') {
  741. throw new PDFParseException("Array should end with ']'");
  742. }
  743. PDFObject[] objlist = new PDFObject[ary.size()];
  744. for (int i = 0; i < objlist.length; i++) {
  745. objlist[i] = (PDFObject) ary.get(i);
  746. }
  747. return new PDFObject(this, PDFObject.ARRAY, objlist);
  748. }
  749. /**
  750. * read a /name. The / has already been read.
  751. */
  752. private PDFObject readName() throws IOException {
  753. // we've already read the / that begins the name.
  754. // all we have to check for is #hh hex notations.
  755. StringBuffer sb = new StringBuffer();
  756. int c;
  757. while (isRegularCharacter(c = buf.get())) {
  758. if (c < '!' && c > '~') {
  759. break; // out-of-range, should have been hex
  760. }
  761. // H.3.2.4 indicates version 1.1 did not do hex escapes
  762. if (c == '#' && (majorVersion != 1 && minorVersion != 1)) {
  763. int hex = readHexPair();
  764. if (hex >= 0) {
  765. c = hex;
  766. } else {
  767. throw new PDFParseException("Bad #hex in /Name");
  768. }
  769. }
  770. sb.append((char) c);
  771. }
  772. buf.position(buf.position() - 1);
  773. return new PDFObject(this, PDFObject.NAME, sb.toString());
  774. }
  775. /**
  776. * read a number. The initial digit or . or - is passed in as the
  777. * argument.
  778. */
  779. private PDFObject readNumber(char start) throws IOException {
  780. // we've read the first digit (it's passed in as the argument)
  781. boolean neg = start == '-';
  782. boolean sawdot = start == '.';
  783. double dotmult = sawdot ? 0.1 : 1;
  784. double value = (start >= '0' && start <= '9') ? start - '0' : 0;
  785. while (true) {
  786. int c = buf.get();
  787. if (c == '.') {
  788. if (sawdot) {
  789. throw new PDFParseException("Can't have two '.' in a number");
  790. }
  791. sawdot = true;
  792. dotmult = 0.1;
  793. } else if (c >= '0' && c <= '9') {
  794. int val = c - '0';
  795. if (sawdot) {
  796. value += val * dotmult;
  797. dotmult *= 0.1;
  798. } else {
  799. value = value * 10 + val;
  800. }
  801. } else {
  802. buf.position(buf.position() - 1);
  803. break;
  804. }
  805. }
  806. if (neg) {
  807. value = -value;
  808. }
  809. return new PDFObject(this, PDFObject.NUMBER, new Double(value));
  810. }
  811. /**
  812. * read a bare keyword. The initial character is passed in as the
  813. * argument.
  814. */
  815. private PDFObject readKeyword(char start) throws IOException {
  816. // we've read the first character (it's passed in as the argument)
  817. StringBuffer sb = new StringBuffer(String.valueOf(start));
  818. int c;
  819. while (isRegularCharacter(c = buf.get())) {
  820. sb.append((char) c);
  821. }
  822. buf.position(buf.position() - 1);
  823. return new PDFObject(this, PDFObject.KEYWORD, sb.toString());
  824. }
  825. /**
  826. * read an entire PDFObject. The intro line, which looks something
  827. * like "4 0 obj" has already been read.
  828. * @param objNum the object number of the object being read, being
  829. * the first number in the intro line (4 in "4 0 obj")
  830. * @param objGen the object generation of the object being read, being
  831. * the second number in the intro line (0 in "4 0 obj").
  832. * @param decrypter the decrypter to use
  833. */
  834. private PDFObject readObjectDescription(
  835. int objNum, int objGen, PDFDecrypter decrypter) throws IOException {
  836. // we've already read the 4 0 obj bit. Next thing up is the object.
  837. // object descriptions end with the keyword endobj
  838. long debugpos = buf.position();
  839. PDFObject obj= readObject(objNum, objGen, decrypter);
  840. // see if it's a dictionary. If so, this could be a stream.
  841. PDFObject endkey= readObject(objNum, objGen, decrypter);
  842. if (endkey.getType() != PDFObject.KEYWORD) {
  843. throw new PDFParseException("Expected 'stream' or 'endobj'");
  844. }
  845. if (obj.getType() == PDFObject.DICTIONARY && endkey.getStringValue().equals("stream")) {
  846. // skip until we see \n
  847. readLine();
  848. ByteBuffer data = readStream(obj);
  849. if (data == null) {
  850. data = ByteBuffer.allocate(0);
  851. }
  852. obj.setStream(data);
  853. endkey= readObject(objNum, objGen, decrypter);
  854. }
  855. // at this point, obj is the object, keyword should be "endobj"
  856. String endcheck = endkey.getStringValue();
  857. if (endcheck == null || !endcheck.equals("endobj")) {
  858. System.out.println("WARNING: object at " + debugpos + " didn't end with 'endobj'");
  859. //throw new PDFParseException("Object musst end with 'endobj'");
  860. }
  861. obj.setObjectId(objNum, objGen);
  862. return obj;
  863. }
  864. /**
  865. * read the stream portion of a PDFObject. Calls decodeStream to
  866. * un-filter the stream as necessary.
  867. *
  868. * @param dict the dictionary associated with this stream.
  869. * @return a ByteBuffer with the encoded stream data
  870. */
  871. private ByteBuffer readStream(PDFObject dict) throws IOException {
  872. // pointer is at the start of a stream. read the stream and
  873. // decode, based on the entries in the dictionary
  874. PDFObject lengthObj = dict.getDictRef("Length");
  875. int length = -1;
  876. if (lengthObj != null) {
  877. length = lengthObj.getIntValue();
  878. }
  879. if (length < 0) {
  880. throw new PDFParseException("Unknown length for stream");
  881. }
  882. // slice the data
  883. int start = buf.position();
  884. ByteBuffer streamBuf = buf.slice();
  885. streamBuf.limit(length);
  886. // move the current position to the end of the data
  887. buf.position(buf.position() + length);
  888. int ending = buf.position();
  889. if (!nextItemIs("endstream")) {
  890. System.out.println("read " + length + " chars from " + start + " to " +
  891. ending);
  892. throw new PDFParseException("Stream ended inappropriately");
  893. }
  894. return streamBuf;
  895. // now decode stream
  896. // return PDFDecoder.decodeStream(dict, streamBuf);
  897. }
  898. /**
  899. * read the cross reference table from a PDF file. When this method
  900. * is called, the file pointer must point to the start of the word
  901. * "xref" in the file. Reads the xref table and the trailer dictionary.
  902. * If dictionary has a /Prev entry, move file pointer
  903. * and read new trailer
  904. * @param password
  905. */
  906. private void readTrailer(PDFPassword password)
  907. throws
  908. IOException,
  909. PDFAuthenticationFailureException,
  910. EncryptionUnsupportedByProductException,
  911. EncryptionUnsupportedByPlatformException {
  912. // the table of xrefs
  913. objIdx = new PDFXref[50];
  914. PDFDecrypter newDefaultDecrypter = null;
  915. // read a bunch of nested trailer tables
  916. while (true) {
  917. // make sure we are looking at an xref table
  918. if (!nextItemIs("xref")) {
  919. throw new PDFParseException("Expected 'xref' at start of table");
  920. }
  921. // read a bunch of linked tabled
  922. while (true) {
  923. // read until the word "trailer"
  924. PDFObject obj=readObject(-1, -1, IdentityDecrypter.getInstance());
  925. if (obj.getType() == PDFObject.KEYWORD &&
  926. obj.getStringValue().equals("trailer")) {
  927. break;
  928. }
  929. // read the starting position of the reference
  930. if (obj.getType() != PDFObject.NUMBER) {
  931. throw new PDFParseException("Expected number for first xref entry");
  932. }
  933. int refstart = obj.getIntValue();
  934. // read the size of the reference table
  935. obj = readObject(-1, -1, IdentityDecrypter.getInstance());
  936. if (obj.getType() != PDFObject.NUMBER) {
  937. throw new PDFParseException("Expected number for length of xref table");
  938. }
  939. int reflen = obj.getIntValue();
  940. // skip a line
  941. readLine();
  942. // extend the objIdx table, if necessary
  943. if (refstart + reflen >= objIdx.length) {
  944. PDFXref nobjIdx[] = new PDFXref[refstart + reflen];
  945. System.arraycopy(objIdx, 0, nobjIdx, 0, objIdx.length);
  946. objIdx = nobjIdx;
  947. }
  948. // read reference lines
  949. for (int refID = refstart; refID < refstart + reflen; refID++) {
  950. // each reference line is 20 bytes long
  951. byte[] refline = new byte[20];
  952. buf.get(refline);
  953. // ignore this line if the object ID is already defined
  954. if (objIdx[refID] != null) {
  955. continue;
  956. }
  957. // see if it's an active object
  958. if (refline[17] == 'n') {
  959. objIdx[refID] = new PDFXref(refline);
  960. } else {
  961. objIdx[refID] = new PDFXref(null);
  962. }
  963. }
  964. }
  965. // at this point, the "trailer" word (not EOL) has been read.
  966. PDFObject trailerdict = readObject(-1, -1, IdentityDecrypter.getInstance());
  967. if (trailerdict.getType() != PDFObject.DICTIONARY) {
  968. throw new IOException("Expected dictionary after \"trailer\"");
  969. }
  970. // read the root object location
  971. if (root == null) {
  972. root = trailerdict.getDictRef("Root");
  973. if (root != null) {
  974. root.setObjectId(PDFObject.OBJ_NUM_TRAILER,
  975. PDFObject.OBJ_NUM_TRAILER);
  976. }
  977. }
  978. // read the encryption information
  979. if (encrypt == null) {
  980. encrypt = trailerdict.getDictRef("Encrypt");
  981. if (encrypt != null) {
  982. encrypt.setObjectId(PDFObject.OBJ_NUM_TRAILER,
  983. PDFObject.OBJ_NUM_TRAILER);
  984. }
  985. newDefaultDecrypter =
  986. PDFDecrypterFactory.createDecryptor(
  987. encrypt,
  988. trailerdict.getDictRef("ID"),
  989. password);
  990. }
  991. if (info == null) {
  992. info = trailerdict.getDictRef("Info");
  993. if (info != null) {
  994. if (!info.isIndirect()) {
  995. throw new PDFParseException(
  996. "Info in trailer must be an indirect reference");
  997. }
  998. info.setObjectId(PDFObject.OBJ_NUM_TRAILER,
  999. PDFObject.OBJ_NUM_TRAILER);
  1000. }
  1001. }
  1002. // read the location of the previous xref table
  1003. PDFObject prevloc = trailerdict.getDictRef("Prev");
  1004. if (prevloc != null) {
  1005. buf.position(prevloc.getIntValue());
  1006. } else {
  1007. break;
  1008. }
  1009. // see if we have an optional Version entry
  1010. if (root.getDictRef("Version") != null) {
  1011. processVersion(root.getDictRef("Version").getStringValue());
  1012. }
  1013. }
  1014. // make sure we found a root
  1015. if (root == null) {
  1016. throw new PDFParseException("No /Root key found in trailer dictionary");
  1017. }
  1018. // check what permissions are relevant
  1019. if (encrypt != null) {
  1020. PDFObject permissions = encrypt.getDictRef("P");
  1021. if (permissions!=null && !newDefaultDecrypter.isOwnerAuthorised()) {
  1022. int perms= permissions != null ? permissions.getIntValue() : 0;
  1023. if (permissions!=null) {
  1024. printable = (perms & 4) != 0;
  1025. saveable = (perms & 16) != 0;
  1026. }
  1027. }
  1028. // Install the new default decrypter only after the trailer has
  1029. // been read, as nothing we're reading passing through is encrypted
  1030. defaultDecrypter = newDefaultDecrypter;
  1031. }
  1032. // dereference the root object
  1033. root.dereference();
  1034. }
  1035. /**
  1036. * build the PDFFile reference table. Nothing in the PDFFile actually
  1037. * gets parsed, despite the name of this function. Things only get
  1038. * read and parsed when they're needed.
  1039. * @param password
  1040. */
  1041. private void parseFile(PDFPassword password) throws IOException {
  1042. // start at the begining of the file
  1043. buf.rewind();
  1044. String versionLine = readLine();
  1045. if (versionLine.startsWith(VERSION_COMMENT)) {
  1046. processVersion(versionLine.substring(VERSION_COMMENT.length()));
  1047. }
  1048. buf.rewind();
  1049. // back up about 32 characters from the end of the file to find
  1050. // startxref\n
  1051. byte[] scan = new byte[32];
  1052. int scanPos = buf.remaining() - scan.length;
  1053. int loc = 0;
  1054. while (scanPos >= 0) {
  1055. buf.position(scanPos);
  1056. buf.get(scan);
  1057. // find startxref in scan
  1058. String scans = new String(scan);
  1059. loc = scans.indexOf("startxref");
  1060. if (loc > 0) {
  1061. if (scanPos + loc + scan.length <= buf.limit()) {
  1062. scanPos = scanPos + loc;
  1063. loc = 0;
  1064. }
  1065. break;
  1066. }
  1067. scanPos -= scan.length - 10;
  1068. }
  1069. if (scanPos < 0) {
  1070. throw new IOException("This may not be a PDF File");
  1071. }
  1072. buf.position(scanPos);
  1073. buf.get(scan);
  1074. String scans = new String(scan);
  1075. loc += 10; // skip over "startxref" and first EOL char
  1076. if (scans.charAt(loc) < 32) {
  1077. loc++;
  1078. } // skip over possible 2nd EOL char
  1079. while (scans.charAt(loc) == 32) {
  1080. loc++;
  1081. } // skip over possible leading blanks
  1082. // read number
  1083. int numstart = loc;
  1084. while (loc < scans.length() &&
  1085. scans.charAt(loc) >= '0' &&
  1086. scans.charAt(loc) <= '9') {
  1087. loc++;
  1088. }
  1089. int xrefpos = Integer.parseInt(scans.substring(numstart, loc));
  1090. buf.position(xrefpos);
  1091. try {
  1092. readTrailer(password);
  1093. } catch (UnsupportedEncryptionException e) {
  1094. throw new PDFParseException(e.getMessage(), e);
  1095. }
  1096. }
  1097. /**
  1098. * Gets the outline tree as a tree of OutlineNode, which is a subclass
  1099. * of DefaultMutableTreeNode. If there is no outline tree, this method
  1100. * returns null.
  1101. */
  1102. public OutlineNode getOutline() throws IOException {
  1103. // find the outlines entry in the root object
  1104. PDFObject oroot = root.getDictRef("Outlines");
  1105. OutlineNode work = null;
  1106. OutlineNode outline = null;
  1107. if (oroot != null) {
  1108. // find the first child of the outline root
  1109. PDFObject scan = oroot.getDictRef("First");
  1110. outline = work = new OutlineNode("<top>");
  1111. // scan each sibling in turn
  1112. while (scan != null) {
  1113. // add the new node with it's name
  1114. String title = scan.getDictRef("Title").getTextStringValue();
  1115. OutlineNode build = new OutlineNode(title);
  1116. work.add(build);
  1117. // find the action
  1118. PDFAction action = null;
  1119. PDFObject actionObj = scan.getDictRef("A");
  1120. if (actionObj != null) {
  1121. action = PDFAction.getAction(actionObj, getRoot());
  1122. } else {
  1123. // try to create an action from a destination
  1124. PDFObject destObj = scan.getDictRef("Dest");
  1125. if (destObj != null) {
  1126. try {
  1127. PDFDestination dest =
  1128. PDFDestination.getDestination(destObj, getRoot());
  1129. action = new GoToAction(dest);
  1130. } catch (IOException ioe) {
  1131. // oh well
  1132. }
  1133. }
  1134. }
  1135. // did we find an action? If so, add it
  1136. if (action != null) {
  1137. build.setAction(action);
  1138. }
  1139. // find the first child of this node
  1140. PDFObject kid = scan.getDictRef("First");
  1141. if (kid != null) {
  1142. work = build;
  1143. scan = kid;
  1144. } else {
  1145. // no child. Process the next sibling
  1146. PDFObject next = scan.getDictRef("Next");
  1147. while (next == null) {
  1148. scan = scan.getDictRef("Parent");
  1149. next = scan.getDictRef("Next");
  1150. work = (OutlineNode) work.getParent();
  1151. if (work == null) {
  1152. break;
  1153. }
  1154. }
  1155. scan = next;
  1156. }
  1157. }
  1158. }
  1159. return outline;
  1160. }
  1161. /**
  1162. * Gets the page number (starting from 1) of the page represented by
  1163. * a particular PDFObject. The PDFObject must be a Page dictionary or
  1164. * a destination description (or an action).
  1165. * @return a number between 1 and the number of pages indicating the
  1166. * page number, or 0 if the PDFObject is not in the page tree.
  1167. */
  1168. public int getPageNumber(PDFObject page) throws IOException {
  1169. if (page.getType() == PDFObject.ARRAY) {
  1170. page = page.getAt(0);
  1171. }
  1172. // now we've got a page. Make sure.
  1173. PDFObject typeObj = page.getDictRef("Type");
  1174. if (typeObj == null || !typeObj.getStringValue().equals("Page")) {
  1175. return 0;
  1176. }
  1177. int count = 0;
  1178. while (true) {
  1179. PDFObject parent = page.getDictRef("Parent");
  1180. if (parent == null) {
  1181. break;
  1182. }
  1183. PDFObject kids[] = parent.getDictRef("Kids").getArray();
  1184. for (int i = 0; i < kids.length; i++) {
  1185. if (kids[i].equals(page)) {
  1186. break;
  1187. } else {
  1188. PDFObject kcount = kids[i].getDictRef("Count");
  1189. if (kcount != null) {
  1190. count += kcount.getIntValue();
  1191. } else {
  1192. count += 1;
  1193. }
  1194. }
  1195. }
  1196. page = parent;
  1197. }
  1198. return count;
  1199. }
  1200. /**
  1201. * Get the page commands for a given page in a separate thread.
  1202. *
  1203. * @param pagenum the number of the page to get commands for
  1204. */
  1205. public PDFPage getPage(int pagenum) {
  1206. return getPage(pagenum, false);
  1207. }
  1208. /**
  1209. * Get the page commands for a given page.
  1210. *
  1211. * @param pagenum the number of the page to get commands for
  1212. * @param wait if true, do not exit until the page is complete.
  1213. */
  1214. public PDFPage getPage(int pagenum, boolean wait) {
  1215. Integer key = new Integer(pagenum);
  1216. HashMap<String,PDFObject> resources = null;
  1217. PDFObject pageObj = null;
  1218. boolean needread = false;
  1219. PDFPage page = cache.getPage(key);
  1220. PDFParser parser = cache.getPageParser(key);
  1221. if (page == null) {
  1222. try {
  1223. // hunt down the page!
  1224. resources = new HashMap<String,PDFObject>();
  1225. PDFObject topPagesObj = root.getDictRef("Pages");
  1226. pageObj = findPage(topPagesObj, 0, pagenum, resources);
  1227. if (pageObj == null) {
  1228. return null;
  1229. }
  1230. page = createPage(pagenum, pageObj);
  1231. byte[] stream = getContents(pageObj);
  1232. parser = new PDFParser(page, stream, resources);
  1233. cache.addPage(key, page, parser);
  1234. } catch (IOException ioe) {
  1235. System.out.println("GetPage inner loop:");
  1236. ioe.printStackTrace();
  1237. return null;
  1238. }
  1239. }
  1240. if (parser != null && !parser.isFinished()) {
  1241. parser.go(wait);
  1242. }
  1243. return page;
  1244. }
  1245. /**
  1246. * Stop the rendering of a particular image on this page
  1247. */
  1248. public void stop(int pageNum) {
  1249. PDFParser parser = cache.getPageParser(new Integer(pageNum));
  1250. if (parser != null) {
  1251. // stop it
  1252. parser.stop();
  1253. }
  1254. }
  1255. /**
  1256. * get the stream representing the content of a particular page.
  1257. *
  1258. * @param pageObj the page object to get the contents of
  1259. * @return a concatenation of any content streams for the requested
  1260. * page.
  1261. */
  1262. private byte[] getContents(PDFObject pageObj) throws IOException {
  1263. // concatenate all the streams
  1264. PDFObject contentsObj = pageObj.getDictRef("Contents");
  1265. if (contentsObj == null) {
  1266. throw new IOException("No page contents!");
  1267. }
  1268. PDFObject contents[] = contentsObj.getArray();
  1269. // see if we have only one stream (the easy case)
  1270. if (contents.length == 1) {
  1271. return contents[0].getStream();
  1272. }
  1273. // first get the total length of all the streams
  1274. int len = 0;
  1275. for (int i = 0; i < contents.length; i++) {
  1276. byte[] data = contents[i].getStream();
  1277. if (data == null) {
  1278. throw new PDFParseException("No stream on content " + i +
  1279. ": " + contents[i]);
  1280. }
  1281. len += data.length;
  1282. }
  1283. // now assemble them all into one object
  1284. byte[] stream = new byte[len];
  1285. len = 0;
  1286. for (int i = 0; i < contents.length; i++) {
  1287. byte data[] = contents[i].getStream();
  1288. System.arraycopy(data, 0, stream, len, data.length);
  1289. len += data.length;
  1290. }
  1291. return stream;
  1292. }
  1293. /**
  1294. * Create a PDF Page object by finding the relevant inherited
  1295. * properties
  1296. *
  1297. * @param pageObj the PDF object for the page to be created
  1298. */
  1299. private PDFPage createPage(int pagenum, PDFObject pageObj)
  1300. throws IOException {
  1301. int rotation = 0;
  1302. RectF mediabox = null; // second choice, if no crop
  1303. RectF cropbox = null; // first choice
  1304. PDFObject mediaboxObj = getInheritedValue(pageObj, "MediaBox");
  1305. if (mediaboxObj != null) {
  1306. mediabox = parseRect(mediaboxObj);
  1307. }
  1308. PDFObject cropboxObj = getInheritedValue(pageObj, "CropBox");
  1309. if (cropboxObj != null) {
  1310. cropbox = parseRect(cropboxObj);
  1311. }
  1312. PDFObject rotateObj = getInheritedValue(pageObj, "Rotate");
  1313. if (rotateObj != null) {
  1314. rotation = rotateObj.getIntValue();
  1315. }
  1316. RectF bbox = ((cropbox == null) ? mediabox : cropbox);
  1317. return new PDFPage(pagenum, bbox, rotation, cache);
  1318. }
  1319. /**
  1320. * Get the PDFObject representing the content of a particular page. Note
  1321. * that the number of the page need not have anything to do with the
  1322. * label on that page. If there are two blank pages, and then roman
  1323. * numerals for the page number, then passing in 6 will get page (iv).
  1324. *
  1325. * @param pagedict the top of the pages tree
  1326. * @param start the page number of the first page in this dictionary
  1327. * @param getPage the number of the page to find; NOT the page's label.
  1328. * @param resources a HashMap that will be filled with any resource
  1329. * definitions encountered on the search for the page
  1330. */
  1331. private PDFObject findPage(PDFObject pagedict, int start, int getPage,
  1332. Map<String,PDFObject> resources) throws IOException {
  1333. PDFObject rsrcObj = pagedict.getDictRef("Resources");
  1334. if (rsrcObj != null) {
  1335. resources.putAll(rsrcObj.getDictionary());
  1336. }
  1337. PDFObject typeObj = pagedict.getDictRef("Type");
  1338. if (typeObj != null && typeObj.getStringValue().equals("Page")) {
  1339. // we found our page!
  1340. return pagedict;
  1341. }
  1342. // find the first child for which (start + count) > getPage
  1343. PDFObject kidsObj = pagedict.getDictRef("Kids");
  1344. if (kidsObj != null) {
  1345. PDFObject[] kids = kidsObj.getArray();
  1346. for (int i = 0; i < kids.length; i++) {
  1347. int count = 1;
  1348. // BUG: some PDFs (T1Format.pdf) don't have the Type tag.
  1349. // use the Count tag to indicate a Pages dictionary instead.
  1350. PDFObject countItem = kids[i].getDictRef("Count");
  1351. // if (kids[i].getDictRef("Type").getStringValue().equals("Pages")) {
  1352. if (countItem != null) {
  1353. count = countItem.getIntValue();
  1354. }
  1355. if (start + count >= getPage) {
  1356. return findPage(kids[i], start, getPage, resources);
  1357. }
  1358. start += count;
  1359. }
  1360. }
  1361. return null;
  1362. }
  1363. /**
  1364. * Find a property value in a page that may be inherited. If the value
  1365. * is not defined in the page itself, follow the page's "parent" links
  1366. * until the value is found or the top of the tree is reached.
  1367. *
  1368. * @param pageObj the object representing the page
  1369. * @param propName the name of the property we are looking for
  1370. */
  1371. private PDFObject getInheritedValue(PDFObject pageObj, String propName)
  1372. throws IOException {
  1373. // see if we have the property
  1374. PDFObject propObj = pageObj.getDictRef(propName);
  1375. if (propObj != null) {
  1376. return propObj;
  1377. }
  1378. // recursively see if any of our parent have it
  1379. PDFObject parentObj = pageObj.getDictRef("Parent");
  1380. if (parentObj != null) {
  1381. return getInheritedValue(parentObj, propName);
  1382. }
  1383. // no luck
  1384. return null;
  1385. }
  1386. /**
  1387. * get a Rectangle2D.Float representation for a PDFObject that is an
  1388. * array of four Numbers.
  1389. * @param obj a PDFObject that represents an Array of exactly four
  1390. * Numbers.
  1391. */
  1392. public RectF parseRect(PDFObject obj) throws IOException {
  1393. if (obj.getType() == PDFObject.ARRAY) {
  1394. PDFObject bounds[] = obj.getArray();
  1395. if (bounds.length == 4) {
  1396. return new RectF(bounds[0].getFloatValue(),
  1397. bounds[1].getFloatValue(),
  1398. bounds[2].getFloatValue() - bounds[0].getFloatValue(),
  1399. bounds[3].getFloatValue() - bounds[1].getFloatValue());
  1400. } else {
  1401. throw new PDFParseException("Rectangle definition didn't have 4 elements");
  1402. }
  1403. } else {
  1404. throw new PDFParseException("Rectangle definition not an array");
  1405. }
  1406. }
  1407. /**
  1408. * Get the default decrypter for the document
  1409. * @return the default decrypter; never null, even for documents that
  1410. * aren't encrypted
  1411. */
  1412. public PDFDecrypter getDefaultDecrypter() {
  1413. return defaultDecrypter;
  1414. }
  1415. }