LZWDecode.java 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. /*
  2. * $Id: LZWDecode.java,v 1.4 2009/02/22 00:45:32 tomoke Exp $
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
  5. * Santa Clara, California 95054, U.S.A. All rights reserved.
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. package com.sun.pdfview.decode;
  22. import java.io.ByteArrayOutputStream;
  23. import java.io.IOException;
  24. import net.sf.andpdf.pdfviewer.ByteBuffer;
  25. import com.sun.pdfview.PDFObject;
  26. import com.sun.pdfview.PDFParseException;
  27. /**
  28. * Decodes an LZW-encoded array of bytes. LZW was a patented algorithm; the statement below lists when the original patents expired.
  29. *
  30. * <p>Feb 21, 2009 Legal statement on Intellectual Property from Unisys</p><pre>
  31. * <b><u>LZW Patent Information</u></b> (http://www.unisys.com/about__unisys/lzw)
  32. * <u>License Information on GIF and Other LZW-based Technologies
  33. * </u><p>
  34. * <b><i>Unisys U.S. LZW Patent No. 4,558,302 expired on June 20, 2003,
  35. * the counterpart patents in the United Kingdom, France, Germany and
  36. * Italy expired on June 18, 2004, the Japanese counterpart patents
  37. * expired on June 20, 2004 and the counterpart Canadian patent
  38. * expired on July 7, 2004.
  39. * </i></b><p>
  40. * Unisys Corporation holds and has patents pending on a number of
  41. * improvements on the inventions claimed in the above-expired patents.
  42. * Information on these improvement patents and terms under which they
  43. * may be licensed can be obtained by contacting the following:
  44. *<p>
  45. * Unisys Corporation
  46. * Welch Patent Licensing Department
  47. * Mail Stop E8-114
  48. * Unisys Way
  49. * Blue Bell, PA 19424
  50. *<p>
  51. * Via the Internet, send email to Robert.Marley@unisys.com.
  52. *<p>
  53. * Via facsimile, send inquiries to Welch Patent Licensing Department at
  54. * 215-986-3090.
  55. *<p>
  56. * The above is presented for information purposes only, and is subject
  57. * to change by Unisys. Additionally, this information should not be
  58. * considered as legally obligating Unisys in any way with regard to license
  59. * availability, or as to the terms and conditions offered for a license,
  60. * or with regard to the interpretation of any license agreements.
  61. * You should consult with your own legal counsel regarding your
  62. * particular situation.
  63. * </pre>
  64. *
  65. * @author Mike Wessler
  66. */
  67. public class LZWDecode {
  68. ByteBuffer buf;
  69. int bytepos;
  70. int bitpos;
  71. byte[] dict[] = new byte[4096][];
  72. int dictlen = 0;
  73. int bitspercode = 9;
  74. static int STOP = 257;
  75. static int CLEARDICT = 256;
  76. /**
  77. * initialize this decoder with an array of encoded bytes
  78. * @param buf the buffer of bytes
  79. */
  80. private LZWDecode(ByteBuffer buf) throws PDFParseException {
  81. for (int i = 0; i < 256; i++) {
  82. dict[i] = new byte[1];
  83. dict[i][0] = (byte) i;
  84. }
  85. dictlen = 258;
  86. bitspercode = 9;
  87. this.buf = buf;
  88. bytepos = 0;
  89. bitpos = 0;
  90. }
  91. /**
  92. * reset the dictionary to the initial 258 entries
  93. */
  94. private void resetDict() {
  95. dictlen = 258;
  96. bitspercode = 9;
  97. }
  98. /**
  99. * get the next code from the input stream
  100. */
  101. private int nextCode() {
  102. int fillbits = bitspercode;
  103. int value = 0;
  104. if (bytepos >= buf.limit() - 1) {
  105. return -1;
  106. }
  107. while (fillbits > 0) {
  108. int nextbits = buf.get(bytepos); // bitsource
  109. int bitsfromhere = 8 - bitpos; // how many bits can we take?
  110. if (bitsfromhere > fillbits) { // don't take more than we need
  111. bitsfromhere = fillbits;
  112. }
  113. value |= ((nextbits >> (8 - bitpos - bitsfromhere)) &
  114. (0xff >> (8 - bitsfromhere))) << (fillbits - bitsfromhere);
  115. fillbits -= bitsfromhere;
  116. bitpos += bitsfromhere;
  117. if (bitpos >= 8) {
  118. bitpos = 0;
  119. bytepos++;
  120. }
  121. }
  122. return value;
  123. }
  124. /**
  125. * decode the array.
  126. * @return the uncompressed byte array
  127. */
  128. private ByteBuffer decode() throws PDFParseException {
  129. // algorithm derived from:
  130. // http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
  131. // and the PDFReference
  132. int cW = CLEARDICT;
  133. ByteArrayOutputStream baos = new ByteArrayOutputStream();
  134. while (true) {
  135. int pW = cW;
  136. cW = nextCode();
  137. if (cW == -1) {
  138. throw new PDFParseException("Missed the stop code in LZWDecode!");
  139. }
  140. if (cW == STOP) {
  141. break;
  142. } else if (cW == CLEARDICT) {
  143. resetDict();
  144. // pW= -1;
  145. } else if (pW == CLEARDICT) {
  146. baos.write(dict[cW], 0, dict[cW].length);
  147. } else {
  148. if (cW < dictlen) { // it's a code in the dictionary
  149. baos.write(dict[cW], 0, dict[cW].length);
  150. byte[] p = new byte[dict[pW].length + 1];
  151. System.arraycopy(dict[pW], 0, p, 0, dict[pW].length);
  152. p[dict[pW].length] = dict[cW][0];
  153. dict[dictlen++] = p;
  154. } else { // not in the dictionary (should==dictlen)
  155. // if (cW!=dictlen) {
  156. // System.out.println("Got a bouncy code: "+cW+" (dictlen="+dictlen+")");
  157. // }
  158. byte[] p = new byte[dict[pW].length + 1];
  159. System.arraycopy(dict[pW], 0, p, 0, dict[pW].length);
  160. p[dict[pW].length] = p[0];
  161. baos.write(p, 0, p.length);
  162. dict[dictlen++] = p;
  163. }
  164. if (dictlen >= (1 << bitspercode) - 1 && bitspercode < 12) {
  165. bitspercode++;
  166. }
  167. }
  168. }
  169. return ByteBuffer.wrap(baos.toByteArray());
  170. }
  171. /**
  172. * decode an array of LZW-encoded bytes to a byte array.
  173. *
  174. * @param buf the buffer of encoded bytes
  175. * @param params parameters for the decoder (unused)
  176. * @return the decoded uncompressed bytes
  177. */
  178. public static ByteBuffer decode(ByteBuffer buf, PDFObject params)
  179. throws IOException {
  180. // decode the array
  181. LZWDecode me = new LZWDecode(buf);
  182. ByteBuffer outBytes = me.decode();
  183. // undo a predictor algorithm, if any was used
  184. if (params != null && params.getDictionary().containsKey("Predictor")) {
  185. Predictor predictor = Predictor.getPredictor(params);
  186. if (predictor != null) {
  187. outBytes = predictor.unpredict(outBytes);
  188. }
  189. }
  190. return outBytes;
  191. }
  192. }