Source for java.util.zip.Deflater

   1: /* Deflater.java - Compress a data stream
   2:    Copyright (C) 1999, 2000, 2001, 2004, 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package java.util.zip;
  39: 
  40: /**
  41:  * This is the Deflater class.  The deflater class compresses input
  42:  * with the deflate algorithm described in RFC 1951.  It has several
  43:  * compression levels and three different strategies described below.
  44:  * 
  45:  * This class is <i>not</i> thread safe.  This is inherent in the API, due
  46:  * to the split of deflate and setInput.
  47:  * 
  48:  * @author Jochen Hoenicke
  49:  * @author Tom Tromey
  50:  */
  51: public class Deflater
  52: {
  53:   /**
  54:    * The best and slowest compression level.  This tries to find very
  55:    * long and distant string repetitions.
  56:    */
  57:   public static final int BEST_COMPRESSION = 9;
  58:   /**
  59:    * The weakest but fastest compression level.
  60:    */
  61:   public static final int BEST_SPEED = 1;
  62:   /**
  63:    * The default compression level; constructor and setLevel() map it to 6.
  64:    */
  65:   public static final int DEFAULT_COMPRESSION = -1;
  66:   /**
  67:    * This level won't compress at all but output uncompressed blocks.
  68:    */
  69:   public static final int NO_COMPRESSION = 0;
  70: 
  71:   /**
  72:    * The default strategy.
  73:    */
  74:   public static final int DEFAULT_STRATEGY = 0;
  75:   /**
  76:    * This strategy will only allow longer string repetitions.  It is
  77:    * useful for random data with a small character set.
  78:    */
  79:   public static final int FILTERED = 1;
  80: 
  81:   /**
  82:    * This strategy will not look for string repetitions at all.  It
  83:    * only encodes with Huffman trees (which means that more common
  84:    * characters get a smaller encoding).
  85:    */
  86:   public static final int HUFFMAN_ONLY = 2;
  87: 
  88:   /**
  89:    * The compression method.  This is the only method supported so far.
  90:    * Callers never need it; deflate() puts it into the stream header.
  91:    */
  92:   public static final int DEFLATED = 8;
  93: 
  94:   /*
  95:    * The Deflater can do the following state transitions:
  96:    *
  97:    * (1) -> INIT_STATE   ----> INIT_FINISHING_STATE ---.
  98:    *        /  | (2)      (5)                         |
  99:    *       /   v          (5)                         |
 100:    *   (3)| SETDICT_STATE ---> SETDICT_FINISHING_STATE |(3)
 101:    *       \   | (3)                 |        ,-------'
 102:    *        |  |                     | (3)   /
 103:    *        v  v          (5)        v      v
 104:    * (1) -> BUSY_STATE   ----> FINISHING_STATE
 105:    *                                | (6)
 106:    *                                v
 107:    *                           FINISHED_STATE
 108:    *    \_____________________________________/
 109:    *          | (7)
 110:    *          v
 111:    *        CLOSED_STATE
 112:    *
 113:    * (1) If we should produce a header we start in INIT_STATE, otherwise
 114:    *     we start in BUSY_STATE.
 115:    * (2) A dictionary may be set only when we are in INIT_STATE, then
 116:    *     we change the state as indicated.
 117:    * (3) Whether a dictionary is set or not, on the first call of deflate
 118:    *     we change to BUSY_STATE.
 119:    * (4) -- intentionally left blank -- :)
 120:    * (5) FINISHING_STATE is entered, when finish() is called to indicate that
 121:    *     there is no more INPUT.  There are also states indicating, that
 122:    *     the header wasn't written yet.
 123:    * (6) FINISHED_STATE is entered, when everything has been flushed to the
 124:    *     internal pending output buffer.
 125:    * (7) At any time end() may be called, which enters CLOSED_STATE.
 126:    *
 127:    */
 128: 
 129:   private static final int IS_SETDICT              = 0x01; // a dictionary was set
 130:   private static final int IS_FLUSHING             = 0x04; // set by flush() and finish()
 131:   private static final int IS_FINISHING            = 0x08; // set by finish()
 132:   
 133:   private static final int INIT_STATE              = 0x00;
 134:   private static final int SETDICT_STATE           = 0x01; // INIT_STATE | IS_SETDICT
 135:   private static final int INIT_FINISHING_STATE    = 0x08; // INIT_STATE | IS_FINISHING
 136:   private static final int SETDICT_FINISHING_STATE = 0x09; // SETDICT_STATE | IS_FINISHING
 137:   private static final int BUSY_STATE              = 0x10; // states below this still owe the header
 138:   private static final int FLUSHING_STATE          = 0x14; // BUSY_STATE | IS_FLUSHING
 139:   private static final int FINISHING_STATE         = 0x1c; // BUSY_STATE | IS_FLUSHING | IS_FINISHING
 140:   private static final int FINISHED_STATE          = 0x1e;
 141:   private static final int CLOSED_STATE            = 0x7f; // set by end(); contains every IS_* bit
 142: 
 143:   /** Compression level, 0..9 (DEFAULT_COMPRESSION is stored as 6). */
 144:   private int level;
 145: 
 146:   /** True if the header and the trailing adler checksum are left out. */
 147:   private boolean noHeader;
 148: 
 149:   /** The current state; see the *_STATE and IS_* constants. */
 150:   private int state;
 151: 
 152:   /** The total bytes of output written since the last reset(). */
 153:   private long totalOut;
 154:  
 155:   /** The pending output. */
 156:   private DeflaterPending pending;
 157: 
 158:   /** The deflater engine. */
 159:   private DeflaterEngine engine;
 160: 
 161:   /**
 162:    * Creates a new deflater with default compression level.
 163:    */
 164:   public Deflater()
 165:   {
 166:     this(DEFAULT_COMPRESSION, false); // nowrap = false: header and checksum are written
 167:   }
 168: 
 169:   /**
 170:    * Creates a new deflater with given compression level.
 171:    * @param lvl the compression level, a value between NO_COMPRESSION
 172:    * and BEST_COMPRESSION, or DEFAULT_COMPRESSION.
 173:    * @exception IllegalArgumentException if lvl is out of range.
 174:    */
 175:   public Deflater(int lvl)
 176:   {
 177:     this(lvl, false); // nowrap = false: header and checksum are written
 178:   }
 179: 
 180:   /**
 181:    * Creates a new deflater with given compression level.
 182:    * @param lvl the compression level, a value between NO_COMPRESSION
 183:    * and BEST_COMPRESSION, or DEFAULT_COMPRESSION.
 184:    * @param nowrap true, iff we should suppress the deflate header at the
 185:    * beginning and the adler checksum at the end of the output.  This is
 186:    * useful for the GZIP format.
 187:    * @exception IllegalArgumentException if lvl is out of range.
 188:    */
 189:   public Deflater(int lvl, boolean nowrap)
 190:   {
 191:     if (lvl == DEFAULT_COMPRESSION)
 192:       lvl = 6;
 193:     else if (lvl < NO_COMPRESSION || lvl > BEST_COMPRESSION)
 194:       throw new IllegalArgumentException();
 195: 
 196:     pending = new DeflaterPending();
 197:     engine = new DeflaterEngine(pending);
 198:     this.noHeader = nowrap; // must be set before reset(), which reads it
 199:     setStrategy(DEFAULT_STRATEGY);
 200:     setLevel(lvl); // NOTE(review): level is still 0 here, so lvl == 0 never reaches engine.setLevel() -- confirm the engine defaults to level 0
 201:     reset();
 202:   }
 203: 
 204:   /**
 205:    * Resets the deflater.  The deflater acts afterwards as if it was
 206:    * just created with the same compression level and strategy as it
 207:    * had before.  The output byte count starts again at zero.
 208:    */
 209:   public void reset() 
 210:   {
 211:     state = (noHeader ? BUSY_STATE : INIT_STATE); // BUSY_STATE skips the header block in deflate()
 212:     totalOut = 0;
 213:     pending.reset();
 214:     engine.reset();
 215:   }
 216:   
 217:   /**
 218:    * Frees all objects allocated by the compressor.  There's no
 219:    * reason to call this, since you can just rely on garbage
 220:    * collection.  Exists only for compatibility against Sun's JDK,
 221:    * where the compressor allocates native memory.
 222:    * If you call any method (even reset) afterwards the behaviour is
 223:    * <i>undefined</i>.  
 224:    */
 225:   public void end()
 226:   {
 227:     engine = null; // methods that dereference engine or pending will now fail
 228:     pending = null;
 229:     state = CLOSED_STATE; // deflate() and setInput() test the state first and throw IllegalStateException
 230:   }
 231: 
 232:   /**
 233:    * Gets the current adler checksum of the data that was processed so far.
 234:    * @return the checksum as reported by the deflater engine.
 235:    */
 236:   public int getAdler()
 237:   {
 238:     return engine.getAdler();
 239:   }
 240: 
 241:   /**
 242:    * Gets the number of input bytes processed so far, narrowed to an int.
 243:    */
 244:   public int getTotalIn()
 245:   {
 246:     return (int) engine.getTotalIn(); // the engine value is wider than int; getBytesRead() returns it uncast
 247:   }
 248: 
 249:   /**
 250:    * Gets the number of input bytes processed so far.
 251:    * @return the engine's input byte count.  @since 1.5
 252:    */
 253:   public long getBytesRead()
 254:   {
 255:     return engine.getTotalIn();
 256:   }
 257: 
 258:   /**
 259:    * Gets the number of output bytes so far, narrowed to an int.
 260:    */
 261:   public int getTotalOut()
 262:   {
 263:     return (int) totalOut; // totalOut is a long; getBytesWritten() returns it uncast
 264:   }
 265: 
 266:   /**
 267:    * Gets the number of output bytes so far.
 268:    * @return the bytes handed out by deflate() since reset().  @since 1.5
 269:    */
 270:   public long getBytesWritten()
 271:   {
 272:     return totalOut;
 273:   }
 274: 
 275:   /**
 276:    * Finalizes this object; intentionally does nothing.
 277:    */
 278:   protected void finalize()
 279:   {
 280:     /* Exists solely for compatibility.  We don't have any native state. */
 281:   }
 282: 
 283:   /**
 284:    * Flushes the current input block.  Further calls to deflate() will
 285:    * produce enough output to inflate everything in the current input
 286:    * block.  This is not part of Sun's JDK so I have made it package
 287:    * private.  It is used by DeflaterOutputStream to implement
 288:    * flush().
 289:    */
 290:   void flush() {
 291:     state |= IS_FLUSHING; // only records the request; deflate() drops back to BUSY_STATE once it is served
 292:   }
 293: 
 294:   /**
 295:    * Finishes the deflater with the current input block.  It is an error
 296:    * to give more input after this method was called.  This method must
 297:    * be called to force all bytes to be flushed.
 298:    */
 299:   public void finish() {
 300:     state |= IS_FLUSHING | IS_FINISHING; // finishing also flushes; setInput() rejects input from now on
 301:   }
 302: 
 303:   /**
 304:    * Tells whether the stream was finished and no output bytes remain.
 305:    * @return true iff state is FINISHED_STATE and pending.isFlushed() holds.
 306:    */
 307:   public boolean finished()
 308:   {
 309:     return state == FINISHED_STATE && pending.isFlushed();
 310:   }
 311: 
 312:   /**
 313:    * Returns true, if the input buffer is empty.
 314:    * You should then call setInput(). <br>
 315:    *
 316:    * <em>NOTE</em>: This method can also return true when the stream
 317:    * was finished.
 318:    */
 319:   public boolean needsInput()
 320:   {
 321:     return engine.needsInput(); // the deflater state is not consulted; the engine alone decides
 322:   }
 323: 
 324:   /**
 325:    * Sets the data which should be compressed next.  This should be only
 326:    * called when needsInput indicates that more input is needed.
 327:    * NOTE(review): what happens to still-pending input is decided by
 328:    * DeflaterEngine.setInput(); the three-argument overload documents an
 329:    * IllegalStateException for that case -- confirm.  The given byte array
 330:    * should not be changed, before needsInput() returns true again.
 331:    * This call is equivalent to <code>setInput(input, 0, input.length)</code>.
 332:    * @param input the buffer containing the input data.
 333:    * @exception IllegalStateException if finish() or end() was already called.
 334:    */
 335:   public void setInput(byte[] input)
 336:   {
 337:     setInput(input, 0, input.length);
 338:   }
 339: 
 340:   /**
 341:    * Sets the data which should be compressed next.  This should be
 342:    * only called when needsInput indicates that more input is needed.
 343:    * The given byte array should not be changed, before needsInput() returns
 344:    * true again.
 345:    * @param input the buffer containing the input data.
 346:    * @param off the start of the data.
 347:    * @param len the length of the data.
 348:    * @exception IllegalStateException if finish() or end() was already called,
 349:    * or if previous input is still pending (not checked here -- presumably by the engine).
 350:    */
 351:   public void setInput(byte[] input, int off, int len)
 352:   {
 353:     if ((state & IS_FINISHING) != 0) // CLOSED_STATE has this bit set too, so end() is covered
 354:       throw new IllegalStateException("finish()/end() already called");
 355:     engine.setInput(input, off, len);
 356:   }
 357: 
 358:   /**
 359:    * Sets the compression level.  There is no guarantee of the exact
 360:    * position of the change, but if you call this when needsInput is true
 361:    * the change will occur shortly before the end of the so far given input.
 362:    * @param lvl the new level, NO_COMPRESSION..BEST_COMPRESSION or DEFAULT_COMPRESSION.
 363:    * @exception IllegalArgumentException if lvl is out of range.
 364:    */
 365:   public void setLevel(int lvl)
 366:   {
 367:     if (lvl == DEFAULT_COMPRESSION)
 368:       lvl = 6;
 369:     else if (lvl < NO_COMPRESSION || lvl > BEST_COMPRESSION)
 370:       throw new IllegalArgumentException();
 371: 
 372: 
 373:     if (level != lvl) // the engine is only told about an actual change
 374:       {
 375:     level = lvl;
 376:     engine.setLevel(lvl);
 377:       }
 378:   }
 379: 
 380:   /**
 381:    * Sets the compression strategy. Strategy is one of
 382:    * DEFAULT_STRATEGY, HUFFMAN_ONLY and FILTERED.  For the exact position
 383:    * where the strategy is changed, the same as for setLevel() applies.
 384:    * @param stgy the new compression strategy.
 385:    * @exception IllegalArgumentException if stgy is none of the three.
 386:    */
 387:   public void setStrategy(int stgy)
 388:   {
 389:     if (stgy != DEFAULT_STRATEGY && stgy != FILTERED
 390:     && stgy != HUFFMAN_ONLY)
 391:       throw new IllegalArgumentException();
 392:     engine.setStrategy(stgy); // forwarded on every call; no copy is kept in this class
 393:   }
 394: 
 395:   /**
 396:    * Deflates the current input block to the given array.  Equivalent to
 397:    * <code>deflate(output, 0, output.length)</code>.
 398:    * @param output the buffer where to write the compressed data.
 399:    * @return the number of bytes written to output (possibly 0).
 400:    */
 401:   public int deflate(byte[] output)
 402:   {
 403:     return deflate(output, 0, output.length);
 404:   }
 405: 
 406:   /**
 407:    * Deflates the current input block to the given array.
 408:    * @param output the buffer where to write the compressed data.
 409:    * @param offset the offset into the output array.
 410:    * @param length the maximum number of bytes that may be written.
 411:    * @return the number of bytes written to output, or 0 if either
 412:    * needsInput() or finished() returns true or length is zero.
 413:    * @exception IllegalStateException if end() was called.
 414:    * @exception IndexOutOfBoundsException if offset and/or length
 415:    * don't match the array length (not checked in this method itself).
 416:    */
 417:   public int deflate(byte[] output, int offset, int length)
 418:   {
 419:     int origLength = length;
 420: 
 421:     if (state == CLOSED_STATE)
 422:       throw new IllegalStateException("Deflater closed");
 423: 
 424:     if (state < BUSY_STATE) // header not written yet
 425:       {
 426:     /* output header: one 16 bit value, queued with writeShortMSB() */
 427:     int header = (DEFLATED + 
 428:               ((DeflaterConstants.MAX_WBITS - 8) << 4)) << 8;
 429:     int level_flags = (level - 1) >> 1; // NOTE(review): level 0 gives -1 here and becomes 3 below -- confirm intended
 430:     if (level_flags < 0 || level_flags > 3) 
 431:       level_flags = 3;
 432:     header |= level_flags << 6;
 433:     if ((state & IS_SETDICT) != 0)
 434:       /* Dictionary was set */
 435:       header |= DeflaterConstants.PRESET_DICT;
 436:     header += 31 - (header % 31); // advance to the next multiple of 31
 437: 
 438:     pending.writeShortMSB(header);
 439:     if ((state & IS_SETDICT) != 0)
 440:       {
 441:         int chksum = engine.getAdler(); // checksum of the dictionary, written upper half first
 442:         engine.resetAdler();
 443:         pending.writeShortMSB(chksum >> 16);
 444:         pending.writeShortMSB(chksum & 0xffff);
 445:       }
 446: 
 447:     state = BUSY_STATE | (state & (IS_FLUSHING | IS_FINISHING)); // keep a pending flush()/finish() request
 448:       }
 449: 
 450:     for (;;)
 451:       {
 452:     int count = pending.flush(output, offset, length); // hand out already pending bytes first
 453:     offset += count;
 454:     totalOut += count;
 455:     length -= count;
 456:     if (length == 0 || state == FINISHED_STATE)
 457:       break;
 458: 
 459:     if (!engine.deflate((state & IS_FLUSHING) != 0, 
 460:                 (state & IS_FINISHING) != 0))
 461:       {
 462:         if (state == BUSY_STATE)
 463:           /* We need more input now */
 464:           return origLength - length;
 465:         else if (state == FLUSHING_STATE)
 466:           {
 467:         if (level != NO_COMPRESSION)
 468:           {
 469:             /* We have to supply some lookahead.  8 bit lookahead
 470:              * are needed by the zlib inflater, and we must fill 
 471:              * the next byte, so that all bits are flushed.
 472:              */
 473:             int neededbits = 8 + ((-pending.getBitCount()) & 7);
 474:             while (neededbits > 0)
 475:               {
 476:             /* write a static tree block consisting solely of
 477:              * an EOF:
 478:              */
 479:             pending.writeBits(2, 10);
 480:             neededbits -= 10;
 481:               }
 482:           }
 483:         state = BUSY_STATE; // flush request served
 484:           }
 485:         else if (state == FINISHING_STATE)
 486:           {
 487:         pending.alignToByte();
 488:         /* We have completed the stream */
 489:         if (!noHeader)
 490:           {
 491:             int adler = engine.getAdler(); // trailing checksum, written upper half first
 492:             pending.writeShortMSB(adler >> 16);
 493:             pending.writeShortMSB(adler & 0xffff);
 494:           }
 495:         state = FINISHED_STATE; // the next loop pass flushes the rest and then breaks
 496:           }
 497:       }
 498:       }
 499: 
 500:     return origLength - length;
 501:   }
 502: 
 503:   /**
 504:    * Sets the dictionary which should be used in the deflate process.
 505:    * This call is equivalent to <code>setDictionary(dict, 0,
 506:    * dict.length)</code>.  
 507:    * @param dict the dictionary.  
 508:    * @exception IllegalStateException if the deflater is no longer in
 509:    * INIT_STATE, e.g. deflate () was called or a dictionary was already set.
 510:    */
 511:   public void setDictionary(byte[] dict)
 512:   {
 513:     setDictionary(dict, 0, dict.length);
 514:   }
 515: 
 516:   /**
 517:    * Sets the dictionary which should be used in the deflate process.
 518:    * The dictionary should be a byte array containing strings that are
 519:    * likely to occur in the data which should be compressed.  The
 520:    * dictionary is not stored in the compressed output, only a checksum.
 521:    * To decompress the output you need to supply the same dictionary again.
 522:    * @param dict the dictionary.
 523:    * @param offset an offset into the dictionary.
 524:    * @param length the length of the dictionary.
 525:    * @exception IllegalStateException if the deflater is not in INIT_STATE,
 526:    * i.e. deflate(), flush(), finish() or setDictionary() was already called,
 527:    * or it was created with nowrap set.
 528:    */
 529:   public void setDictionary(byte[] dict, int offset, int length)
 530:   {
 531:     if (state != INIT_STATE) // a nowrap deflater starts in BUSY_STATE and so always ends up here
 532:       throw new IllegalStateException();
 533: 
 534:     state = SETDICT_STATE;
 535:     engine.setDictionary(dict, offset, length); // NOTE(review): a prior setInput() is not checked here; the engine may reject it -- confirm
 536:   }
 537: }