X Tutup
Skip to content

Commit ebac02f

Browse files
authored
Update UncompressInputStream.java
Fixed bugs, in available() and in the EOF condition for the main loop, where the final bytes of a file were not decoded. Added some comments. Indented some unindented lines. Added method uncompress(InputStream, OutputStream) and called it from main(String[]) and from uncompress(String, FileOutputStream). Rewrote the skip method. Amended the logging code in uncompress(String, FileOutputStream).
1 parent 32255b1 commit ebac02f

File tree

1 file changed

+101
-70
lines changed

1 file changed

+101
-70
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/util/UncompressInputStream.java

Lines changed: 101 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,24 @@
7575
* @author Ronald Tschalar
7676
* @author Unidata Program Center
7777
* @author Richard Holland - making LZW_MAGIC package-visible.
78+
*
79+
* @version 0.3-5 2008/01/19
80+
* @author Fred Hansen (zweibieren@yahoo.com)
81+
* Fixed available() and the EOF condition for mainloop.
82+
* Also added some comments.
83+
*
84+
* @version 1.0 2018/01/08
85+
* @author Fred Hansen (zweibieren@yahoo.com)
86+
* added uncompress(InputStream,OutputStream)
87+
* and called it from main(String[])
88+
* and uncompress(String, FileOutputStream)
89+
* normalize indentation
90+
* rewrite skip method
91+
* amend logging code in uncompress(String, FileOutputStream)
7892
*/
7993
public class UncompressInputStream extends FilterInputStream {
80-
81-
private final static Logger logger = LoggerFactory.getLogger(UncompressInputStream.class);
94+
private final static Logger logger
95+
= LoggerFactory.getLogger(UncompressInputStream.class);
8296

8397
/**
8498
* @param is the input stream to decompress
@@ -90,10 +104,9 @@ public UncompressInputStream(InputStream is) throws IOException {
90104
}
91105

92106

93-
byte[] one = new byte[1];
94-
95107
@Override
96-
public synchronized int read() throws IOException {
108+
public synchronized int read() throws IOException {
109+
byte[] one = new byte[1];
97110
int b = read(one, 0, 1);
98111
if (b == 1)
99112
return (one[0] & 0xff);
@@ -108,7 +121,7 @@ public synchronized int read() throws IOException {
108121

109122
private int[] tab_prefix;
110123
private byte[] tab_suffix;
111-
private int[] zeros = new int[256];
124+
final private int[] zeros = new int[256];
112125
private byte[] stack;
113126

114127
// various state
@@ -123,20 +136,27 @@ public synchronized int read() throws IOException {
123136
private int stackp;
124137
private int free_ent;
125138

126-
// input buffer
127-
private byte[] data = new byte[10000];
128-
private int bit_pos = 0, end = 0, got = 0;
139+
/* input buffer
140+
The input stream must be considered in chunks
141+
Each chunk is eight times the current code length in bits (i.e. n_bits bytes).
142+
Thus each chunk contains eight codes; the individual codes are NOT on byte boundaries.
143+
*/
144+
final private byte[] data = new byte[10000];
145+
private int
146+
bit_pos = 0, // current bitwise location in bitstream
147+
end = 0, // index of next byte to fill in data
148+
got = 0; // number of bytes gotten by most recent read()
129149
private boolean eof = false;
130150
private static final int EXTRA = 64;
131151

132152

133153
@Override
134-
public synchronized int read(byte[] buf, int off, int len)
154+
public synchronized int read(byte[] buf, int off, int len)
135155
throws IOException {
136156
if (eof) return -1;
137157
int start = off;
138158

139-
/* Using local copies of various variables speeds things up by as
159+
/* Using local copies of various variables speeds things up by as
140160
* much as 30% !
141161
*/
142162
int[] l_tab_prefix = tab_prefix;
@@ -153,9 +173,7 @@ public synchronized int read(byte[] buf, int off, int len)
153173
byte[] l_data = data;
154174
int l_bit_pos = bit_pos;
155175

156-
157-
// empty stack if stuff still left
158-
176+
// empty stack if stuff still left
159177
int s_size = l_stack.length - l_stackp;
160178
if (s_size > 0) {
161179
int num = (s_size >= len) ? len : s_size;
@@ -170,17 +188,15 @@ public synchronized int read(byte[] buf, int off, int len)
170188
return off - start;
171189
}
172190

173-
174-
// loop, filling local buffer until enough data has been decompressed
175-
191+
// loop, filling local buffer until enough data has been decompressed
176192
main_loop: do {
177193
if (end < EXTRA) fill();
178194

179-
int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 :
180-
(end << 3) - (l_n_bits - 1);
195+
int bit_end = (got > 0)
196+
? (end - end % l_n_bits) << 3 // set to a "chunk" boundary
197+
: (end << 3) - (l_n_bits - 1); // no more data, set to last code
181198

182-
while (l_bit_pos < bit_in) {
183-
// handle 1-byte reads correctly
199+
while (l_bit_pos < bit_end) { // handle 1-byte reads correctly
184200
if (len == 0) {
185201
n_bits = l_n_bits;
186202
maxcode = l_maxcode;
@@ -326,7 +342,10 @@ public synchronized int read(byte[] buf, int off, int len)
326342
}
327343

328344
l_bit_pos = resetbuf(l_bit_pos);
329-
} while (got > 0);
345+
} while
346+
// old code: (got>0) fails if code width expands near EOF
347+
(got > 0 // usually true
348+
|| l_bit_pos < (end << 3) - (l_n_bits - 1)); // last few bytes
330349

331350
n_bits = l_n_bits;
332351
maxcode = l_maxcode;
@@ -346,37 +365,36 @@ public synchronized int read(byte[] buf, int off, int len)
346365
* Moves the unread data in the buffer to the beginning and resets
347366
* the pointers.
348367
*/
349-
private final int resetbuf(int bit_pos) {
368+
private int resetbuf(int bit_pos) {
350369
int pos = bit_pos >> 3;
351370
System.arraycopy(data, pos, data, 0, end - pos);
352371
end -= pos;
353372
return 0;
354373
}
355374

356375

357-
private final void fill() throws IOException {
376+
private void fill() throws IOException {
358377
got = in.read(data, end, data.length - 1 - end);
359378
if (got > 0) end += got;
360379
}
361380

362381

363382
@Override
364-
public synchronized long skip(long num) throws IOException {
365-
byte[] tmp = new byte[(int) num];
366-
int got = read(tmp, 0, (int) num);
367-
368-
if (got > 0)
369-
return got;
370-
else
371-
return 0L;
383+
public synchronized long skip(long num) throws IOException {
384+
return Math.max(0,
385+
read(new byte[(int) num], 0, (int) num));
372386
}
373387

374388

375389
@Override
376-
public synchronized int available() throws IOException {
390+
public synchronized int available() throws IOException {
377391
if (eof) return 0;
378-
379-
return in.available();
392+
// the old code was: return in.available();
393+
// it fails because this.read() can return bytes
394+
// even after in.available() is zero
395+
// -- zweibieren
396+
int avail = in.available();
397+
return (avail == 0) ? 1 : avail;
380398
}
381399

382400

@@ -389,8 +407,7 @@ public synchronized int available() throws IOException {
389407
private static final int HDR_BLOCK_MODE = 0x80;
390408

391409
private void parse_header() throws IOException {
392-
// read in and check magic number
393-
410+
// read in and check magic number
394411
int t = in.read();
395412
if (t < 0) throw new EOFException("Failed to read magic number");
396413
int magic = (t & 0xff) << 8;
@@ -402,9 +419,7 @@ private void parse_header() throws IOException {
402419
"magic number 0x" +
403420
Integer.toHexString(magic) + ")");
404421

405-
406-
// read in header byte
407-
422+
// read in header byte
408423
int header = in.read();
409424
if (header < 0) throw new EOFException("Failed to read header");
410425

@@ -425,9 +440,7 @@ private void parse_header() throws IOException {
425440
logger.debug("block mode: {}", block_mode);
426441
logger.debug("max bits: {}", maxbits);
427442

428-
429-
// initialize stuff
430-
443+
// initialize stuff
431444
maxmaxcode = 1 << maxbits;
432445
n_bits = INIT_BITS;
433446
maxcode = (1 << n_bits) - 1;
@@ -451,59 +464,77 @@ private void parse_header() throws IOException {
451464
* @return false
452465
*/
453466
@Override
454-
public boolean markSupported() {
467+
public boolean markSupported() {
455468
return false;
456469
}
457470

458-
static public void uncompress( String fileInName, FileOutputStream out) throws IOException {
471+
/**
472+
* Read a named file and uncompress it.
473+
* @param fileInName Name of compressed file.
474+
* @param out A destination for the result. It is closed after data is sent.
475+
* @return number of bytes sent to the output stream
476+
* @throws IOException for any error
477+
*/
478+
public static long uncompress(String fileInName, FileOutputStream out)
479+
throws IOException {
459480
long start = System.currentTimeMillis();
460-
461-
InputStream in = new UncompressInputStream( new FileInputStream(fileInName));
462-
463-
// int total = 0;
464-
byte[] buffer = new byte[100000];
465-
while (true) {
466-
int bytesRead = in.read(buffer);
467-
if (bytesRead == -1) break;
468-
out.write(buffer, 0, bytesRead);
469-
// total += bytesRead;
481+
long total;
482+
try (InputStream fin = new FileInputStream(fileInName)) {
483+
total = uncompress(fin, out);
470484
}
471-
in.close();
472485
out.close();
473486

474487
if (debugTiming) {
475488
long end = System.currentTimeMillis();
476-
// logger.debug("Decompressed " + total + " bytes");
477-
logger.warn("Time: {} seconds", (end - start) / 1000);
489+
logger.info("Decompressed {} bytes", total);
490+
UncompressInputStream.logger.info("Time: {} seconds", (end - start) / 1000);
478491
}
492+
return total;
479493
}
480494

495+
/**
496+
* Read an input stream and uncompress it to an output stream.
497+
* @param in the incoming InputStream. It is NOT closed.
498+
* @param out the destination OutputStream. It is NOT closed.
499+
* @return number of bytes sent to the output stream
500+
* @throws IOException for any error
501+
*/
502+
public static long uncompress(InputStream in, OutputStream out)
503+
throws IOException {
504+
UncompressInputStream ucis = new UncompressInputStream(in);
505+
long total = 0;
506+
byte[] buffer = new byte[100000];
507+
while (true) {
508+
int bytesRead = ucis.read(buffer);
509+
if (bytesRead == -1) break;
510+
out.write(buffer, 0, bytesRead);
511+
total += bytesRead;
512+
}
513+
return total;
514+
}
481515

482516
private static final boolean debugTiming = false;
483517

518+
/**
519+
* Reads a file, uncompresses it, and sends the result to stdout.
520+
* Also logs trivial statistics (bytes decompressed and elapsed seconds).
521+
* @param args An array with one String element, the name of the file to read.
522+
* @throws Exception for any failure
523+
*/
484524
public static void main(String[] args) throws Exception {
485525
if (args.length != 1) {
486526
logger.info("Usage: UncompressInputStream <file>");
487527
System.exit(1);
488528
}
489-
490-
InputStream in =
491-
new UncompressInputStream(new FileInputStream(args[0]));
492-
493-
byte[] buf = new byte[100000];
494-
int tot = 0;
495529
long beg = System.currentTimeMillis();
496530

497-
while (true) {
498-
int got = in.read(buf);
499-
if (got < 0) break;
500-
System.out.write(buf, 0, got);
501-
tot += got;
531+
long tot;
532+
try (InputStream in = new FileInputStream(args[0])) {
533+
tot = uncompress(in, System.out);
502534
}
503535

504536
long end = System.currentTimeMillis();
505537
logger.info("Decompressed {} bytes", tot);
506538
logger.info("Time: {} seconds", (end - beg) / 1000);
507-
in.close();
508539
}
509540
}

0 commit comments

Comments
 (0)
X Tutup