我想在 Java 中用 GZIP 压缩一个输入流。
假设我们有一个未压缩的输入流(约 1GB 数据),我希望由它得到一个压缩后的输入流:
/**
 * Intended to return a stream that yields the GZIP-compressed form of the given data.
 *
 * NOTE: as written this does the opposite of what is wanted. It wraps the argument in a
 * GZIPInputStream, which expects already-compressed input and decompresses it while reading.
 *
 * @param unCompressedStream the stream supplying the uncompressed data
 * @return a GZIPInputStream wrapping the argument (see the note above)
 */
public InputStream getCompressedStream(InputStream unCompressedStream) {
// Not working because it's uncompressing the stream, I want the opposite.
return new GZIPInputStream(unCompressedStream);
}
Run Code Online (Sandbox Code Playgroud)
Mic*_*raz 11
DeflaterInputStream 并不是你想要的,因为它不会输出 gzip 的头部(header)和尾部(trailer),而且使用的压缩格式也略有不同。
如果从OutputStream(push)更改为InputStream(pull),则需要执行不同的操作.
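GZIPOutputStream 的工作流程是:
- 写入一个固定的 gzip 头部;
- 通过 DeflaterOutputStream 写入 deflate 压缩后的数据,同时对未压缩的数据计算 CRC32 校验和并统计字节数;
- 写入包含 CRC32 校验和与字节数的尾部。
如果要用 InputStream 实现同样的效果,就需要一个依次包含以下内容的流:
- 头部;
- deflate 压缩后的内容;
- 尾部。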
最好的方法是提供3个不同的流并将它们合并为一个.幸运的是,有一个SequenceInputStream可以为您组合流.
这是我的实现加上一个简单的单元测试:
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.Enumeration;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.DeflaterInputStream;
import java.util.zip.DeflaterOutputStream;
/**
 * Wraps an input stream and compresses its contents while they are read. Similar to
 * {@link DeflaterInputStream}, but the output is framed with a GZIP header and trailer so
 * that it can be read back with {@code java.util.zip.GZIPInputStream}.
 *
 * <p>The result is the concatenation of three streams, produced lazily in this order:
 * a fixed 10-byte header, the raw deflated content, and an 8-byte trailer holding the
 * CRC-32 and the length of the uncompressed data.
 *
 * <p>See GzipOutputStream for details.
 * LICENSE: Free to use. Contains some lines from GzipOutputStream, so oracle's license might apply as well!
 *
 * @author mwyraz
 */
public class GzipCompressingInputStream extends SequenceInputStream
{
    /** Deflater working-buffer size used by the single-argument constructor. */
    private static final int DEFAULT_BUFFER_SIZE = 512;

    /**
     * Creates a compressing stream with the default buffer size.
     *
     * @param in the stream supplying the uncompressed data
     * @throws IOException declared for callers; nothing in this constructor reads from {@code in}
     */
    public GzipCompressingInputStream(InputStream in) throws IOException
    {
        this(in, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Creates a compressing stream.
     *
     * @param in the stream supplying the uncompressed data
     * @param bufferSize size in bytes of the buffer handed to the underlying {@link DeflaterInputStream}
     * @throws IOException declared for callers; nothing in this constructor reads from {@code in}
     */
    public GzipCompressingInputStream(InputStream in, int bufferSize) throws IOException
    {
        super(new StatefullGzipStreamEnumerator(in, bufferSize));
    }

    /** The part of the GZIP member that the enumerator will hand out next. */
    enum StreamState
    {
        HEADER,
        CONTENT,
        TRAILER
    }

    /**
     * Supplies the header, content and trailer streams, one after another, to the
     * enclosing {@link SequenceInputStream}. The trailer is built only when it is
     * requested, from the CRC and byte count gathered by the content stream.
     */
    protected static class StatefullGzipStreamEnumerator implements Enumeration<InputStream>
    {
        protected final InputStream in;
        protected final int bufferSize;
        /** Next part to return; {@code null} once the trailer has been handed out. */
        protected StreamState state;

        public StatefullGzipStreamEnumerator(InputStream in, int bufferSize)
        {
            this.in = in;
            this.bufferSize = bufferSize;
            state = StreamState.HEADER;
        }

        @Override
        public boolean hasMoreElements()
        {
            return state != null;
        }

        /**
         * Returns the next part of the GZIP member and advances the state.
         *
         * @return the header, content or trailer stream, in that order
         * @throws java.util.NoSuchElementException if all three parts were already returned
         */
        @Override
        public InputStream nextElement()
        {
            // Switching on a null enum would raise NullPointerException; the Enumeration
            // contract asks for NoSuchElementException once the elements are exhausted.
            if (state == null)
            {
                throw new java.util.NoSuchElementException("GZIP header, content and trailer were already returned");
            }
            switch (state)
            {
                case HEADER:
                    state = StreamState.CONTENT;
                    return createHeaderStream();
                case CONTENT:
                    state = StreamState.TRAILER;
                    return createContentStream();
                case TRAILER:
                    state = null;
                    return createTrailerStream();
                default:
                    throw new IllegalStateException("Unknown stream state: " + state);
            }
        }

        static final int GZIP_MAGIC = 0x8b1f;

        static final byte[] GZIP_HEADER = new byte[] {
            (byte) GZIP_MAGIC,        // Magic number (short), low byte
            (byte) (GZIP_MAGIC >> 8), // Magic number (short), high byte
            Deflater.DEFLATED,        // Compression method (CM)
            0,                        // Flags (FLG)
            0,                        // Modification time MTIME (int)
            0,                        // Modification time MTIME (int)
            0,                        // Modification time MTIME (int)
            0,                        // Modification time MTIME (int)
            0,                        // Extra flags (XFLG)
            0                         // Operating system (OS)
        };

        /** @return a stream over the fixed GZIP header bytes */
        protected InputStream createHeaderStream()
        {
            return new ByteArrayInputStream(GZIP_HEADER);
        }

        /** The content stream; kept so that the trailer can be derived from it later. */
        protected InternalGzipCompressingInputStream contentStream;

        /** @return a stream that deflates {@code in} while tracking CRC-32 and byte count */
        protected InputStream createContentStream()
        {
            contentStream = new InternalGzipCompressingInputStream(new CRC32InputStream(in), bufferSize);
            return contentStream;
        }

        /** @return a stream over the trailer computed from the content stream's CRC and byte count */
        protected InputStream createTrailerStream()
        {
            return new ByteArrayInputStream(contentStream.createTrailer());
        }
    }

    /**
     * Pass-through filter that maintains a CRC-32 checksum and a running count of the
     * bytes delivered by its {@code read} methods. Only those two methods are tracked;
     * bytes consumed in any other way are not reflected in the checksum or the count.
     */
    protected static class CRC32InputStream extends FilterInputStream
    {
        protected CRC32 crc = new CRC32();
        protected long byteCount;

        public CRC32InputStream(InputStream in)
        {
            super(in);
        }

        @Override
        public int read() throws IOException
        {
            int val = super.read();
            if (val >= 0)
            {
                crc.update(val);
                byteCount++;
            }
            return val;
        }

        @Override
        public int read(byte[] b, int off, int len) throws IOException
        {
            len = super.read(b, off, len);
            if (len >= 0)
            {
                crc.update(b, off, len);
                byteCount += len;
            }
            return len;
        }

        /** @return the CRC-32 of all bytes read so far */
        public long getCrcValue()
        {
            return crc.getValue();
        }

        /** @return the number of bytes read so far */
        public long getByteCount()
        {
            return byteCount;
        }
    }

    /**
     * Internal stream without header/trailer: deflates the wrapped stream in "nowrap"
     * mode and can produce the matching GZIP trailer afterwards.
     */
    protected static class InternalGzipCompressingInputStream extends DeflaterInputStream
    {
        protected final CRC32InputStream crcIn;

        public InternalGzipCompressingInputStream(CRC32InputStream in, int bufferSize)
        {
            super(in, new Deflater(Deflater.DEFAULT_COMPRESSION, true), bufferSize);
            crcIn = in;
        }

        /**
         * Ends the deflater created by the constructor and closes the wrapped stream.
         * Does nothing when the stream is already closed.
         */
        @Override
        public void close() throws IOException
        {
            if (in != null)
            {
                try
                {
                    def.end();
                    in.close();
                }
                finally
                {
                    in = null;
                }
            }
        }

        protected final static int TRAILER_SIZE = 8;

        /**
         * Builds the GZIP trailer from the CRC and byte count recorded so far.
         *
         * @return a new {@link #TRAILER_SIZE}-byte array: CRC-32 followed by the byte count
         */
        public byte[] createTrailer()
        {
            byte[] trailer = new byte[TRAILER_SIZE];
            writeTrailer(trailer, 0);
            return trailer;
        }

        /*
         * Writes GZIP member trailer to a byte array, starting at a given
         * offset.
         */
        private void writeTrailer(byte[] buf, int offset)
        {
            writeInt((int) crcIn.getCrcValue(), buf, offset); // CRC-32 of uncompr. data
            // The cast keeps only the low 32 bits of the count.
            writeInt((int) crcIn.getByteCount(), buf, offset + 4); // Number of uncompr. bytes
        }

        /*
         * Writes integer in Intel byte order to a byte array, starting at a
         * given offset.
         */
        private void writeInt(int i, byte[] buf, int offset)
        {
            writeShort(i & 0xffff, buf, offset);
            writeShort((i >> 16) & 0xffff, buf, offset + 2);
        }

        /*
         * Writes short integer in Intel byte order to a byte array, starting
         * at a given offset
         */
        private void writeShort(int s, byte[] buf, int offset)
        {
            buf[offset] = (byte) (s & 0xff);
            buf[offset + 1] = (byte) ((s >> 8) & 0xff);
        }
    }
}
Run Code Online (Sandbox Code Playgroud)
import static org.junit.Assert.*;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.zip.CRC32;
import java.util.zip.GZIPInputStream;
import org.junit.Test;
/**
 * Round-trip test for GzipCompressingInputStream: data is compressed by the stream under
 * test, decompressed again with {@link GZIPInputStream}, and compared with the input.
 */
public class TestGzipCompressingInputStream
{
    /** Runs the round trip for a short string, a 1 MB pattern and every size from 0 to 4095 bytes. */
    @Test
    public void test() throws Exception
    {
        testCompressor("test1 test2 test3");
        testCompressor("1MB binary data", createTestPattern(1024 * 1024));
        for (int i = 0; i < 4096; i++)
        {
            testCompressor(i + " bytes of binary data", createTestPattern(i));
        }
    }

    /**
     * Creates a byte array filled with the repeating sequence 0, 1, ..., 127, -128, ..., -1.
     *
     * @param size number of bytes to generate
     * @return the generated pattern
     */
    protected byte[] createTestPattern(int size)
    {
        byte[] data = new byte[size];
        for (int i = 0; i < size; i++)
        {
            // The narrowing cast wraps around exactly like an incremented byte counter.
            data[i] = (byte) i;
        }
        return data;
    }

    /**
     * Runs the round trip for the bytes of a string.
     *
     * @param data text whose encoded bytes are compressed and decompressed
     * @throws IOException if reading from any of the streams fails
     */
    protected void testCompressor(String data) throws IOException
    {
        // Fixed charset so that the test input does not depend on the platform default.
        testCompressor("String: " + data, data.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Compresses {@code data}, decompresses it again and asserts the result equals the input.
     *
     * @param dataInfo description of the input, used in the failure message
     * @param data the bytes to round-trip
     * @throws IOException if reading from any of the streams fails
     */
    protected void testCompressor(String dataInfo, byte[] data) throws IOException
    {
        // Closing the outermost stream closes the whole chain.
        // NOTE(review): StreamHelper may already close the stream; a second close is harmless.
        try (InputStream uncompressedOut =
                new GZIPInputStream(new GzipCompressingInputStream(new ByteArrayInputStream(data))))
        {
            byte[] result = StreamHelper.readBinaryStream(uncompressedOut);
            assertArrayEquals("Test failed for: " + dataInfo, data, result);
        }
    }
}
Run Code Online (Sandbox Code Playgroud)
这是我编写的一个版本,其中没有 CRC/GZIP Magic cookie,因为它委托给 GZIPOutputStream。它的内存效率也很高,因为它只使用足够的内存来缓冲压缩(42MB 文件使用 45k 缓冲区)。性能与压缩到内存相同。
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;
/**
 * Compresses an InputStream in a memory-optimal, on-demand way only compressing enough to fill a buffer.
 *
 * <p>Source data is pushed through a {@link GZIPOutputStream} whose output is captured in an
 * internal buffer; reads are served from that buffer, and more source data is fed in only
 * when the buffer is empty. Closing this stream closes the compressor and the source stream.
 *
 * @author Ben La Monica
 */
public class GZIPCompressingInputStream extends InputStream {

    private final InputStream in;
    private final GZIPOutputStream gz;
    private final OutputStream delegate;
    /** Holds compressed bytes that were produced but not yet read; grows on demand. */
    private byte[] buf = new byte[8192];
    /** Scratch buffer for chunks read from the uncompressed source. */
    private final byte[] readBuf = new byte[8192];
    /** Index in {@code buf} of the next compressed byte to hand out. */
    int read = 0;
    /** Index in {@code buf} where the next compressed byte will be stored. */
    int write = 0;
    /** Set once the source reported end of stream and the compressor was finished. */
    private boolean sourceExhausted;
    /** Set by {@link #close()}. */
    private boolean closed;

    /**
     * Creates a compressing view of the given stream.
     *
     * @param in the stream supplying the uncompressed data
     * @throws IOException if the GZIP compressor cannot be created
     */
    public GZIPCompressingInputStream(InputStream in) throws IOException {
        this.in = in;
        OutputStream sink = new OutputStream() {
            private void growBufferIfNeeded(int len) {
                if ((write + len) >= buf.length) {
                    // grow the array if we don't have enough space to fulfill the incoming data
                    byte[] newbuf = new byte[(buf.length + len) * 2];
                    System.arraycopy(buf, 0, newbuf, 0, write);
                    buf = newbuf;
                }
            }

            @Override
            public void write(byte[] b, int off, int len) throws IOException {
                growBufferIfNeeded(len);
                System.arraycopy(b, off, buf, write, len);
                write += len;
            }

            @Override
            public void write(int b) throws IOException {
                growBufferIfNeeded(1);
                buf[write++] = (byte) b;
            }
        };
        this.delegate = sink;
        this.gz = new GZIPOutputStream(sink);
    }

    /**
     * Reads up to {@code len} compressed bytes into {@code b}.
     *
     * @return the number of bytes copied, 0 if {@code len} is 0, or -1 at end of stream
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit into {@code b}
     * @throws IOException if the stream is closed or the source cannot be read
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        ensureOpen();
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException("off=" + off + ", len=" + len + ", b.length=" + b.length);
        }
        if (len == 0) {
            return 0;
        }
        compressStream();
        int numBytes = Math.min(len, write - read);
        if (numBytes <= 0) {
            // bytes were requested, but we have none, so we're at the end of the stream
            return -1;
        }
        System.arraycopy(buf, read, b, off, numBytes);
        read += numBytes;
        return numBytes;
    }

    /**
     * Makes sure the buffer holds unread compressed data, feeding the compressor from the
     * source until it emits output or the source ends.
     */
    private void compressStream() throws IOException {
        // if the reader has caught up with the writer, then zero the positions out
        if (read == write) {
            read = 0;
            write = 0;
        }
        while (write == 0 && !sourceExhausted) {
            // feed the gzip stream data until it spits out a block
            int val = in.read(readBuf);
            if (val == -1) {
                // end of the source: finish the compressor once and stop polling the source
                sourceExhausted = true;
                gz.close();
            } else if (val > 0) {
                gz.write(readBuf, 0, val);
            }
        }
    }

    /**
     * Reads a single compressed byte.
     *
     * @return the byte as a value in 0..255, or -1 at end of stream
     * @throws IOException if the stream is closed or the source cannot be read
     */
    @Override
    public int read() throws IOException {
        ensureOpen();
        compressStream();
        if (write == 0) {
            // write should not be 0 if we were able to get data from compress stream, must mean we're at the end
            return -1;
        }
        // reading a single byte
        return buf[read++] & 0xFF;
    }

    /**
     * Closes the compressor and the source stream. Calling it again has no effect.
     *
     * @throws IOException if closing the compressor or the source fails
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        closed = true;
        try {
            gz.close();
        } finally {
            in.close();
        }
    }

    /** Rejects reads issued after {@link #close()}. */
    private void ensureOpen() throws IOException {
        if (closed) {
            throw new IOException("Stream closed");
        }
    }
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
19840 次 |
| 最近记录: |