文字列をgzipストリームに圧縮します。圧縮効果は悪くありません。
package test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
 * Small gzip helpers: compress a string into gzip bytes and inflate gzip bytes
 * back into raw bytes, plus a {@code main} that demonstrates a round trip.
 */
public class testZip {

    /** Size in bytes of the scratch buffer used while inflating. */
    private static final int BUFFER_SIZE = 256;

    /**
     * Gzip-compresses the UTF-8 encoding of the given text.
     *
     * @param str text to compress
     * @return the gzip-compressed bytes, or {@code null} when {@code str} is
     *         {@code null} or empty
     * @throws IOException if writing to the gzip stream fails
     */
    public static byte[] compress(String str) throws IOException {
        if (str == null || str.length() == 0) {
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // The gzip stream must be closed before reading the buffer: closing
        // flushes the remaining deflate data and writes the gzip trailer.
        try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
            gzip.write(str.getBytes(StandardCharsets.UTF_8));
        }
        return out.toByteArray();
    }

    /**
     * Inflates gzip-compressed bytes.
     *
     * @param str gzip-compressed input
     * @return the decompressed bytes, or {@code null} when {@code str} is
     *         {@code null} or empty
     * @throws IOException if the input is not valid gzip data or reading fails
     */
    public static byte[] uncompress(byte[] str) throws IOException {
        if (str == null || str.length == 0) {
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // try-with-resources releases the inflater even when the data is corrupt.
        try (GZIPInputStream gunzip = new GZIPInputStream(new ByteArrayInputStream(str))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int n;
            while ((n = gunzip.read(buffer)) >= 0) {
                out.write(buffer, 0, n);
            }
        }
        return out.toByteArray();
    }

    /**
     * Demonstrates a compress/uncompress round trip, printing the input, the
     * compressed form (as hex) and the restored text together with their sizes.
     *
     * @param args unused
     * @throws IOException if compression or decompression fails
     */
    public static void main(String[] args) throws IOException {
        String data = "234235423sdfgsatg43qr4rfsetuyw45t3wfeszdfvm 0394tivq0m234rfqa2,-r0kaw03 5jhtqca9203rjm0,qva9tj0qa3wj445";
        System.out.println(" :" + data);
        System.out.println(" :" + data.length());

        // Keep the compressed data as bytes. Gzip output is binary, so turning
        // it into a String and back is lossy and makes decompression fail.
        byte[] compressed = testZip.compress(data);
        System.out.println(" :" + toHex(compressed));
        System.out.println(" :" + compressed.length);

        // Decode with the same charset compress() used to encode.
        String undata = new String(testZip.uncompress(compressed), StandardCharsets.UTF_8);
        System.out.println(" :" + undata);
        System.out.println(" :" + undata.length());
    }

    /** Renders bytes as lowercase hex so binary data can be printed safely. */
    private static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(Character.forDigit((b >> 4) & 0xF, 16));
            hex.append(Character.forDigit(b & 0xF, 16));
        }
        return hex.toString();
    }
}
圧縮結果はbyte[]のバイト列としてしか得られません。重複の多いデータほどサイズは大幅に小さくなります。テストでは2000個のMD5値を使い、6 Kから1 K以下に圧縮できたので、効果は悪くありません。