Avroによるデータのシリアライズ
69514 ワード
Apache Avroは、プログラミング言語に依存しないデータシリアライズシステムです。言語間の移植性に欠けるというHadoopのWritable型の欠点を解決するために設計されました。Avroのスキーマは通常JSONで記述され、データは通常バイナリ形式でエンコードされます。
Avroの使用
Avroの使用は、コンパイルSchemaと非コンパイルSchemaの2種類に分けられます
コンパイルSchema:スキーマファイル emp.avsc を定義し、そのSchemaをコンパイルしてJavaクラスを生成し、生成されたソースコードをIDEAにインポートする
コンパイルで生成されたソースは次のとおりです.シリアライズ デシリアライズ
非コンパイルSchema
emp.avscファイルをコンパイルする必要はなく、スキーマファイルを直接使用します.シリアライズ デシリアライズ
Avroの使用
Avroの使用は、コンパイルSchemaと非コンパイルSchemaの2種類に分けられます
コンパイルSchema
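// emp.avsc:JSON形式のスキーマ定義(注:後述の生成クラス Employee は、名前空間 Tutorialspoint・フィールド Name/age のスキーマから生成されたものであり、この例のレコード名・フィールドとは一致しません)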
{
"namespace": "tutorialspoint.com",
"type": "record",
"name": "emp",
"fields": [
{"name": "name", "type": "string"},
{"name": "id", "type": "int"},
{"name": "salary", "type": "int"},
{"name": "age", "type": "int"},
{"name": "address", "type": "string"}
]
}
// スキーマをコンパイルし、カレントディレクトリ(.)にJavaソースを生成する
cmd> java -jar avro-tools-1.8.2.jar compile schema emp.avsc .
// IDEAのプロジェクトに以下のMaven依存関係を追加する
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.8.2</version>
</dependency>
コンパイルされたソースは次のとおりです.
package Tutorialspoint;
import org.apache.avro.specific.SpecificData;
import org.apache.avro.message.BinaryMessageEncoder;
import org.apache.avro.message.BinaryMessageDecoder;
import org.apache.avro.message.SchemaStore;
/**
 * Avro specific-record class for the {@code Tutorialspoint.Employee} schema.
 *
 * <p>The class is marked {@code @AvroGenerated}; it carries its own schema in
 * {@link #SCHEMA$} (a record with a string field {@code Name} and an int field
 * {@code age}) and exposes positional {@code get}/{@code put} access, typed
 * accessors, single-object ByteBuffer encoding helpers and a {@link Builder}.
 *
 * <p>NOTE(review): as generated code it is presumably meant to be regenerated from
 * the schema rather than edited by hand — confirm before changing it.
 */
@SuppressWarnings("all")
@org.apache.avro.specific.AvroGenerated
public class Employee extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
private static final long serialVersionUID = -8873171083721622992L;
/** Schema of this record, parsed once from its embedded JSON definition. */
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Employee\",\"namespace\":\"Tutorialspoint\",\"fields\":[{\"name\":\"Name\",\"type\":\"string\"},{\"name\":\"age\",\"type\":\"int\"}]}");
/** Returns the schema shared by all instances of this class. */
public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
// Data model shared by the encoder/decoder and the datum writer/reader declared in this class.
private static SpecificData MODEL$ = new SpecificData();
private static final BinaryMessageEncoder<Employee> ENCODER =
new BinaryMessageEncoder<Employee>(MODEL$, SCHEMA$);
private static final BinaryMessageDecoder<Employee> DECODER =
new BinaryMessageDecoder<Employee>(MODEL$, SCHEMA$);
/**
* Return the BinaryMessageDecoder instance used by this class.
*/
public static BinaryMessageDecoder<Employee> getDecoder() {
return DECODER;
}
/**
* Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}.
* @param resolver a {@link SchemaStore} used to find schemas by fingerprint
*/
public static BinaryMessageDecoder<Employee> createDecoder(SchemaStore resolver) {
return new BinaryMessageDecoder<Employee>(MODEL$, SCHEMA$, resolver);
}
/** Serializes this Employee to a ByteBuffer. */
public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException {
return ENCODER.encode(this);
}
/** Deserializes a Employee from a ByteBuffer. */
public static Employee fromByteBuffer(
java.nio.ByteBuffer b) throws java.io.IOException {
return DECODER.decode(b);
}
// Record field storage. The fields are public but marked deprecated; the
// getters/setters further down provide the same access.
@Deprecated public CharSequence Name;
@Deprecated public int age;
/**
* Default constructor. Note that this does not initialize fields
* to their default values from the schema. If that is desired then
* one should use {@code newBuilder()}.
*/
public Employee() {}
/**
* All-args constructor.
* @param Name The new value for Name
* @param age The new value for age
*/
public Employee(CharSequence Name, Integer age) {
this.Name = Name;
// Auto-unboxes: a null age argument throws NullPointerException here.
this.age = age;
}
/** Returns the schema of this record (same object as {@link #SCHEMA$}). */
public org.apache.avro.Schema getSchema() { return SCHEMA$; }
// Used by DatumWriter. Applications should not call.
// Positional read: index 0 is Name, index 1 is age; any other index is rejected.
public Object get(int field$) {
switch (field$) {
case 0: return Name;
case 1: return age;
default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
// Used by DatumReader. Applications should not call.
// Positional write using the same index mapping as get(int).
@SuppressWarnings(value="unchecked")
public void put(int field$, Object value$) {
switch (field$) {
case 0: Name = (CharSequence)value$; break;
case 1: age = (Integer)value$; break;
default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
/**
* Gets the value of the 'Name' field.
* @return The value of the 'Name' field.
*/
public CharSequence getName() {
return Name;
}
/**
* Sets the value of the 'Name' field.
* @param value the value to set.
*/
public void setName(CharSequence value) {
this.Name = value;
}
/**
* Gets the value of the 'age' field.
* @return The value of the 'age' field.
*/
public Integer getAge() {
return age;
}
/**
* Sets the value of the 'age' field.
* @param value the value to set.
*/
public void setAge(Integer value) {
this.age = value;
}
/**
* Creates a new Employee RecordBuilder.
* @return A new Employee RecordBuilder
*/
public static Builder newBuilder() {
return new Builder();
}
/**
* Creates a new Employee RecordBuilder by copying an existing Builder.
* @param other The existing builder to copy.
* @return A new Employee RecordBuilder
*/
public static Builder newBuilder(Builder other) {
return new Builder(other);
}
/**
* Creates a new Employee RecordBuilder by copying an existing Employee instance.
* @param other The existing instance to copy.
* @return A new Employee RecordBuilder
*/
public static Builder newBuilder(Employee other) {
return new Builder(other);
}
/**
* RecordBuilder for Employee instances.
*
* <p>Each field has a "set" flag (index 0 for Name, 1 for age); {@link #build()}
* uses the builder's value for a flagged field and the schema default otherwise.
*/
public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Employee>
implements org.apache.avro.data.RecordBuilder<Employee> {
private CharSequence Name;
private int age;
/** Creates a new Builder */
private Builder() {
super(SCHEMA$);
}
/**
* Creates a Builder by copying an existing Builder.
* @param other The existing Builder to copy.
*/
private Builder(Builder other) {
super(other);
// Copy each field only if its value is valid for the field, and mark it as set.
if (isValidValue(fields()[0], other.Name)) {
this.Name = data().deepCopy(fields()[0].schema(), other.Name);
fieldSetFlags()[0] = true;
}
if (isValidValue(fields()[1], other.age)) {
this.age = data().deepCopy(fields()[1].schema(), other.age);
fieldSetFlags()[1] = true;
}
}
/**
* Creates a Builder by copying an existing Employee instance
* @param other The existing instance to copy.
*/
private Builder(Employee other) {
super(SCHEMA$);
// Same per-field copy as the Builder-copy constructor, reading from the record.
if (isValidValue(fields()[0], other.Name)) {
this.Name = data().deepCopy(fields()[0].schema(), other.Name);
fieldSetFlags()[0] = true;
}
if (isValidValue(fields()[1], other.age)) {
this.age = data().deepCopy(fields()[1].schema(), other.age);
fieldSetFlags()[1] = true;
}
}
/**
* Gets the value of the 'Name' field.
* @return The value.
*/
public CharSequence getName() {
return Name;
}
/**
* Sets the value of the 'Name' field.
* @param value The value of 'Name'.
* @return This builder.
*/
public Builder setName(CharSequence value) {
validate(fields()[0], value);
this.Name = value;
fieldSetFlags()[0] = true;
return this;
}
/**
* Checks whether the 'Name' field has been set.
* @return True if the 'Name' field has been set, false otherwise.
*/
public boolean hasName() {
return fieldSetFlags()[0];
}
/**
* Clears the value of the 'Name' field.
* @return This builder.
*/
public Builder clearName() {
Name = null;
fieldSetFlags()[0] = false;
return this;
}
/**
* Gets the value of the 'age' field.
* @return The value.
*/
public Integer getAge() {
return age;
}
/**
* Sets the value of the 'age' field.
* @param value The value of 'age'.
* @return This builder.
*/
public Builder setAge(int value) {
validate(fields()[1], value);
this.age = value;
fieldSetFlags()[1] = true;
return this;
}
/**
* Checks whether the 'age' field has been set.
* @return True if the 'age' field has been set, false otherwise.
*/
public boolean hasAge() {
return fieldSetFlags()[1];
}
/**
* Clears the value of the 'age' field.
* @return This builder.
*/
public Builder clearAge() {
// Only the "set" flag is reset; the stored int value is left as it was.
fieldSetFlags()[1] = false;
return this;
}
/**
* Builds a new Employee, taking each field from this builder when its flag is
* set and from {@code defaultValue(...)} otherwise.
* @return The new record.
* @throws org.apache.avro.AvroRuntimeException wrapping any exception raised while building.
*/
@SuppressWarnings("unchecked")
public Employee build() {
try {
Employee record = new Employee();
record.Name = fieldSetFlags()[0] ? this.Name : (CharSequence) defaultValue(fields()[0]);
record.age = fieldSetFlags()[1] ? this.age : (Integer) defaultValue(fields()[1]);
return record;
} catch (Exception e) {
throw new org.apache.avro.AvroRuntimeException(e);
}
}
}
// Datum writer for this schema, created from MODEL$; used by writeExternal.
@SuppressWarnings("unchecked")
private static final org.apache.avro.io.DatumWriter<Employee>
WRITER$ = (org.apache.avro.io.DatumWriter<Employee>)MODEL$.createDatumWriter(SCHEMA$);
/** Writes this record to {@code out} through {@code WRITER$} and an encoder obtained from SpecificData. */
@Override public void writeExternal(java.io.ObjectOutput out)
throws java.io.IOException {
WRITER$.write(this, SpecificData.getEncoder(out));
}
// Datum reader for this schema, created from MODEL$; used by readExternal.
@SuppressWarnings("unchecked")
private static final org.apache.avro.io.DatumReader<Employee>
READER$ = (org.apache.avro.io.DatumReader<Employee>)MODEL$.createDatumReader(SCHEMA$);
/** Populates this record from {@code in} through {@code READER$} and a decoder obtained from SpecificData. */
@Override public void readExternal(java.io.ObjectInput in)
throws java.io.IOException {
READER$.read(this, SpecificData.getDecoder(in));
}
}
/**
 * Serializes one {@link Employee} three times into the Avro data file
 * {@code F:/hadoop/avro/e.avro}, using the compiled (specific) record class.
 *
 * @throws IOException if the data file cannot be created or written
 */
@Test
public void write() throws IOException {
    // Datum writer bound to the generated Employee class.
    SpecificDatumWriter<Employee> empDatumWriter = new SpecificDatumWriter<Employee>(Employee.class);

    // The record to serialize.
    Employee e = new Employee();
    e.setAge(12);
    e.setName("Bob");

    // try-with-resources closes the file writer even when create/append throws,
    // so the file handle is never leaked.
    try (DataFileWriter<Employee> empFileWriter = new DataFileWriter<Employee>(empDatumWriter)) {
        // Start the data file using the record's own schema.
        empFileWriter.create(e.getSchema(), new File("F:/hadoop/avro/e.avro"));
        // Append the same record three times.
        empFileWriter.append(e);
        empFileWriter.append(e);
        empFileWriter.append(e);
    }
}
/**
 * Reads every {@link Employee} record from the Avro data file
 * {@code F:/hadoop/avro/e.avro} and prints each one to standard output.
 *
 * @throws IOException if the data file cannot be opened or read
 */
@Test
public void read() throws IOException {
    // Datum reader bound to the generated Employee class.
    SpecificDatumReader<Employee> empDatumReader = new SpecificDatumReader<Employee>(Employee.class);

    // try-with-resources closes the file reader once iteration ends or fails;
    // the original never closed it.
    try (DataFileReader<Employee> dataReader =
            new DataFileReader<Employee>(new File("F:/hadoop/avro/e.avro"), empDatumReader)) {
        for (Employee employee : dataReader) {
            System.out.println(employee);
        }
    }
}
実行結果:
{"Name": "Bob", "age": 12}
{"Name": "Bob", "age": 12}
{"Name": "Bob", "age": 12}
非コンパイルSchema
emp.avscファイルをコンパイルする必要はなく、スキーマファイルを直接使用します
/**
 * Serializes a generic record without a compiled class: the schema is parsed
 * from {@code F:/hadoop/avro/emp.avsc} at run time and the same record is written
 * three times to {@code F:/hadoop/avro/e2.avro}.
 *
 * @throws IOException if the schema file cannot be read or the data file cannot be written
 */
@Test
public void writeInSchme() throws IOException {
    // Parse the schema directly from the .avsc file.
    Schema schema = new Schema.Parser().parse(new File("F:/hadoop/avro/emp.avsc"));

    // Schema-driven record used in place of a generated JavaBean; the field
    // names passed to put(...) must exist in the parsed schema.
    GenericRecord e = new GenericData.Record(schema);
    e.put("Name", "tomas");
    e.put("age", 25);

    DatumWriter<GenericRecord> w1 = new SpecificDatumWriter<GenericRecord>(schema);
    // try-with-resources closes the file writer even when create/append throws.
    try (DataFileWriter<GenericRecord> w2 = new DataFileWriter<GenericRecord>(w1)) {
        w2.create(schema, new File("F:/hadoop/avro/e2.avro"));
        w2.append(e);
        w2.append(e);
        w2.append(e);
    }
}
/**
 * Reads the Avro data file {@code F:/hadoop/avro/e2.avro} without a compiled class,
 * using the schema parsed from {@code F:/hadoop/avro/emp.avsc}, and prints the
 * {@code Name} field of every record.
 *
 * @throws Exception if the schema file or the data file cannot be read
 */
@Test
public void readInSchema() throws Exception {
    // Parse the schema directly from the .avsc file.
    Schema schema = new Schema.Parser().parse(new File("F:/hadoop/avro/emp.avsc"));

    DatumReader<GenericRecord> r1 = new SpecificDatumReader<GenericRecord>(schema);
    // try-with-resources closes the file reader even when reading fails part-way.
    try (DataFileReader<GenericRecord> r2 =
            new DataFileReader<GenericRecord>(new File("F:/hadoop/avro/e2.avro"), r1)) {
        while (r2.hasNext()) {
            GenericRecord rec = r2.next();
            System.out.println(rec.get("Name"));
        }
    }
}
実行結果:
tomas
tomas
tomas