Avroによるデータのシリアライズ


Apache Avroは、プログラミング言語に依存しないデータシリアライズシステムです。HadoopのWritable型が抱える欠点、すなわち言語間の移植性の欠如を解決するために設計されました。Avroのスキーマ(Schema)は通常JSONで記述され、データは通常バイナリ形式でエンコードされます。
Avroの使用
Avroの使い方は、Schemaをコンパイルする方法と、コンパイルしない方法の2種類に分けられます。
コンパイルSchema
  • schemaを定義する: emp.avsc
  • //json     
    {
    	"namespace": "tutorialspoint.com",
    	"type": "record",
    	"name": "emp",
    	"fields": [
    		{"name": "name", "type": "string"},
    		{"name": "id", "type": "int"},
    		{"name": "salary", "type": "int"},
    		{"name": "age", "type": "int"},
    		{"name": "address", "type": "string"}
    	]
    }
    
  • Schemaをコンパイルし、Javaクラスを生成して、ソースコードをIDEA(IntelliJ IDEA)にインポートする
  • // カレントディレクトリ(.)にJavaソースを生成する
    cmd> java -jar avro-tools-1.8.2.jar compile schema emp.avsc .
    
    // IDEAのプロジェクト(pom.xml)にAvroの依存関係を追加する
    <dependency>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro</artifactId>
        <version>1.8.2</version>
    </dependency>
    
    

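    コンパイルで生成されたソースは次のとおりです。
    (注: 以下のソースは、名前空間 Tutorialspoint・レコード名 Employee・フィールド Name(string)と age(int)を持つスキーマから生成されたものであり、上に示した emp.avsc の内容とは一致しません。以降のサンプルコードを実行する場合は、下記の SCHEMA$ に埋め込まれているスキーマを emp.avsc として使用してください。)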
    package Tutorialspoint;
    
    import org.apache.avro.specific.SpecificData;
    import org.apache.avro.message.BinaryMessageEncoder;
    import org.apache.avro.message.BinaryMessageDecoder;
    import org.apache.avro.message.SchemaStore;
    
    /**
     * Avro specific-record class for the {@code Employee} record in namespace
     * {@code Tutorialspoint}, annotated as Avro-generated.
     *
     * <p>The record has two fields, in schema order: {@code Name} (string, position 0)
     * and {@code age} (int, position 1). Besides plain accessors it offers a
     * {@link Builder}, single-object {@code ByteBuffer} encoding/decoding, and
     * {@code writeExternal}/{@code readExternal} implementations backed by Avro
     * datum writer/reader instances.
     */
    @SuppressWarnings("all")
    @org.apache.avro.specific.AvroGenerated
    public class Employee extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
      private static final long serialVersionUID = -8873171083721622992L;
      // Schema parsed once from the embedded JSON: record Employee with fields Name (string) and age (int).
      public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Employee\",\"namespace\":\"Tutorialspoint\",\"fields\":[{\"name\":\"Name\",\"type\":\"string\"},{\"name\":\"age\",\"type\":\"int\"}]}");
      /** Returns the schema shared by every instance of this class. */
      public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
    
      // Data model handed to the encoder, decoder, datum writer and datum reader created below.
      private static SpecificData MODEL$ = new SpecificData();
    
      // Shared encoder used by toByteBuffer().
      private static final BinaryMessageEncoder<Employee> ENCODER =
          new BinaryMessageEncoder<Employee>(MODEL$, SCHEMA$);
    
      // Shared decoder used by fromByteBuffer() and returned by getDecoder().
      private static final BinaryMessageDecoder<Employee> DECODER =
          new BinaryMessageDecoder<Employee>(MODEL$, SCHEMA$);
    
      /**
       * Return the BinaryMessageDecoder instance used by this class.
       */
      public static BinaryMessageDecoder<Employee> getDecoder() {
        return DECODER;
      }
    
      /**
       * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}.
       * @param resolver a {@link SchemaStore} used to find schemas by fingerprint
       * @return a new decoder built from this class's model and schema plus {@code resolver}
       */
      public static BinaryMessageDecoder<Employee> createDecoder(SchemaStore resolver) {
        return new BinaryMessageDecoder<Employee>(MODEL$, SCHEMA$, resolver);
      }
    
      /**
       * Serializes this Employee to a ByteBuffer using the shared encoder.
       * @return the encoded record
       * @throws java.io.IOException if encoding fails
       */
      public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException {
        return ENCODER.encode(this);
      }
    
      /**
       * Deserializes an Employee from a ByteBuffer using the shared decoder.
       * @param b the buffer holding an encoded Employee
       * @return the decoded record
       * @throws java.io.IOException if decoding fails
       */
      public static Employee fromByteBuffer(
          java.nio.ByteBuffer b) throws java.io.IOException {
        return DECODER.decode(b);
      }
    
      // Record state. The fields are public but deprecated; the accessors below read and write them.
      @Deprecated public CharSequence Name;
      @Deprecated public int age;
    
      /**
       * Default constructor.  Note that this does not initialize fields
       * to their default values from the schema.  If that is desired then
       * one should use newBuilder().
       */
      public Employee() {}
    
      /**
       * All-args constructor.
       * @param Name The new value for Name
       * @param age The new value for age; it is unboxed into an {@code int} field,
       *            so passing {@code null} throws a NullPointerException
       */
      public Employee(CharSequence Name, Integer age) {
        this.Name = Name;
        this.age = age;
      }
    
      /** Returns the schema of this record (the class-level {@link #SCHEMA$}). */
      public org.apache.avro.Schema getSchema() { return SCHEMA$; }
      // Used by DatumWriter.  Applications should not call.
      // Positional read: index 0 is Name, index 1 is age; any other index is rejected.
      public Object get(int field$) {
        switch (field$) {
        case 0: return Name;
        case 1: return age;
        default: throw new org.apache.avro.AvroRuntimeException("Bad index");
        }
      }
    
      // Used by DatumReader.  Applications should not call.
      // Positional write: the value is cast to the field's type (CharSequence for 0, Integer for 1).
      @SuppressWarnings(value="unchecked")
      public void put(int field$, Object value$) {
        switch (field$) {
        case 0: Name = (CharSequence)value$; break;
        case 1: age = (Integer)value$; break;
        default: throw new org.apache.avro.AvroRuntimeException("Bad index");
        }
      }
    
      /**
       * Gets the value of the 'Name' field.
       * @return The value of the 'Name' field.
       */
      public CharSequence getName() {
        return Name;
      }
    
      /**
       * Sets the value of the 'Name' field.
       * @param value the value to set.
       */
      public void setName(CharSequence value) {
        this.Name = value;
      }
    
      /**
       * Gets the value of the 'age' field.
       * @return The value of the 'age' field (boxed from the primitive field).
       */
      public Integer getAge() {
        return age;
      }
    
      /**
       * Sets the value of the 'age' field.
       * @param value the value to set; unboxed on assignment, so {@code null}
       *              throws a NullPointerException.
       */
      public void setAge(Integer value) {
        this.age = value;
      }
    
      /**
       * Creates a new Employee RecordBuilder.
       * @return A new Employee RecordBuilder
       */
      public static Builder newBuilder() {
        return new Builder();
      }
    
      /**
       * Creates a new Employee RecordBuilder by copying an existing Builder.
       * @param other The existing builder to copy.
       * @return A new Employee RecordBuilder
       */
      public static Builder newBuilder(Builder other) {
        return new Builder(other);
      }
    
      /**
       * Creates a new Employee RecordBuilder by copying an existing Employee instance.
       * @param other The existing instance to copy.
       * @return A new Employee RecordBuilder
       */
      public static Builder newBuilder(Employee other) {
        return new Builder(other);
      }
    
      /**
       * RecordBuilder for Employee instances.
       *
       * <p>Tracks, per field position, whether a value has been set; {@link #build()}
       * uses the field's default value for any position that was not set.
       */
      public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Employee>
        implements org.apache.avro.data.RecordBuilder<Employee> {
    
        // Pending field values; meaningful only when the matching set-flag is true.
        private CharSequence Name;
        private int age;
    
        /** Creates a new Builder */
        private Builder() {
          super(SCHEMA$);
        }
    
        /**
         * Creates a Builder by copying an existing Builder.
         * @param other The existing Builder to copy.
         */
        private Builder(Builder other) {
          super(other);
          // Copy a field only when isValidValue accepts it; the value is deep-copied and flagged as set.
          if (isValidValue(fields()[0], other.Name)) {
            this.Name = data().deepCopy(fields()[0].schema(), other.Name);
            fieldSetFlags()[0] = true;
          }
          if (isValidValue(fields()[1], other.age)) {
            this.age = data().deepCopy(fields()[1].schema(), other.age);
            fieldSetFlags()[1] = true;
          }
        }
    
        /**
         * Creates a Builder by copying an existing Employee instance
         * @param other The existing instance to copy.
         */
        private Builder(Employee other) {
                super(SCHEMA$);
          // Same per-field copy as the Builder-copy constructor, reading from the record's fields.
          if (isValidValue(fields()[0], other.Name)) {
            this.Name = data().deepCopy(fields()[0].schema(), other.Name);
            fieldSetFlags()[0] = true;
          }
          if (isValidValue(fields()[1], other.age)) {
            this.age = data().deepCopy(fields()[1].schema(), other.age);
            fieldSetFlags()[1] = true;
          }
        }
    
        /**
          * Gets the value of the 'Name' field.
          * @return The value.
          */
        public CharSequence getName() {
          return Name;
        }
    
        /**
          * Sets the value of the 'Name' field.
          * @param value The value of 'Name'.
          * @return This builder.
          */
        public Builder setName(CharSequence value) {
          // validate(...) runs before the value is stored and the flag is raised.
          validate(fields()[0], value);
          this.Name = value;
          fieldSetFlags()[0] = true;
          return this;
        }
    
        /**
          * Checks whether the 'Name' field has been set.
          * @return True if the 'Name' field has been set, false otherwise.
          */
        public boolean hasName() {
          return fieldSetFlags()[0];
        }
    
    
        /**
          * Clears the value of the 'Name' field.
          * @return This builder.
          */
        public Builder clearName() {
          Name = null;
          fieldSetFlags()[0] = false;
          return this;
        }
    
        /**
          * Gets the value of the 'age' field.
          * @return The value.
          */
        public Integer getAge() {
          return age;
        }
    
        /**
          * Sets the value of the 'age' field.
          * @param value The value of 'age'.
          * @return This builder.
          */
        public Builder setAge(int value) {
          validate(fields()[1], value);
          this.age = value;
          fieldSetFlags()[1] = true;
          return this;
        }
    
        /**
          * Checks whether the 'age' field has been set.
          * @return True if the 'age' field has been set, false otherwise.
          */
        public boolean hasAge() {
          return fieldSetFlags()[1];
        }
    
    
        /**
          * Clears the value of the 'age' field.
          * @return This builder.
          */
        public Builder clearAge() {
          // Only the set-flag is reset; the primitive age value itself is left as is.
          fieldSetFlags()[1] = false;
          return this;
        }
    
        /**
         * Builds a new Employee from the values set on this builder.
         * A field whose set-flag is false takes {@code defaultValue(field)} instead.
         * @return the newly created record
         * @throws org.apache.avro.AvroRuntimeException wrapping any exception raised while building
         */
        @SuppressWarnings("unchecked")
        public Employee build() {
          try {
            Employee record = new Employee();
            record.Name = fieldSetFlags()[0] ? this.Name : (CharSequence) defaultValue(fields()[0]);
            record.age = fieldSetFlags()[1] ? this.age : (Integer) defaultValue(fields()[1]);
            return record;
          } catch (Exception e) {
            throw new org.apache.avro.AvroRuntimeException(e);
          }
        }
      }
    
      // Datum writer for this schema, created from MODEL$; used by writeExternal.
      @SuppressWarnings("unchecked")
      private static final org.apache.avro.io.DatumWriter<Employee>
        WRITER$ = (org.apache.avro.io.DatumWriter<Employee>)MODEL$.createDatumWriter(SCHEMA$);
    
      /** Writes this record to {@code out} through WRITER$ and the encoder SpecificData provides for the stream. */
      @Override public void writeExternal(java.io.ObjectOutput out)
        throws java.io.IOException {
        WRITER$.write(this, SpecificData.getEncoder(out));
      }
    
      // Datum reader for this schema, created from MODEL$; used by readExternal.
      @SuppressWarnings("unchecked")
      private static final org.apache.avro.io.DatumReader<Employee>
        READER$ = (org.apache.avro.io.DatumReader<Employee>)MODEL$.createDatumReader(SCHEMA$);
    
      /** Reads into this record from {@code in} through READER$ and the decoder SpecificData provides for the stream. */
      @Override public void readExternal(java.io.ObjectInput in)
        throws java.io.IOException {
        READER$.read(this, SpecificData.getDecoder(in));
      }
    
    }
    
    
  • シリアル化
  • /**
     * Serialization test using the compiled {@code Employee} class: writes the same
     * record three times to the Avro data file {@code F:/hadoop/avro/e.avro}.
     *
     * @throws IOException if the data file cannot be created or written
     */
    @Test
    public void write() throws IOException {
        // Typed datum writer for Employee records (no raw type).
        SpecificDatumWriter<Employee> empDatumWriter = new SpecificDatumWriter<Employee>(Employee.class);

        // Record to serialize.
        Employee e = new Employee();
        e.setAge(12);
        e.setName("Bob");

        // try-with-resources closes the file writer even when create/append throws,
        // so the file handle is never leaked.
        try (DataFileWriter<Employee> empFileWriter = new DataFileWriter<Employee>(empDatumWriter)) {
            // Open the target file for records of the Employee schema.
            empFileWriter.create(e.getSchema(), new File("F:/hadoop/avro/e.avro"));
            // Append the same record three times.
            empFileWriter.append(e);
            empFileWriter.append(e);
            empFileWriter.append(e);
        }
    }
    
  • デシリアライズ
  • /**
     * Deserialization test using the compiled {@code Employee} class: reads every
     * record from {@code F:/hadoop/avro/e.avro} and prints it to standard output.
     *
     * @throws IOException if the data file cannot be opened or read
     */
    @Test
    public void read() throws IOException {
        // Typed datum reader for Employee records (no raw type).
        SpecificDatumReader<Employee> empDatumReader = new SpecificDatumReader<Employee>(Employee.class);

        // try-with-resources closes the file reader once iteration finishes or fails.
        try (DataFileReader<Employee> dataReader =
                new DataFileReader<Employee>(new File("F:/hadoop/avro/e.avro"), empDatumReader)) {
            for (Employee employee : dataReader) {
                System.out.println(employee);
            }
        }
    }

    {"Name": "Bob", "age": 12}
    {"Name": "Bob", "age": 12}
    {"Name": "Bob", "age": 12}
    

    非コンパイルSchema
    emp.avscファイルをコンパイルする必要はなく、そのまま直接使用します。
  • シリアル化
  • /**
     * Serialization test without generated classes: parses the schema file at
     * runtime, fills a {@code GenericRecord} by field name, and writes it three
     * times to {@code F:/hadoop/avro/e2.avro}.
     *
     * @throws IOException if the schema file cannot be read or the data file cannot be written
     */
    @Test
    public void writeInSchme() throws IOException {
        // Load the schema directly from the .avsc file.
        Schema schema = new Schema.Parser().parse(new File("F:/hadoop/avro/emp.avsc"));

        // Generic record populated by field name instead of through a generated bean.
        // NOTE(review): the keys below assume emp.avsc declares fields "Name" and "age" - confirm.
        GenericRecord e = new GenericData.Record(schema);
        e.put("Name", "tomas");
        e.put("age", 25);

        DatumWriter<GenericRecord> w1 = new SpecificDatumWriter<GenericRecord>(schema);
        // try-with-resources closes the file writer even when create/append throws.
        try (DataFileWriter<GenericRecord> w2 = new DataFileWriter<GenericRecord>(w1)) {
            w2.create(schema, new File("F:/hadoop/avro/e2.avro"));
            w2.append(e);
            w2.append(e);
            w2.append(e);
        }
    }
    
  • デシリアライズ
  • /**
     * Deserialization test without generated classes: parses the schema file at
     * runtime, reads every record from {@code F:/hadoop/avro/e2.avro}, and prints
     * each record's {@code Name} field.
     *
     * @throws Exception if the schema file or the data file cannot be read
     */
    @Test
    public void readInSchema() throws Exception {
        // Load the schema directly from the .avsc file.
        Schema schema = new Schema.Parser().parse(new File("F:/hadoop/avro/emp.avsc"));

        DatumReader<GenericRecord> r1 = new SpecificDatumReader<GenericRecord>(schema);
        // try-with-resources closes the file reader even when reading throws;
        // the typed reader removes the cast on next().
        try (DataFileReader<GenericRecord> r2 =
                new DataFileReader<GenericRecord>(new File("F:/hadoop/avro/e2.avro"), r1)) {
            while (r2.hasNext()) {
                GenericRecord rec = r2.next();
                System.out.println(rec.get("Name"));
            }
        }
    }
    
    出力:
    tomas
    tomas
    tomas