Hadoop基础-Apache Avro串行化与反串行化
作者:尹正杰
版权声明:原创作品,谢绝转载!否则将追究法律责任。
一.Apache Avro简介
1>.Apache Avro的来源
Apache Avro 是一个语言中立的数据序列化系统,它是由Hadoop之父Doug Cutting开发而来。因为Hadoop的Writable串行化只支持Java语言,即非跨语言。所以Doug Cutting开发了Avro,它是一种语言独立的数据序列化格式,也就是说它是跨语言的。
2>.Avro特点
Apache Avro™是一个数据序列化系统。它有以下特点:
第一:丰富的数据结构。
第二:紧凑,快速的二进制数据格式。
第三:一个容器文件,用于存储持久性数据。
第四:远程过程调用(RPC)。
第五:与动态语言的简单集成。读取或写入数据文件不需要代码生成,也不需要使用或实现RPC协议。代码生成是一种可选的优化,只有静态类型语言才值得实现。
想要了解更多Avro,可以参考Apache官网(http://avro.apache.org/docs/1.8.2/),我就懒得搬运了,直接上干货,本篇博客介绍了两种Avro序列化与反序列化的方式。
3>.安装Avro
下载Avro:http://mirror.bit.edu.cn/apache/avro/avro-1.8.2/
二.Avro环境准备
其实部署Avro的流程大致可以用下图来表示,配置起来相对来说还是蛮简单的,具体操作如下:
1>.创建emp.avsc文件,内容如下
{
  "namespace": "tutorialspoint.com",
  "type": "record",
  "name": "Emp",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "id", "type": "int"},
    {"name": "salary", "type": "int"},
    {"name": "age", "type": "int"},
    {"name": "address", "type": "string"}
  ]
}
2>.将下载的avro-1.8.2.jar和avro-tools-1.8.2.jar文件放在emp.avsc同级目录
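3>.编译schema文件(在emp.avsc所在目录执行:java -jar avro-tools-1.8.2.jar compile schema emp.avsc . ,会按namespace在当前目录下生成tutorialspoint/com/Emp.java)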
4>.查看生成的Emp.java文件内容
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoder<Emp> ENCODER = 23 new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder<Emp> DECODER = 26 new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder<Emp> getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}. 37 * @param resolver a {@link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public java.lang.CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public java.lang.CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use <code>newBuilder()</code>. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(java.lang.CharSequence name, java.lang.Integer id, java.lang.Integer salary, java.lang.Integer age, java.lang.CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public java.lang.Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 
97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, java.lang.Object value$) { 99 switch (field$) { 100 case 0: name = (java.lang.CharSequence)value$; break; 101 case 1: id = (java.lang.Integer)value$; break; 102 case 2: salary = (java.lang.Integer)value$; break; 103 case 3: age = (java.lang.Integer)value$; break; 104 case 4: address = (java.lang.CharSequence)value$; break; 105 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 106 } 107 } 108 109 /** 110 * Gets the value of the 'name' field. 111 * @return The value of the 'name' field. 112 */ 113 public java.lang.CharSequence getName() { 114 return name; 115 } 116 117 /** 118 * Sets the value of the 'name' field. 119 * @param value the value to set. 120 */ 121 public void setName(java.lang.CharSequence value) { 122 this.name = value; 123 } 124 125 /** 126 * Gets the value of the 'id' field. 127 * @return The value of the 'id' field. 128 */ 129 public java.lang.Integer getId() { 130 return id; 131 } 132 133 /** 134 * Sets the value of the 'id' field. 135 * @param value the value to set. 136 */ 137 public void setId(java.lang.Integer value) { 138 this.id = value; 139 } 140 141 /** 142 * Gets the value of the 'salary' field. 143 * @return The value of the 'salary' field. 144 */ 145 public java.lang.Integer getSalary() { 146 return salary; 147 } 148 149 /** 150 * Sets the value of the 'salary' field. 151 * @param value the value to set. 152 */ 153 public void setSalary(java.lang.Integer value) { 154 this.salary = value; 155 } 156 157 /** 158 * Gets the value of the 'age' field. 159 * @return The value of the 'age' field. 160 */ 161 public java.lang.Integer getAge() { 162 return age; 163 } 164 165 /** 166 * Sets the value of the 'age' field. 167 * @param value the value to set. 168 */ 169 public void setAge(java.lang.Integer value) { 170 this.age = value; 171 } 172 173 /** 174 * Gets the value of the 'address' field. 175 * @return The value of the 'address' field. 
176 */ 177 public java.lang.CharSequence getAddress() { 178 return address; 179 } 180 181 /** 182 * Sets the value of the 'address' field. 183 * @param value the value to set. 184 */ 185 public void setAddress(java.lang.CharSequence value) { 186 this.address = value; 187 } 188 189 /** 190 * Creates a new Emp RecordBuilder. 191 * @return A new Emp RecordBuilder 192 */ 193 public static tutorialspoint.com.Emp.Builder newBuilder() { 194 return new tutorialspoint.com.Emp.Builder(); 195 } 196 197 /** 198 * Creates a new Emp RecordBuilder by copying an existing Builder. 199 * @param other The existing builder to copy. 200 * @return A new Emp RecordBuilder 201 */ 202 public static tutorialspoint.com.Emp.Builder newBuilder(tutorialspoint.com.Emp.Builder other) { 203 return new tutorialspoint.com.Emp.Builder(other); 204 } 205 206 /** 207 * Creates a new Emp RecordBuilder by copying an existing Emp instance. 208 * @param other The existing instance to copy. 209 * @return A new Emp RecordBuilder 210 */ 211 public static tutorialspoint.com.Emp.Builder newBuilder(tutorialspoint.com.Emp other) { 212 return new tutorialspoint.com.Emp.Builder(other); 213 } 214 215 /** 216 * RecordBuilder for Emp instances. 217 */ 218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp> 219 implements org.apache.avro.data.RecordBuilder<Emp> { 220 221 private java.lang.CharSequence name; 222 private int id; 223 private int salary; 224 private int age; 225 private java.lang.CharSequence address; 226 227 /** Creates a new Builder */ 228 private Builder() { 229 super(SCHEMA$); 230 } 231 232 /** 233 * Creates a Builder by copying an existing Builder. 234 * @param other The existing Builder to copy. 
235 */ 236 private Builder(tutorialspoint.com.Emp.Builder other) { 237 super(other); 238 if (isValidValue(fields()[0], other.name)) { 239 this.name = data().deepCopy(fields()[0].schema(), other.name); 240 fieldSetFlags()[0] = true; 241 } 242 if (isValidValue(fields()[1], other.id)) { 243 this.id = data().deepCopy(fields()[1].schema(), other.id); 244 fieldSetFlags()[1] = true; 245 } 246 if (isValidValue(fields()[2], other.salary)) { 247 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 248 fieldSetFlags()[2] = true; 249 } 250 if (isValidValue(fields()[3], other.age)) { 251 this.age = data().deepCopy(fields()[3].schema(), other.age); 252 fieldSetFlags()[3] = true; 253 } 254 if (isValidValue(fields()[4], other.address)) { 255 this.address = data().deepCopy(fields()[4].schema(), other.address); 256 fieldSetFlags()[4] = true; 257 } 258 } 259 260 /** 261 * Creates a Builder by copying an existing Emp instance 262 * @param other The existing instance to copy. 263 */ 264 private Builder(tutorialspoint.com.Emp other) { 265 super(SCHEMA$); 266 if (isValidValue(fields()[0], other.name)) { 267 this.name = data().deepCopy(fields()[0].schema(), other.name); 268 fieldSetFlags()[0] = true; 269 } 270 if (isValidValue(fields()[1], other.id)) { 271 this.id = data().deepCopy(fields()[1].schema(), other.id); 272 fieldSetFlags()[1] = true; 273 } 274 if (isValidValue(fields()[2], other.salary)) { 275 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 276 fieldSetFlags()[2] = true; 277 } 278 if (isValidValue(fields()[3], other.age)) { 279 this.age = data().deepCopy(fields()[3].schema(), other.age); 280 fieldSetFlags()[3] = true; 281 } 282 if (isValidValue(fields()[4], other.address)) { 283 this.address = data().deepCopy(fields()[4].schema(), other.address); 284 fieldSetFlags()[4] = true; 285 } 286 } 287 288 /** 289 * Gets the value of the 'name' field. 290 * @return The value. 
291 */ 292 public java.lang.CharSequence getName() { 293 return name; 294 } 295 296 /** 297 * Sets the value of the 'name' field. 298 * @param value The value of 'name'. 299 * @return This builder. 300 */ 301 public tutorialspoint.com.Emp.Builder setName(java.lang.CharSequence value) { 302 validate(fields()[0], value); 303 this.name = value; 304 fieldSetFlags()[0] = true; 305 return this; 306 } 307 308 /** 309 * Checks whether the 'name' field has been set. 310 * @return True if the 'name' field has been set, false otherwise. 311 */ 312 public boolean hasName() { 313 return fieldSetFlags()[0]; 314 } 315 316 317 /** 318 * Clears the value of the 'name' field. 319 * @return This builder. 320 */ 321 public tutorialspoint.com.Emp.Builder clearName() { 322 name = null; 323 fieldSetFlags()[0] = false; 324 return this; 325 } 326 327 /** 328 * Gets the value of the 'id' field. 329 * @return The value. 330 */ 331 public java.lang.Integer getId() { 332 return id; 333 } 334 335 /** 336 * Sets the value of the 'id' field. 337 * @param value The value of 'id'. 338 * @return This builder. 339 */ 340 public tutorialspoint.com.Emp.Builder setId(int value) { 341 validate(fields()[1], value); 342 this.id = value; 343 fieldSetFlags()[1] = true; 344 return this; 345 } 346 347 /** 348 * Checks whether the 'id' field has been set. 349 * @return True if the 'id' field has been set, false otherwise. 350 */ 351 public boolean hasId() { 352 return fieldSetFlags()[1]; 353 } 354 355 356 /** 357 * Clears the value of the 'id' field. 358 * @return This builder. 359 */ 360 public tutorialspoint.com.Emp.Builder clearId() { 361 fieldSetFlags()[1] = false; 362 return this; 363 } 364 365 /** 366 * Gets the value of the 'salary' field. 367 * @return The value. 368 */ 369 public java.lang.Integer getSalary() { 370 return salary; 371 } 372 373 /** 374 * Sets the value of the 'salary' field. 375 * @param value The value of 'salary'. 376 * @return This builder. 
377 */ 378 public tutorialspoint.com.Emp.Builder setSalary(int value) { 379 validate(fields()[2], value); 380 this.salary = value; 381 fieldSetFlags()[2] = true; 382 return this; 383 } 384 385 /** 386 * Checks whether the 'salary' field has been set. 387 * @return True if the 'salary' field has been set, false otherwise. 388 */ 389 public boolean hasSalary() { 390 return fieldSetFlags()[2]; 391 } 392 393 394 /** 395 * Clears the value of the 'salary' field. 396 * @return This builder. 397 */ 398 public tutorialspoint.com.Emp.Builder clearSalary() { 399 fieldSetFlags()[2] = false; 400 return this; 401 } 402 403 /** 404 * Gets the value of the 'age' field. 405 * @return The value. 406 */ 407 public java.lang.Integer getAge() { 408 return age; 409 } 410 411 /** 412 * Sets the value of the 'age' field. 413 * @param value The value of 'age'. 414 * @return This builder. 415 */ 416 public tutorialspoint.com.Emp.Builder setAge(int value) { 417 validate(fields()[3], value); 418 this.age = value; 419 fieldSetFlags()[3] = true; 420 return this; 421 } 422 423 /** 424 * Checks whether the 'age' field has been set. 425 * @return True if the 'age' field has been set, false otherwise. 426 */ 427 public boolean hasAge() { 428 return fieldSetFlags()[3]; 429 } 430 431 432 /** 433 * Clears the value of the 'age' field. 434 * @return This builder. 435 */ 436 public tutorialspoint.com.Emp.Builder clearAge() { 437 fieldSetFlags()[3] = false; 438 return this; 439 } 440 441 /** 442 * Gets the value of the 'address' field. 443 * @return The value. 444 */ 445 public java.lang.CharSequence getAddress() { 446 return address; 447 } 448 449 /** 450 * Sets the value of the 'address' field. 451 * @param value The value of 'address'. 452 * @return This builder. 
453 */ 454 public tutorialspoint.com.Emp.Builder setAddress(java.lang.CharSequence value) { 455 validate(fields()[4], value); 456 this.address = value; 457 fieldSetFlags()[4] = true; 458 return this; 459 } 460 461 /** 462 * Checks whether the 'address' field has been set. 463 * @return True if the 'address' field has been set, false otherwise. 464 */ 465 public boolean hasAddress() { 466 return fieldSetFlags()[4]; 467 } 468 469 470 /** 471 * Clears the value of the 'address' field. 472 * @return This builder. 473 */ 474 public tutorialspoint.com.Emp.Builder clearAddress() { 475 address = null; 476 fieldSetFlags()[4] = false; 477 return this; 478 } 479 480 @Override 481 @SuppressWarnings("unchecked") 482 public Emp build() { 483 try { 484 Emp record = new Emp(); 485 record.name = fieldSetFlags()[0] ? this.name : (java.lang.CharSequence) defaultValue(fields()[0]); 486 record.id = fieldSetFlags()[1] ? this.id : (java.lang.Integer) defaultValue(fields()[1]); 487 record.salary = fieldSetFlags()[2] ? this.salary : (java.lang.Integer) defaultValue(fields()[2]); 488 record.age = fieldSetFlags()[3] ? this.age : (java.lang.Integer) defaultValue(fields()[3]); 489 record.address = fieldSetFlags()[4] ? 
this.address : (java.lang.CharSequence) defaultValue(fields()[4]); 490 return record; 491 } catch (java.lang.Exception e) { 492 throw new org.apache.avro.AvroRuntimeException(e); 493 } 494 } 495 } 496 497 @SuppressWarnings("unchecked") 498 private static final org.apache.avro.io.DatumWriter<Emp> 499 WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$); 500 501 @Override public void writeExternal(java.io.ObjectOutput out) 502 throws java.io.IOException { 503 WRITER$.write(this, SpecificData.getEncoder(out)); 504 } 505 506 @SuppressWarnings("unchecked") 507 private static final org.apache.avro.io.DatumReader<Emp> 508 READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$); 509 510 @Override public void readExternal(java.io.ObjectInput in) 511 throws java.io.IOException { 512 READER$.read(this, SpecificData.getDecoder(in)); 513 } 514 515 }