Diyo

$HADOOP_USER_NAME

#创建命名空间
create_namespace 'bd1902'

#展示所有命名空间
list_namespace

#删除命名空间(注意:The namespace must be empty.)
drop_namespace 'IMUT'

 

create 't1', 'f1', 'f2', 'f3'
create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}


#创建一张表,指定列族版本号

create 'bd1902:student', 'baseinfo', 'extrainfo'
create 'bd1902:student1', {NAME => 'baseinfo', VERSIONS => 3},{NAME => 'extrainfo',VERSIONS => 5}

create 'bd1803:employee', 'baseinfo', 'extrainfo'
create 'bd1803:employee1', {NAME => 'baseinfo', VERSIONS => 3},{NAME => 'extrainfo',VERSIONS => 5}


describe 'bd1902:student2'

describe 'bigdata:test1'


HBase 热点问题(数据倾斜),同时影响读操作和写操作,常见成因:
1. 默认建表只有一个分区(region)
2. rowkey 单调递增

解决热点问题:
1. 预分区(在建表过程中指定)
2. 让 rowkey 随机散列:hash、MD5、SHA256 等


#创建表,预定义分区。按分界点划分区间(左闭右开):rowkey<1000、1000<=rowkey<2000、…、rowkey>=4000

create 'bd1902:student2', {NAME=>'baseinfo',VERSIONS=>3}, SPLITS => ['1000', '2000', '3000', '4000']  # 四个分界点,分五个区


create 'bd1803:employee3', {NAME=>'baseinfo',VERSIONS=>3}, SPLITS => ['1000', '2000', '3000', '4000']
put 'hbase_test:teacher3','2000009','baseinfo:name','zhangsan'

#创建表,分区标准写在文件中;如果rowkey以0001等开头,可使用 | 或 ~ 帮助划分rowkey区域;文件放在进入 hbase shell 时所在的目录下

create 'bd1902:student3','baseinfo',{SPLITS_FILE => '/home/briup/splits.txt'}

#测试 rowkey 分区的开闭区间(左闭右开)
put 'bd1902:student2','1000','baseinfo:name' ,'jack'


create 'bd1803:employee4','baseinfo',{SPLITS_FILE => '/home/hbase/sps.txt'}
create 'bd1803:employee4', 'baseinfo', SPLITS_FILE => 'sps.txt'

#使用HexStringSplit算法进行分区,分成10个region;散列字符不能包含中文,适合rowkey本身或其前缀是16进制字符串的场景(如经哈希/SHA等算法处理后的rowkey)

create 'bd1902:student4', 'baseinfo', {NUMREGIONS => 10, SPLITALGO => 'HexStringSplit'}


create 'bd1803:employee5', 'baseinfo', {NUMREGIONS => 10, SPLITALGO => 'HexStringSplit'}

#使用UniformSplit算法进行分区,rowkey可以包含中文,适合随机字节数组形式的rowkey

create 'bd1902:student5', 'baseinfo', {NUMREGIONS => 5, SPLITALGO => 'UniformSplit'}


create 'bd1803:employee6', 'baseinfo', {NUMREGIONS => 5, SPLITALGO => 'UniformSplit'}
put 'bd1803:employee3', '2000', 'baseinfo:name', '张三'

 

#JRuby(脚本)变量
#create 返回表的引用值,可保存在变量中复用
t1 = create 't1', 'f1'

#alter修改表结构——增加列族
alter 'bd1902:student' ,{NAME => 'extrainfo' ,VERSIONS => 5},{NAME => 'secret',VERSIONS => 5 }

alter 'bigdata:test2', {NAME => 'extrainfo', IN_MEMORY => true}, {NAME => 'secret', VERSIONS => 5}

#alter修改表结构——删除列族
alter 'bd1902:student',{METHOD => 'delete',NAME => 'baseinfo'}

alter 'bigdata:test2', {METHOD => 'delete',NAME => 'baseinfo'}

---------------------------------------

#插入数据(兼顾更新),写入的最小单位是 Cell
#用法: put 'ns:t','r','cf:q','v'[,'ts']
put 'bd1902:student1','1001','baseinfo:name','Kenvin'
put 'bd1902:student1','1001','baseinfo:gender','male'
put 'bd1902:student1','1001','baseinfo:age','40'
put 'bd1902:student1','1001','baseinfo:pos','CTO'


put 'bd1902:student1','1002','baseinfo:name','Terry'
put 'bd1902:student1','1002','baseinfo:gender','male'
put 'bd1902:student1','1002','baseinfo:age','36'
put 'bd1902:student1','1002','baseinfo:pos','Manager'
put 'bd1902:student1','2001','baseinfo:name','Wood'
put 'bd1902:student1','2001','baseinfo:gender','male'
put 'bd1902:student1','2001','baseinfo:age','32'
put 'bd1902:student1','2001','baseinfo:pos','Manager'
put 'bd1902:student1','2002','baseinfo:name','Terry'
put 'bd1902:student1','2002','baseinfo:gender','male'
put 'bd1902:student1','2002','baseinfo:age','30'
put 'bd1902:student1','2002','baseinfo:pos','Teacher'
put 'bd1902:student1','3001','baseinfo:name','Lurry'
put 'bd1902:student1','3001','baseinfo:gender','male'
put 'bd1902:student1','3001','baseinfo:age','36'
put 'bd1902:student1','3001','baseinfo:pos','Teacher'

scan 'bd1902:student1'


put 'bd1803:employee1','1001','baseinfo:gender','male'
put 'briup:employee3','2000','baseinfo:name','tom'

#插入数据时显式指定timestamp
put 'hbase_test:teacher5','100000000','extrainfo:salary','5000',1488888888888

 

#查询
#get  单行查询
#scan 多行查询

#获得某一个特定值

get 't1', 'r1', ['c1', 'c2']

get 'bigdata:test1','10','baseinfo:name'

#获得前5个版本的数据
get 'bd1803:employee1','1001',{COLUMN => 'baseinfo:position',VERSIONS => 5}

#获得某个时间段数据,不一定是时间最新的数据
get 'hbase_test:teacher2', '10001', {TIMERANGE => [1479371084728, 1479373228331]}

 


#scan 扫描某张表,相当于 select *
scan 'bd1803:employee1'

scan 'bd1902:student1'


#scan 扫描表中某一列
scan 'test1:student5',{COLUMNS=>'baseinfo:name'}



#scan 使用LIMIT进行行数限制
scan 'test1:student5',{COLUMNS=>'baseinfo:name',LIMIT=>2}

#scan 指定从某一行开始扫描
scan 'hbase_test:teacher2',{COLUMNS=>'baseinfo:name',LIMIT=>2,STARTROW=>'20001'}

 

#scan 扫描所有版本
scan 'bigdata:test1','10',{VERSIONS=>5}

#在hbase对hfile进行合并(compact)操作之前,
#scan 加RAW=>true,即使超出版本限制的数据也能访问到
scan 'briup:employee3',{VERSIONS=>5,RAW=>true}


#scan 使用行键前缀过滤器(只有这一个以属性方式指定)
scan 'bigdata:test1', {ROWPREFIXFILTER => '10'}
scan 'bd1902:student1', {ROWPREFIXFILTER => '1002'}

#scan 使用空值行键过滤器,只返回行键
scan 'bigdata:test1',{FILTER=>'KeyOnlyFilter()'}

scan 'bigdata:test1',{FILTER=>"ColumnPrefixFilter('na') "}

#过滤器构造参数的几种类型:
#1 数值(数字)
#2 CompareFilter.CompareOp 比较符,如 >
#3 ByteArrayComparable,如 binary:1000、substring:xxx
#4 byte[],如 ''

scan 'bd1803:employee1',{FILTER=>"RowFilter(>,'binary:1001')"}

scan 'bd1902:student1',{FILTER=>"RowFilter(>,'binary:2000')"}

 


#scan 使用行键过滤器,binary: 帮助数据类型转化
scan 'hbase_test:teacher2',{FILTER =>"RowFilter(!=,'binary:10001')"}

#scan 使用列名过滤器
scan 'test1:student5',{FILTER =>"QualifierFilter(>=,'binary:baseinfo:name')"}

#scan 使用值子串过滤器
scan 'test1:student5',{FILTER =>"ValueFilter(=,'binary:zhao')"}

#列名前缀过滤器
scan 'test1:student5',{FILTER =>"ColumnPrefixFilter('name')"}

#scan 使用多种过滤器进行条件结合
scan 'hbase_test:teacher2',{FILTER =>"(ValueFilter(=,'binary:hello')) OR (RowFilter (>,'binary:10'))"}

 

#scan 以JRuby对象方式构造过滤器(此例为KeyOnlyFilter,只返回行键;若要限制每页展示数量应使用PageFilter)
scan 'bigdata:test1',{FILTER =>org.apache.hadoop.hbase.filter.KeyOnlyFilter.new()}

#scan 使用行键过滤器,进行正则表达式的匹配
scan 'test1', {FILTER => RowFilter.new(CompareFilter::CompareOp.valueOf('EQUAL'),RegexStringComparator.new('.*ll.*'))}

scan 'bd1902:student1', {FILTER => org.apache.hadoop.hbase.filter.RowFilter.new(org.apache.hadoop.hbase.filter.CompareFilter::CompareOp.valueOf('EQUAL'),org.apache.hadoop.hbase.filter.RegexStringComparator.new('.*3.*'))}

//-----------------------


#删除数据
delete 't1','r1','c1'


#清空某张表
truncate 't1'

#disable 某张表
disable 'bigdata:test1'

#删除某张表(需先disable)
drop 'bigdata:test2'


#大合并 hfile(major compaction)
major_compact '583b13b5efb36a6ae7794d7e60b4c3a8'
major_compact 'bigdata:test2'

#小合并(minor compaction)


#移动region
move 'ENCODED_REGIONNAME', 'SERVER_NAME'
#第一个参数指的是region名最后一部分的编码(region名以逗号分隔各部分)
move 'a39dc69bd00d19e556ae17e4aeb1ebe1','datanode02,16020,1479354142616'


// 行过滤器(Java API 示例片段)
// 1 行键范围
ByteArrayComparable com1 = new BinaryComparator(Bytes.toBytes("briup004"));
RowFilter rf1 = new RowFilter(CompareOp.LESS, com1);
// 2 行键子串匹配
ByteArrayComparable com2 = new SubstringComparator("007");
RowFilter rf2 = new RowFilter(CompareOp.EQUAL, com2);
// 3 某个列标示符的值范围
SingleColumnValueFilter scf1 = new SingleColumnValueFilter
(Bytes.toBytes("infos"), Bytes.toBytes("name"), CompareOp.LESS_OR_EQUAL, Bytes.toBytes("张三"));
// 4 匹配正则表达式(注:此处实际使用的是 SubstringComparator,若需正则匹配应改用 RegexStringComparator)
ByteArrayComparable com3 = new SubstringComparator("test.");
SingleColumnValueFilter scf2 = new SingleColumnValueFilter
(Bytes.toBytes("infos"), Bytes.toBytes("name"), CompareOp.EQUAL,com3);
// 5 匹配子串 不区分大小写
ByteArrayComparable com4 = new SubstringComparator("te");
SingleColumnValueFilter scf3 = new SingleColumnValueFilter
(Bytes.toBytes("infos"), Bytes.toBytes("name"), CompareOp.EQUAL,com4);

 

分类:

技术点:

相关文章:

  • 2021-10-15
  • 2021-10-15
  • 2021-10-15
  • 2021-10-15
  • 2021-10-15
  • 2021-11-07
  • 2021-11-07
  • 2021-11-14
猜你喜欢
  • 2021-11-07
  • 2021-12-22
  • 2021-12-10
  • 2021-11-17
  • 2021-10-25
  • 2021-10-25
相关资源
相似解决方案