目前 Python 有两个库可以操作 HBase:hbase-thrift 和 happybase。
happybase 使用起来比较简单方便,因此重点学习该库,hbase-thrift 只做简要介绍。
(一)hbase-thrift
1、使用前先添加库和依赖库:
pip install thrift
pip install hbase-thrift
pip install google-cloud
pip install google-cloud-vision
pip install kazoo
2、连接数据库的配置信息:
# Start the HBase Thrift server on the Linux host first:
#   /opt/cloudera/parcels/CDH/lib/hbase/bin/hbase-daemon.sh --config /opt/cloudera/parcels/CDH/lib/hbase/conf foreground_start thrift --infoport 9096 -p 9091
# Then run this Python script to connect to that server.
#
# TTransport and TBinaryProtocol are used below, so they must be imported
# explicitly; importing only TSocket leaves them undefined (NameError).
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket, TTransport

from hbase import Hbase
from hbase.ttypes import *  # noqa: F401,F403 - kept from the original snippet

host = "xxx.xxx.xxx.xxx"  # placeholder for the Thrift server address
port = 9091  # same port as the "-p 9091" option in the start command above
# Chooses TFramedTransport instead of TBufferedTransport below.
# NOTE(review): presumably this has to match the server's transport mode - confirm.
framed = False

socket = TSocket.TSocket(host, port)
if framed:
    transport = TTransport.TFramedTransport(socket)
else:
    transport = TTransport.TBufferedTransport(socket)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
3、操作数据库
# List every table name through the Thrift client created in the connection
# snippet (relies on the `transport` and `client` objects defined there).
print("Thrift2 Demo")
print("This demo assumes you have a table called \"example\" with a column family called \"family1\"")

# Open the connection.
transport.open()
try:
    # Fetch all table names.
    tableNames = client.getTableNames()
    print('tableNames:', tableNames)
finally:
    # Close the connection even if the call above raises.
    transport.close()

#################################################
# Output recorded in the original article:
# Thrift2 Demo
# This demo assumes you have a table called "example" with a column family called "family1"
# tableNames: ['lrx_hbase_test', 'lrx_hbase_test2', 'lrx_hbase_test3', 'lrx_test']
(二)happybase
# Install the dependencies first:
#   pip install thrift
#   pip install happybase
# Start the HBase Thrift server on the Linux host:
#   /opt/cloudera/parcels/CDH/lib/hbase/bin/hbase-daemon.sh --config /opt/cloudera/parcels/CDH/lib/hbase/conf foreground_start thrift --infoport 9096 -p 9091 &
# Then run this Python script to connect to that server.
import happybase

from conf import setting

# Create the connection from the keyword arguments stored in setting.HBASE.
# NOTE(review): the original comment said a `size` parameter sets the number of
# connections in the pool; that presumably describes happybase.ConnectionPool
# rather than Connection - confirm.
connection = happybase.Connection(**setting.HBASE)
try:
    # Open the transport (no return value).
    connection.open()

    # Create a table (no return value); left disabled as in the original.
    # connection.create_table('lrx_test',
    #                         {
    #                             'data': dict()
    #                         })

    # Get a table object through the connection.
    table0 = connection.table('lrx_test')
    print('表对象为:')
    print(table0)  # recorded output: <happybase.table.Table name=b'lrx_test'>

    # Build a Table instance directly from the table name and the connection.
    table = happybase.Table('lrx_test', connection)
    print('表实例为:')
    print(table)  # recorded output: <happybase.table.Table name='lrx_test'>

    # Insert data (no return value): rows row0..row4 each get column data:<i>
    # with value <i>, then row5 gets column data:5 with value 'value1'.
    for i in range(5):
        table.put('row%s' % i, {'data:%s' % i: '%s' % i})
    table.put('row5', {'data:5': 'value1'})

    # Read the cell row1 / data:1.
    content = table.cells('row1', 'data:1')
    # Recorded output in the original article: [b'value1', b'value1']
    # (what is printed depends on what the table already holds).
    print(content)

    # Read a counter column; left disabled as in the original.
    # content2 = table.counter_get('row2', 'data:2')
    # print(content2)  # 0

    # Scan the table and print every (row key, data) pair.
    scanner = table.scan()
    for k, v in scanner:
        print(k, v)
    ###########################################
    # Output recorded in the original article:
    # b'row0' {b'data:0': b'0'}
    # b'row1' {b'data:1': b'value1'}
    # b'row2' {b'data:2': b'2'}
    # b'row3' {b'data:3': b'3'}
    # b'row4' {b'data:4': b'4'}
    print(scanner)  # recorded output: <generator object Table.scan at 0x000001E17CCDAF10>

    # Fetch one row and decode its keys and values with .decode().
    info = table.row('row2')
    info1 = {k.decode(): v.decode() for k, v in info.items()}
    print(info1)

    # List the table names. Stored under its own name so the `table` object
    # above is not overwritten by a list.
    table_names = connection.tables()
    print(table_names)
finally:
    # Close the transport (no return value), even when a step above fails.
    connection.close()