yiruliu

目前有两个 Python 库可以操作 HBase:hbase-thrift 和 happybase。

happybase使用起来比较简单方便,因此重点学习该库,hbase-thrift只做简要介绍。

(一)hbase-thrift

1、使用前先添加库和依赖库:

pip install thrift
pip install hbase-thrift
pip install google-cloud
pip install google-cloud-vision
pip install kazoo

2、连接数据库的配置信息:

# Start the HBase Thrift server on the Linux host first:
# /opt/cloudera/parcels/CDH/lib/hbase/bin/hbase-daemon.sh --config /opt/cloudera/parcels/CDH/lib/hbase/conf foreground_start thrift --infoport 9096 -p 9091
# Then run this Python script to connect to that server.

# TBinaryProtocol and TTransport are used below, so they must be imported
# explicitly; importing TSocket alone leaves them undefined (NameError).
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket, TTransport
from hbase import Hbase
from hbase.ttypes import *

# Address of the Thrift server; the port matches the `-p 9091` option above.
host = "xxx.xxx.xxx.xxx"
port = 9091
# Selects framed vs. buffered transport; this has to agree with how the
# Thrift server itself is configured.
framed = False

socket = TSocket.TSocket(host, port)
if framed:
    transport = TTransport.TFramedTransport(socket)
else:
    transport = TTransport.TBufferedTransport(socket)
# Wrap the transport in the binary protocol and build the HBase client on it.
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

3、操作数据库

print("Thrift2 Demo")
print("This demo assumes you have a table called \"example\" with a column family called \"family1\"")

# Open the connection.
transport.open()
try:
    # Fetch the names of all tables.
    tableNames = client.getTableNames()
    print('tableNames:', tableNames)
finally:
    # Close the connection even when the request above fails.
    transport.close()


#################################################
# Output:
# Thrift2 Demo
# This demo assumes you have a table called "example" with a column family called "family1"
# tableNames: ['lrx_hbase_test', 'lrx_hbase_test2', 'lrx_hbase_test3', 'lrx_test']

(二)happybase

# pip install thrift
# pip install happybase
# Start the HBase Thrift server on the Linux host first:
# /opt/cloudera/parcels/CDH/lib/hbase/bin/hbase-daemon.sh --config /opt/cloudera/parcels/CDH/lib/hbase/conf foreground_start thrift --infoport 9096 -p 9091 &
# Then run this Python script to connect to that server.
import happybase
from conf import setting

# Create the connection; setting.HBASE supplies the keyword arguments.
# NOTE: a pool `size` option belongs to happybase.ConnectionPool, not to
# happybase.Connection.
connection = happybase.Connection(**setting.HBASE)
# Open the underlying transport (returns nothing).
connection.open()
try:
    # Create a table (returns nothing).
    # connection.create_table('lrx_test',
    #                         {
    #                             'data': dict()
    #                         })

    # Get a table object through the connection; returns a
    # happybase.table.Table (its name is shown as bytes).
    table0 = connection.table('lrx_test')
    print('表对象为:')
    print(table0)  # <happybase.table.Table name=b'lrx_test'>

    # Build a table instance directly; returns a happybase.table.Table
    # (its name is shown as str).
    table = happybase.Table('lrx_test', connection)
    print('表实例为:')
    print(table)  # <happybase.table.Table name='lrx_test'>

    # Insert data (returns nothing): rows row0..row4 each get column
    # data:<i> set to str(i), then row5 gets data:5 = 'value1'.
    for i in range(5):
        table.put('row%s' % i, {'data:%s' % i: '%s' % i})
    table.put('row5', {'data:5': 'value1'})

    # Fetch the stored values of a single cell; returns a list.
    content = table.cells('row1', 'data:1')
    print(content)  # [b'value1', b'value1']

    # Read a counter column; returns the current value of the cell.
    # content2 = table.counter_get('row2', 'data:2')
    # print(content2)  # 0

    # Get a scanner over the table; returns a generator of (row key, data).
    scanner = table.scan()
    for k, v in scanner:
        print(k, v)
    ###########################################
    # Output:
    # b'row0' {b'data:0': b'0'}
    # b'row1' {b'data:1': b'value1'}
    # b'row2' {b'data:2': b'2'}
    # b'row3' {b'data:3': b'3'}
    # b'row4' {b'data:4': b'4'}

    print(scanner)  # <generator object Table.scan at 0x000001E17CCDAF10>

    # Fetch one row; returns a dict with bytes keys and values, decoded
    # here into str for display.
    info = table.row('row2')
    info1 = {k.decode(): v.decode() for k, v in info.items()}
    print(info1)

    # List the table names. A separate variable is used so the Table object
    # bound to `table` is not overwritten by a list.
    table_names = connection.tables()
    print(table_names)
finally:
    # Close the transport (returns nothing), even if an operation failed.
    connection.close()

 

分类:

技术点:

相关文章: