postgresql-JSON使用

json,jsonb区别

json和jsonb接受几乎相同的输入值，两者在实际使用中的主要区别在于效率。json保存的是输入文本的完整拷贝，每次使用时都要重新解析，所以它会保留输入中的空格、重复键以及键的顺序。而jsonb保存的是解析输入后得到的二进制格式，解析时会删除不必要的空格和重复的键（只保留最后一个键值对），键的顺序也可能与输入不同，使用时不用再次解析。对于重复键，json虽然原样存储，但处理函数同样以最后一个键值对为准。效率的差别：json类型存储快，查询慢；jsonb类型存储稍慢，查询较快(支持索引和许多额外的操作符)。

关于json和jsonb存储和解析效率的链接

这里主要测试jsonb的增删改查

json和jsonb共同操作符

操作符	返回类型	数组[1,2,3]	{"a":1,"b":2,"c":3}	{"a":{"b":{"c":1}},"d":[4,5,6]}
->	json	select \'[1,2,3]\'::jsonb ->2 = 3	select \'{"a":1,"b":2,"c":3}\'::jsonb-> \'a\'=1	select \'{"a":{"b":{"c":1}},"d":[4,5,6]}\'::jsonb ->\'a\'={"b": {"c": 1}}
->>	text	select \'[1,2,3]\'::jsonb ->>2 = 3	select \'{"a":1,"b":2,"c":3}\'::jsonb->> \'a\'=1	select \'{"a":{"b":{"c":1}},"d":[4,5,6]}\'::jsonb ->>\'a\'={"b": {"c": 1}}
#>	json	--	--	select \'{"a":{"b":{"c":1}},"d":[4,5,6]}\'::jsonb #> \'{a,b}\' ={"c": 1}
#>>	text	--	--	select \'{"a":{"b":{"c":1}},"d":[4,5,6]}\'::jsonb #>> \'{a,b}\' ={"c": 1}

jsonb额外操作符

操作符	右操作数类型	描述	例子
@>	jsonb	左边的 JSON 值是否包含顶层右边JSON路径/值项?	\'{"a":1, "b":2}\'::jsonb @> \'{"b":2}\'::jsonb
<@	jsonb	左边的JSON路径/值是否包含在顶层右边JSON值中？	\'{"b":2}\'::jsonb <@ \'{"a":1, "b":2}\'::jsonb
?	text	字符串是否作为顶层键值存在于JSON值中？	\'{"a":1, "b":2}\'::jsonb ? \'b\'
?\|	text[]	这些数组字符串中的任何一个是否作为顶层键值存在？	\'{"a":1, "b":2, "c":3}\'::jsonb ?\| array[\'b\',\'c\']
?&	text[]	这些数组字符串是否全部作为顶层键值存在？	\'["a", "b"]\'::jsonb ?& array[\'a\', \'b\']
\|\|	jsonb	连接两个jsonb值到新的jsonb值	\'["a", "b"]\'::jsonb \|\| \'["c", "d"]\'::jsonb
-	text	从左操作数中删除键/值对或字符串元素。基于键值匹配键/值对。	\'{"a": "b"}\'::jsonb - \'a\'
-	integer	删除指定索引的数组元素（负整数表示从末尾开始计数）。如果顶层容器不是一个数组，那么抛出错误。	\'["a", "b"]\'::jsonb - 1
#-	text[]	删除指定路径的域或元素（对于JSON数组，负整数表示从末尾开始计数）	\'["a", {"b":1}]\'::jsonb #- \'{1,b}\'

jsonb增删改

--1.1建表
abase=> create table test_jsonb(c_bh char(32),j_jsonb jsonb);
CREATE TABLE

--插入数据
insert into test_jsonb(c_bh,j_jsonb) values(replace(uuid_generate_v4()::text,\'-\',\'\'),\'{"c_xm":"张三","c_mx":{"c_ssdw":"一大队","c_dwbm":"11"}}\');
INSERT 0 1
--查看数据
abase=# select * from test_jsonb where j_jsonb @> \'{"c_xm":"张三","c_mx":{"c_ssdw":"一大队","c_dwbm":"11"}}\';              
               c_bh               |                            j_jsonb                             
----------------------------------+--------------------------------------------
 c217c624152943ab93f502117514f432 | {"c_mx": {"c_dwbm": "11", "c_ssdw": "一大队"}, "c_xm": "张三"}
(1 row)
--1.2操作符||可用于添加元素，添加元素\'{"c_id":"111"}\'
abase=# update test_jsonb set j_jsonb = j_jsonb ||\'{"c_id":"111"}\'::jsonb  where c_bh = \'c217c624152943ab93f502117514f432\';
UPDATE 1
abase=# select j_jsonb from test_jsonb where c_bh = \'c217c624152943ab93f502117514f432\';
                                    j_jsonb                                    
-------------------------------------------------------------------------------
 {"c_id": "111", "c_mx": {"c_dwbm": "11", "c_ssdw": "一大队"}, "c_xm": "张三"}
(1 row)


--1.3更新元素（方法1），如果jsonb中有相同的元素则覆盖,使用\'||\'将\'{"c_id":"111"}\'更新为112
abase=# update test_jsonb set j_jsonb = j_jsonb ||\'{"c_id":"112"}\'::jsonb  where c_bh = \'c217c624152943ab93f502117514f432\';
UPDATE 1
abase=# select j_jsonb from test_jsonb where c_bh = \'c217c624152943ab93f502117514f432\';
                                    j_jsonb                                    
-------------------------------------------------------------------------------
 {"c_id": "112", "c_mx": {"c_dwbm": "11", "c_ssdw": "一大队"}, "c_xm": "张三"}
(1 row)
--更新元素（方法2），使用jsonb_set，将"c_id": "112"更新为123
abase=# update test_jsonb set j_jsonb=  jsonb_set(j_jsonb,\'{c_id}\',\'"123"\'::jsonb,false)  where c_bh = \'c217c624152943ab93f502117514f432\';
UPDATE 1
abase=# select j_jsonb from test_jsonb where c_bh = \'c217c624152943ab93f502117514f432\';
                                    j_jsonb                                    
-------------------------------------------------------------------------------
 {"c_id": "123", "c_mx": {"c_dwbm": "11", "c_ssdw": "一大队"}, "c_xm": "张三"}
(1 row)


--1.4更新嵌套元素，使用jsonb_set（pg9.5及以上版本才支持），更新c_ssdw为二大队
abase=# update test_jsonb set j_jsonb=  jsonb_set(j_jsonb,\'{c_mx,c_ssdw}\',\'"二大队"\'::jsonb,false)  where c_bh = \'c217c624152943ab93f502117514f432\';
UPDATE 1
abase=# select j_jsonb from test_jsonb where c_bh = \'c217c624152943ab93f502117514f432\';
                                    j_jsonb                                    
-------------------------------------------------------------------------------
 {"c_id": "123", "c_mx": {"c_dwbm": "11", "c_ssdw": "二大队"}, "c_xm": "张三"}
(1 row)


--1.5删除元素，删除c_id元素
abase=# update test_jsonb set  j_jsonb = j_jsonb-\'c_id\' where c_bh = \'c217c624152943ab93f502117514f432\' ;
UPDATE 1
abase=# select j_jsonb from test_jsonb where c_bh = \'c217c624152943ab93f502117514f432\';
                            j_jsonb                             
----------------------------------------------------------------
 {"c_mx": {"c_dwbm": "11", "c_ssdw": "二大队"}, "c_xm": "张三"}
(1 row)

jsonb查询

--1.随机文本脚本
abase=> create or replace function random_string(INTEGER)  
abase-> RETURNS TEXT AS  
abase-> $BODY$  
abase$> select array_to_string(  
abase$>     array(  
abase$>         select substring(  
abase$>             \'pg社区的作风非常严谨，一个补丁可能在邮件组中讨论几个月甚至几年，根据大家的意见反复的修正，补丁合并到主干已经非常成熟，所以pg的稳定性也是远近闻名的\'   
abase$>         from (ceil(random()*73))::int FOR 2  
abase$>         )  
abase$>         from generate_series(1,$1)  
abase$>     ),\'\'  
abase$> )  $BODY$  
abase-> LANGUAGE sql VOLATILE;
CREATE FUNCTION

--2.初始化数据：
abase=> insert into test_jsonb select replace(uuid_generate_v4()::text,\'-\',\'\'),(\'{"a":\'||random()*100||\', "kxhbsl":"\'|| random_string(10) ||\'"}\')::jsonb    from generate_series(1,2000000);
INSERT 0 2000000
abase=> insert into test_jsonb select replace(uuid_generate_v4()::text,\'-\',\'\'),(\'{"a":\'||random()*100||\', "kxhbsl":"索尼是大法官"}\')::jsonb    from generate_series(1,10000);
INSERT 0 10000

--3.第一种查询：获取包含\'{"kxhbsl": "索尼是大法官"}\'，全表扫描
abase=# explain analyze select j_jsonb->>\'kxhbsl\',j_jsonb from test_jsonb where j_jsonb @> \'{"kxhbsl": "索尼是大法官"}\';
                                                            QUERY PLAN                                                       
-------------------------------------------------------------------------------------------------------------
 Gather  (cost=1000.00..53379.78 rows=2010 width=134) (actual time=470.729..490.979 rows=10000 loops=1)
   Workers Planned: 2
   Workers Launched: 2
   ->  Parallel Seq Scan on test_jsonb  (cost=0.00..52175.85 rows=838 width=134) (actual time=465.234..480.57
3 rows=3333 loops=3)
         Filter: (j_jsonb @> \'{"kxhbsl": "索尼是大法官"}\'::jsonb)
         Rows Removed by Filter: 666667
 Planning time: 0.318 ms
 Execution time: 506.204 ms
(8 rows)

--j_jsonb字段创建gin索引后，可走索引
abase=# create index i_t_test_jsonb_j_jsonb on test_jsonb using gin(j_jsonb);
CREATE INDEX
abase=#  explain analyze select j_jsonb->>\'kxhbsl\',* from test_jsonb where j_jsonb @> \'{"kxhbsl": "索尼是大法官"}\';
                                                              QUERY PLAN                                                              
-------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test_jsonb  (cost=59.58..6664.09 rows=2010 width=167) (actual time=3.579..17.065 rows=10
000 loops=1)
   Recheck Cond: (j_jsonb @> \'{"kxhbsl": "索尼是大法官"}\'::jsonb)
   Heap Blocks: exact=481
   ->  Bitmap Index Scan on i_t_test_jsonb_j_jsonb  (cost=0.00..59.08 rows=2010 width=0) (actual time=3.480..
3.480 rows=10000 loops=1)
         Index Cond: (j_jsonb @> \'{"kxhbsl": "索尼是大法官"}\'::jsonb)
 Planning time: 0.429 ms
 Execution time: 17.964 ms
(7 rows)


--4.第二种查询，获取包含:\'索尼是大法官\'，全表扫描
abase=#  explain analyze select j_jsonb->>\'kxhbsl\',j_jsonb from test_jsonb where j_jsonb -> \'kxhbsl\' ? \'索尼是大法官\';
                                                             QUERY PLAN                                                           
-------------------------------------------------------------------------------------------------------------
 Gather  (cost=1000.00..55473.53 rows=2010 width=134) (actual time=1724.170..1769.543 rows=10000 loops=1)
   Workers Planned: 2
   Workers Launched: 0
   ->  Parallel Seq Scan on test_jsonb  (cost=0.00..54269.60 rows=838 width=134) (actual time=1723.752..1767.
187 rows=10000 loops=1)
         Filter: ((j_jsonb -> \'kxhbsl\'::text) ? \'索尼是大法官\'::text)
         Rows Removed by Filter: 2000000
 Planning time: 0.267 ms
 Execution time: 1770.422 ms
(8 rows)

--针对jsonb字段的kxhbsl元素创建gin索引。 可走索引
abase=# create index i_t_test_jsonb_j_jsonb_kxhbsl on test_jsonb using gin((j_jsonb->\'kxhbsl\'));
CREATE INDEX
abase=#  explain analyze select j_jsonb->>\'kxhbsl\',j_jsonb from test_jsonb where j_jsonb -> \'kxhbsl\' ? \'索尼是大法官\';
                                                                  QUERY PLAN                                                                
-------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test_jsonb  (cost=39.58..6649.12 rows=2010 width=134) (actual time=2.166..13.999 rows=10
000 loops=1)
   Recheck Cond: ((j_jsonb -> \'kxhbsl\'::text) ? \'索尼是大法官\'::text)
   Heap Blocks: exact=481
   ->  Bitmap Index Scan on i_t_test_jsonb_j_jsonb_kxhbsl  (cost=0.00..39.08 rows=2010 width=0) (actual time=
2.045..2.045 rows=10000 loops=1)
         Index Cond: ((j_jsonb -> \'kxhbsl\'::text) ? \'索尼是大法官\'::text)
 Planning time: 0.221 ms
 Execution time: 14.715 ms
(7 rows)
--或者等价写法：
abase=# explain analyze select j_jsonb->>\'kxhbsl\',j_jsonb from test_jsonb where j_jsonb -> \'kxhbsl\' @>\'"索尼是大法官"\';
                                                                  QUERY PLAN                                                             
-------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test_jsonb  (cost=39.58..6649.12 rows=2010 width=134) (actual time=2.080..14.959 rows=10
000 loops=1)
   Recheck Cond: ((j_jsonb -> \'kxhbsl\'::text) @> \'"索尼是大法官"\'::jsonb)
   Heap Blocks: exact=481
   ->  Bitmap Index Scan on i_t_test_jsonb_j_jsonb_kxhbsl  (cost=0.00..39.08 rows=2010 width=0) (actual time=
1.980..1.980 rows=10000 loops=1)
         Index Cond: ((j_jsonb -> \'kxhbsl\'::text) @> \'"索尼是大法官"\'::jsonb)
 Planning time: 0.199 ms
 Execution time: 15.635 ms
(7 rows)

--5.第三种查询，获取\'{"kxhbsl": "索尼是大法官"}\'，全表扫描
abase=# explain analyze select * from test_jsonb where j_jsonb->>\'kxhbsl\' = \'索尼是大法官\';
                                                            QUERY PLAN                                                             
-------------------------------------------------------------------------------------------------------------
 Gather  (cost=1000.00..56272.50 rows=10050 width=135) (actual time=458.676..476.454 rows=10000 loops=1)
   Workers Planned: 2
   Workers Launched: 2
   ->  Parallel Seq Scan on test_jsonb  (cost=0.00..54267.50 rows=4188 width=135) (actual time=453.472..466.5
44 rows=3333 loops=3)
         Filter: ((j_jsonb ->> \'kxhbsl\'::text) = \'索尼是大法官\'::text)
         Rows Removed by Filter: 666667
 Planning time: 0.821 ms
 Execution time: 492.763 ms
(8 rows)
--针对这类查询，j_jsonb->>\'kxhbsl\'返回类型为text，那么可以考虑创建一个btree索引，也可以走索引
abase=# create index i_test_jsonb_j_jsonb_btree on test_jsonb using btree((j_jsonb ->> \'kxhbsl\') );
CREATE INDEX
abase=# explain analyze select * from test_jsonb where j_jsonb->>\'kxhbsl\' = \'索尼是大法官\';
                                                                 QUERY PLAN                                                           
-------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test_jsonb  (cost=498.44..24049.15 rows=10050 width=135) (actual time=4.150..8.168 rows=
10000 loops=1)
   Recheck Cond: ((j_jsonb ->> \'kxhbsl\'::text) = \'索尼是大法官\'::text)
   Heap Blocks: exact=481
   ->  Bitmap Index Scan on i_test_jsonb_j_jsonb_btree  (cost=0.00..495.93 rows=10050 width=0) (actual time=4
.042..4.042 rows=10000 loops=1)
         Index Cond: ((j_jsonb ->> \'kxhbsl\'::text) = \'索尼是大法官\'::text)
 Planning time: 0.684 ms
 Execution time: 8.991 ms
(7 rows)


--6.由于j_jsonb->>\'kxhbsl\'返回为text类型，所以可在其上面做许多操作，比如in，exists等
--查看执行计划，in查询：
abase=# explain analyze select * from test_jsonb where j_jsonb->>\'kxhbsl\' in (\'索尼是大法官\',\'3\');
                                                                 QUERY PLAN                                                             
-------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test_jsonb  (cost=992.88..35800.76 rows=20100 width=135) (actual time=2.666..5.992 rows=
10000 loops=1)
   Recheck Cond: ((j_jsonb ->> \'kxhbsl\'::text) = ANY (\'{索尼是大法官,3}\'::text[]))
   Heap Blocks: exact=481
   ->  Bitmap Index Scan on i_test_jsonb_j_jsonb_btree  (cost=0.00..987.86 rows=20100 width=0) (actual time=2
.576..2.576 rows=10000 loops=1)
         Index Cond: ((j_jsonb ->> \'kxhbsl\'::text) = ANY (\'{索尼是大法官,3}\'::text[]))
 Planning time: 0.360 ms
 Execution time: 6.856 ms
(7 rows)

三种查询都能得到相同的结果，可以看出第一种针对于jsonb字段的gin索引，适用于jsonb字段所有的元素，而第二种和第三种分别是对单个元素创建的gin和btree索引。

等值查询方面可能单个元素的btree索引占用空间小，且效率较高，如果单独某个元素的查询较为频繁可选择btree索引，而整个jsonb创建gin对所有元素有效。

第一种传入的是一个json，而第二种，第三种传入的是字符串

jsonb元素值模糊匹配

--1.有时候需要对jsonb的元素值进行模糊匹配
--在前面只有j_jsonb gin索引情况下，like全模糊匹配不能走索引
abase=#  explain  analyze select * from test_jsonb where j_jsonb->>\'kxhbsl\' like \'%大法官%\';
                                                           QUERY PLAN                                                      
-------------------------------------------------------------------------------------------------------------
 Gather  (cost=1000.00..55287.60 rows=201 width=135) (actual time=832.031..857.306 rows=10000 loops=1)
   Workers Planned: 2
   Workers Launched: 2
   ->  Parallel Seq Scan on test_jsonb  (cost=0.00..54267.50 rows=84 width=135) (actual time=826.065..844.494
 rows=3333 loops=3)
         Filter: ((j_jsonb ->> \'kxhbsl\'::text) ~~ \'%大法官%\'::text)
         Rows Removed by Filter: 666667
 Planning time: 0.314 ms
 Execution time: 873.938 ms
(8 rows)

--由于(j_jsonb ->>\'kxhbsl\')返回的是text类型，所以考虑在其上面使用pg_trgm扩展提供的gin_trgm_ops操作符类创建gin索引。
abase=# create index i_test_jsonb_j_jsonb_gin on test_jsonb using gin((j_jsonb ->>\'kxhbsl\') gin_trgm_ops);
CREATE INDEX
--查看执行计划，模糊匹配可走索引。
abase=#  explain  analyze select * from test_jsonb where j_jsonb->>\'kxhbsl\' like \'%大法官%\';
                                                               QUERY PLAN                                                              
-------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test_jsonb  (cost=17.56..782.71 rows=201 width=135) (actual time=3.781..16.256 rows=1000
0 loops=1)
   Recheck Cond: ((j_jsonb ->> \'kxhbsl\'::text) ~~ \'%大法官%\'::text)
   Heap Blocks: exact=481
   ->  Bitmap Index Scan on i_test_jsonb_j_jsonb_gin  (cost=0.00..17.51 rows=201 width=0) (actual time=3.649.
.3.649 rows=10000 loops=1)
         Index Cond: ((j_jsonb ->> \'kxhbsl\'::text) ~~ \'%大法官%\'::text)
 Planning time: 0.575 ms
 Execution time: 17.514 ms
(7 rows)


--2.当然还有一种方式就是将该jsonb字段转为text，然后再创建gin索引
--创建gin索引
abase=#create index i_jsonb_ops on test_jsonb using gin ((j_jsonb::text) gin_trgm_ops);
CREATE INDEX
--但是这样的模糊匹配，可能匹配到其他元素中包含同样的值，所以需要加上辅助条件：j_jsonb->>\'kxhbsl\' like \'%大法官%\'，用来确保匹配到的是该元素
abase=#  explain  analyze select * from test_jsonb where j_jsonb->>\'kxhbsl\' like \'%大法官%\' and j_jsonb ::text like \'%大法官%\';
                                                         QUERY PLAN                                          
              
-------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on test_jsonb  (cost=1297.51..2064.17 rows=1 width=135) (actual time=5.318..28.149 rows=100
00 loops=1)
   Recheck Cond: ((j_jsonb)::text ~~ \'%大法官%\'::text)
   Filter: ((j_jsonb ->> \'kxhbsl\'::text) ~~ \'%大法官%\'::text)
   Heap Blocks: exact=481
   ->  Bitmap Index Scan on i_jsonb_ops  (cost=0.00..1297.51 rows=201 width=0) (actual time=5.198..5.198 rows
=10000 loops=1)
         Index Cond: ((j_jsonb)::text ~~ \'%大法官%\'::text)
 Planning time: 0.479 ms
 Execution time: 29.147 ms
(8 rows)

第二种方法效率相对于第一种要低一点，但是所有元素都可使用

结语

1.在json和jsonb选择上，json更加适合用于存储，jsonb更加适用于检索。

2.可以对整个jsonb字段创建gin索引，同时也可以对jsonb中某个元素创建gin索引，或者btree索引。对单个元素做等值查询时btree效率最高。

3.(j_jsonb ->> \'kxhbsl\')返回的是一个text类型，所以可以在该属性上创建对应类型的索引，比如btree，gin索引。

4.对于元素值的模糊匹配可以创建单个元素的gin索引，也可以创建整个jsonb字段的gin索引，前者效率较高，后者适用所有元素。