【问题标题】:How to handle empty array-type scalar in tde template如何处理 tde 模板中的空数组类型标量
【发布时间】:2022-01-21 11:32:26
【问题描述】:

我需要使用 TDE 模板处理一些有时缺少数据的文档,但这些文档仍然需要在表(视图)中显示出来。然而,对于数据为空的文档,我的模板无法返回对应的行。

 // Customer document whose Addr array holds two address entries.
const doc1 = xdmp.toJSON({
  customer: {
    Name: 'Name1',
    Addr: [
      {
        AddrTypeCd: 'MailingAddress',
        Addr1: '911 FORBES AVE',
        Addr2: 'SUITE XXX',
        CityName: 'asdfasfd',
        StateProvCd: 'PA',
        PostalCode: '15219',
      },
      {
        AddrTypeCd: 'OfficeAddress',
        Addr1: '911 Watson AVE',
        Addr2: 'SUITE XXX',
        CityName: 'asdfasfd',
        StateProvCd: 'CT',
        PostalCode: '15119',
      },
    ],
  },
});

// Customer document whose Addr array is empty.
const doc2 = xdmp.toJSON({
  customer: {
    Name: 'Name2',
    Addr: [],
  },
});

// Builds a nullable string column whose value path is the column's own name.
const nullableStringColumn = (columnName) => ({
  name: columnName,
  scalarType: 'string',
  val: columnName,
  nullable: true,
});

// Address properties that each become one nullable column of the view.
const addressColumnNames = [
  'AddrTypeCd',
  'Addr1',
  'Addr2',
  'CityName',
  'StateProvCd',
  'PostalCode',
];

// Single template rooted at /customer/Addr; CustomerName is read from the
// enclosing customer object via a relative path.
const rowtde1 = xdmp.toJSON({
  template: {
    context: '/customer/Addr',
    rows: [
      {
        schemaName: 'Schemas',
        viewName: 'CustomerAddress',
        columns: [
          { name: 'CustomerName', scalarType: 'string', val: '../../Name' },
          ...addressColumnNames.map((columnName) => nullableStringColumn(columnName)),
        ],
      },
    ],
  },
});

// Run the template against both in-memory documents; this call stays last so
// its value is the result of the script.
tde.nodeDataExtract([doc1, doc2], [rowtde1]);

对于此数据提取,响应为:

{
  "document1": [
    {
      "row": {
        "schema": "Schemas",
        "view": "CustomerAddress",
        "data": {
          "rownum": "1",
          "CustomerName": "Name1",
          "AddrTypeCd": "MailingAddress",
          "Addr1": "911 FORBES AVE",
          "Addr2": "SUITE XXX",
          "CityName": "asdfasfd",
          "StateProvCd": "PA",
          "PostalCode": "15219"
        }
      }
    },
    {
      "row": {
        "schema": "Schemas",
        "view": "CustomerAddress",
        "data": {
          "rownum": "2",
          "CustomerName": "Name1",
          "AddrTypeCd": "OfficeAddress",
          "Addr1": "911 Watson AVE",
          "Addr2": "SUITE XXX",
          "CityName": "asdfasfd",
          "StateProvCd": "CT",
          "PostalCode": "15119"
        }
      }
    }
  ],
  "document2": []
}

但我也需要为没有地址的客户获取响应数据,如下所示:

{   "document1": [
    {
      "row": {
        "schema": "Schemas",
        "view": "CustomerAddress",
        "data": {
          "rownum": "1",
          "CustomerName": "Name1",
          "AddrTypeCd": "MailingAddress",
          "Addr1": "911 FORBES AVE",
          "Addr2": "SUITE XXX",
          "CityName": "asdfasfd",
          "StateProvCd": "PA",
          "PostalCode": "15219"
        }
      }
    },
    {
      "row": {
        "schema": "Schemas",
        "view": "CustomerAddress",
        "data": {
          "rownum": "2",
          "CustomerName": "Name1",
          "AddrTypeCd": "OfficeAddress",
          "Addr1": "911 Watson AVE",
          "Addr2": "SUITE XXX",
          "CityName": "asdfasfd",
          "StateProvCd": "CT",
          "PostalCode": "15119"
        }
      }
    }   ],   "document2": [    {
      "row": {
        "schema": "Schemas",
        "view": "CustomerAddress",
        "data": {
          "rownum": "1",
          "CustomerName": "Name2"
        }
      }
    }  ] }

我是否需要创建第二个视图来处理文档中的名称,然后将其与地址视图相结合?多个联接是否会对性能产生影响?

【问题讨论】:

    标签: marklogic marklogic-9 marklogic-10


    【解决方案1】:

    通过一点点 xPath 魔法和一些 TDE 模板设置,您实际上可以使用嵌套模板和稀疏定义来做到这一点。

    主模板

    • 无地址客户的模板
    • 带有地址的客户模板

    请注意:对于系统中已保存的文档,这种做法可以直接使用 / 作为上下文。但这样做需要为模板指定目录或集合范围,而这一点无法在内联示例中模拟,因此我添加了 root 根元素,使示例在不保存文档的情况下也能运行。只要您同时用目录或集合来限定模板的范围,就可以按原来定义的方式(不带 root)保存文档。

     // Customer document (wrapped in "root") whose Addr array holds two entries.
    const doc1 = xdmp.toJSON({
      root: {
        customer: {
          Name: 'Name1',
          Addr: [
            {
              AddrTypeCd: 'MailingAddress',
              Addr1: '911 FORBES AVE',
              Addr2: 'SUITE XXX',
              CityName: 'asdfasfd',
              StateProvCd: 'PA',
              PostalCode: '15219',
            },
            {
              AddrTypeCd: 'OfficeAddress',
              Addr1: '911 Watson AVE',
              Addr2: 'SUITE XXX',
              CityName: 'asdfasfd',
              StateProvCd: 'CT',
              PostalCode: '15119',
            },
          ],
        },
      },
    });

    // Customer document (wrapped in "root") whose Addr array is empty.
    const doc2 = xdmp.toJSON({
      root: {
        customer: {
          Name: 'Name2',
          Addr: [],
        },
      },
    });

    // Builds a nullable string column whose value path is the column's own name.
    const nullableStringColumn = (columnName) => ({
      name: columnName,
      scalarType: 'string',
      val: columnName,
      nullable: true,
    });

    // Sub-template selected when the customer's Addr has no children:
    // the context is the Name node, so the row carries only CustomerName.
    const customerWithoutAddress = {
      context: 'customer[fn:count(./Addr/*) = 0]/Name',
      rows: [
        {
          schemaName: 'Schemas',
          viewLayout: 'sparse',
          viewName: 'CustomerAddress',
          columns: [{ name: 'CustomerName', scalarType: 'string', val: '.' }],
        },
      ],
    };

    // Sub-template selected when Addr has children: one row per Addr context
    // node, with CustomerName read from the enclosing customer object.
    const customerWithAddress = {
      context: 'customer[fn:count(./Addr/*) > 0]/Addr',
      rows: [
        {
          schemaName: 'Schemas',
          viewLayout: 'sparse',
          viewName: 'CustomerAddress',
          columns: [
            { name: 'CustomerName', scalarType: 'string', val: '../../Name' },
            nullableStringColumn('AddrTypeCd'),
            nullableStringColumn('Addr1'),
            nullableStringColumn('Addr2'),
            nullableStringColumn('CityName'),
            nullableStringColumn('StateProvCd'),
            nullableStringColumn('PostalCode'),
          ],
        },
      ],
    };

    // Parent template: both sub-templates write into the same sparse view.
    const rowtde1 = xdmp.toJSON({
      template: {
        context: '/root',
        templates: [customerWithoutAddress, customerWithAddress],
      },
    });

    // Kept as the last statement so its value is the result of the script.
    tde.nodeDataExtract([doc1, doc2], [rowtde1]);
    

    只有名称的那一行,其结果只包含这一个值;稀疏(sparse)视图布局允许这样做。您也可以把完整的列列表都定义出来并将其值设为 null,但我看不出这样做有什么价值。

    {
    "document1": [{
            "row": {
                "schema": "Schemas",
                "view": "CustomerAddress",
                "data": {
                    "rownum": "1",
                    "CustomerName": "Name1",
                    "AddrTypeCd": "MailingAddress",
                    "Addr1": "911 FORBES AVE",
                    "Addr2": "SUITE XXX",
                    "CityName": "asdfasfd",
                    "StateProvCd": "PA",
                    "PostalCode": "15219"
                }
            }
        },
        {
            "row": {
                "schema": "Schemas",
                "view": "CustomerAddress",
                "data": {
                    "rownum": "2",
                    "CustomerName": "Name1",
                    "AddrTypeCd": "OfficeAddress",
                    "Addr1": "911 Watson AVE",
                    "Addr2": "SUITE XXX",
                    "CityName": "asdfasfd",
                    "StateProvCd": "CT",
                    "PostalCode": "15119"
                }
            }
        }
    ],
    "document2": [{
        "row": {
            "schema": "Schemas",
            "view": "CustomerAddress",
            "data": {
                "rownum": "1",
                "CustomerName": "Name2"
            }
        }
    }]
    }
    

    【讨论】:

    • 联接(join)不一定便宜,在分布式数据库中代价可能更高。过滤、联接等操作先在森林(forest)上执行,然后在节点上执行,本地无法解决的部分再传输到 E 节点。取决于具体的联接,这可能导致大量内容在节点之间传输,而这仅仅是为了完成一次查询。在 MarkLogic 这样的数据库中,应当按照您打算使用内容的方式来组织内容。我会让 TDE 尽量贴近文档本身的结构;试图在结构化内容的视图上做到 3NF 反而是一种倒退。
    【解决方案2】:

    由于文档中存在一个由多个项组成的数组,您应该创建第二个模板来表示这些行。这样您会得到一个客户主视图和一个地址视图,最好有一个唯一键用于联接。有了这两个视图之后,您就可以使用 SQL 或 Optic API(MarkLogic 的多模型查询库)进行查询和联接。

    {
        "template": {
            "description": "test template",
            "context": "customer",
            "rows": [
                {
                    "schemaName": "Schemas",
                    "viewName": "Customer",
                    "columns": [
                        {
                            "name": "id",
                            "scalarType": "int",
                            "val": "./id"
                        },
                        {
                            "name": "CustomerName",
                            "scalarType": "string",
                            "val": "./Name"
                        }
                    ]
                }
            ],
            "templates": [
                {
                    "context": "./Addr",
                    "rows": [
                        {
                            "schemaName": "Schemas",
                            "viewName": "CustomerAddr",
                            "viewLayout": "sparse",
                            "columns": [
                                {
                                    "name": "customerId",
                                    "scalarType": "int",
                                    "val": "../../id"
                                },
                                {
                                    "name": "AddrTypeCd",
                                    "scalarType": "string",
                                    "val": "AddrTypeCd",
                                    "nullable": true
                                },
                                {
                                    "name": "Addr1",
                                    "scalarType": "string",
                                    "val": "Addr1",
                                    "nullable": true
                                },
                                {
                                    "name": "Addr2",
                                    "scalarType": "string",
                                    "val": "Addr2",
                                    "nullable": true
                                },
                                {
                                    "name": "CityName",
                                    "scalarType": "string",
                                    "val": "CityName",
                                    "nullable": true
                                },
                                {
                                    "name": "StateProvCd",
                                    "scalarType": "string",
                                    "val": "StateProvCd",
                                    "nullable": true
                                },
                                {
                                    "name": "PostalCode",
                                    "scalarType": "string",
                                    "val": "PostalCode",
                                    "nullable": true
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    }
    

    Optic API 查询示例

    'use strict';

    const op = require('/MarkLogic/optic');

    // The two views to combine.
    const customerView = op.fromView('Schemas', 'Customer');
    const addressView = op.fromView('Schemas', 'CustomerAddr');

    // Match Customer.id against CustomerAddr.customerId.
    const joinCondition = op.on(op.col('id'), op.col('customerId'));

    // Left outer join from the customer view, capped at 10 rows; kept as the
    // last expression so its value is the result of the script.
    customerView
      .joinLeftOuter(addressView, joinCondition)
      .limit(10)
      .result();
    

    结果

    {
        "Schemas.Customer.id": 1,
        "Schemas.CustomerAddr.customerId": 1,
        "Schemas.Customer.CustomerName": "Name1",
        "Schemas.CustomerAddr.AddrTypeCd": "MailingAddress",
        "Schemas.CustomerAddr.Addr1": "911 FORBES AVE",
        "Schemas.CustomerAddr.Addr2": "SUITE XXX",
        "Schemas.CustomerAddr.CityName": "asdfasfd",
        "Schemas.CustomerAddr.StateProvCd": "PA",
        "Schemas.CustomerAddr.PostalCode": "15219"
    }
    
    {
        "Schemas.Customer.id": 1,
        "Schemas.CustomerAddr.customerId": 1,
        "Schemas.Customer.CustomerName": "Name1",
        "Schemas.CustomerAddr.AddrTypeCd": "OfficeAddress",
        "Schemas.CustomerAddr.Addr1": "911 Watson AVE",
        "Schemas.CustomerAddr.Addr2": "SUITE XXX",
        "Schemas.CustomerAddr.CityName": "asdfasfd",
        "Schemas.CustomerAddr.StateProvCd": "CT",
        "Schemas.CustomerAddr.PostalCode": "15119"
    } 
    
    {
        "Schemas.Customer.id": 2,
        "Schemas.CustomerAddr.customerId": null,
        "Schemas.Customer.CustomerName": "Name2",
        "Schemas.CustomerAddr.AddrTypeCd": null,
        "Schemas.CustomerAddr.Addr1": null,
        "Schemas.CustomerAddr.Addr2": null,
        "Schemas.CustomerAddr.CityName": null,
        "Schemas.CustomerAddr.StateProvCd": null,
        "Schemas.CustomerAddr.PostalCode": null
    }
    
    {
        "Schemas.Customer.id": 3,
        "Schemas.CustomerAddr.customerId": null,
        "Schemas.Customer.CustomerName": "Name3",
        "Schemas.CustomerAddr.AddrTypeCd": null,
        "Schemas.CustomerAddr.Addr1": null,
        "Schemas.CustomerAddr.Addr2": null,
        "Schemas.CustomerAddr.CityName": null,
        "Schemas.CustomerAddr.StateProvCd": null,
        "Schemas.CustomerAddr.PostalCode": null
    }
    

    【讨论】:

      【解决方案3】:

      如果您没有自然连接键,则可以在两个视图中添加类似下面的内容..

      {
       "name": "joinKey",
       "scalarType": "string",
       "val": "xdmp:node-uri(.) || '#' || fn:position()"
      }
      

      Andrew 的回答仍然是正确的:需要做的就是创建两个视图,这正是使用 TDE 展平嵌套数组的方式。您也可以使用如下 SQL 语句进行快速测试:

      -- Left outer join on joinKey: Customer rows without a match are still returned.
      SELECT *
      FROM Schemas.Customer cus
      LEFT JOIN Schemas.CustomerAddr addr
        ON cus.joinKey = addr.joinKey
      
      -- Comma join filtered on joinKey: only rows with a match in both views are returned.
      SELECT *
      FROM Schemas.Customer cus,
           Schemas.CustomerAddr addr
      WHERE cus.joinKey = addr.joinKey
      

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2011-12-07
        • 1970-01-01
        • 2010-12-22
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多