【问题标题】:SQLSERVER - Import and parsing special XML coming from wordpress(SQLSERVER - 导入和解析来自 wordpress 的特殊 XML)
【发布时间】:2017-06-01 08:52:19
【问题描述】:

我已经编写了一个脚本来加载一个 xml 文件并对其进行解析。

这是脚本和部分 xml 文件作为示例:

SQL 脚本

-- Loads a WordPress export file into a temp table, then shreds it with OPENXML
-- into one row per <item> exposing the item's title and category.
DECLARE @XML AS XML
-- Receives the document handle produced by sp_xml_preparedocument.
DECLARE @hDoc AS INT

-- Staging table holding the raw XML document and the time it was loaded.
CREATE TABLE #XMLwithOpenXML
(
Id INT IDENTITY PRIMARY KEY,
XMLData XML,
LoadedDateTime DATETIME
)

-- SINGLE_BLOB reads the whole file as a single value (BulkColumn), which is
-- then converted to the XML type before being stored.
INSERT INTO #XMLwithOpenXML(XMLData, LoadedDateTime)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE() 
FROM OPENROWSET(BULK 'C:\temp\wordpress.2017-05-22.xml', SINGLE_BLOB) AS x;

-- Copy the stored document into the variable used by the XML parser.
SELECT @XML = XMLData FROM #XMLwithOpenXML

-- Parse the document and obtain a handle for OPENXML.
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML

-- The row pattern 'rss/channel/item' produces one row per <item>; the column
-- patterns 'title' and 'category' are evaluated relative to that item, so each
-- row carries a single Category value even when the item has several
-- <category> children (the behaviour the question reports).
SELECT *
FROM OPENXML(@hDoc, 'rss/channel/item')
WITH 
    (       
        Product [VARCHAR](50) 'title',
        Category [VARCHAR](50) 'category'
    )

-- Release the parsed document associated with the handle.
EXEC sp_xml_removedocument @hDoc

DROP TABLE #XMLwithOpenXML

XML 文件(部分)

<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0"
    xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
    xmlns:content="http://purl.org/rss/1.0/modules/content/"
    xmlns:wfw="http://wellformedweb.org/CommentAPI/"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>
    <item>
        <title>Bancone Tetris</title>
        <category domain="categorie-prodotti" nicename="arredi-light"><![CDATA[Arredi light]]></category>
        <category domain="categorie-outlet" nicename="arredi-light"><![CDATA[arredi light]]></category>
        <category domain="categorie-prodotti" nicename="banconi"><![CDATA[Banconi]]></category>
        <category domain="categorie-outlet" nicename="banconi"><![CDATA[Banconi]]></category>
        <category domain="categorie-outlet" nicename="luci"><![CDATA[luci]]></category>
        <wp:postmeta>
            <wp:meta_key><![CDATA[wpcf-codice]]></wp:meta_key>
            <wp:meta_value><![CDATA[cod.05-008]]></wp:meta_value>
        </wp:postmeta>
        <wp:postmeta>
            <wp:meta_key><![CDATA[wpcf-size]]></wp:meta_key>
            <wp:meta_value><![CDATA[145x80xh110 cm]]></wp:meta_value>
        </wp:postmeta>
</item>
<item>
... SAME STRUCTURE AS ABOVE
</item>
</channel>
</rss>

我遇到的问题

  1. 我需要列出所有产品及其所有相关类别,但目前脚本对每个产品只返回第一个类别.
  2. 我不知道如何返回每个产品的所有 postmeta(键、值)列表...

希望清楚, 感谢支持!

【问题讨论】:

    标签: sql-server xml tsql xml-parsing


    【解决方案1】:

    我对 OPENXML 生疏了,但你不需要它。让我们从一些更新的示例数据开始:

    -- Seeds #XMLwithOpenXML with an inline two-item sample document in place of
    -- the bulk file load. The temp table must already exist (it is created by
    -- the CREATE TABLE statement in the question's script).
    -- The string literal is stored into the XML-typed XMLData column and
    -- GETDATE() fills LoadedDateTime; the "AS BulkColumn" alias only mirrors the
    -- naming of the original OPENROWSET-based insert.
    -- The second item ("Bancone Tetris Part2") has two categories, the first
    -- item has five; both items carry the same two wp:postmeta entries.
    INSERT INTO #XMLwithOpenXML(XMLData, LoadedDateTime)
    SELECT 
    '<?xml version="1.0" encoding="UTF-8" ?>
    <rss version="2.0"
        xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
        xmlns:content="http://purl.org/rss/1.0/modules/content/"
        xmlns:wfw="http://wellformedweb.org/CommentAPI/"
        xmlns:dc="http://purl.org/dc/elements/1.1/"
        xmlns:wp="http://wordpress.org/export/1.2/"
    >
    <channel>
        <item>
            <title>Bancone Tetris</title>
            <category domain="categorie-prodotti" nicename="arredi-light"><![CDATA[Arredi light]]></category>
            <category domain="categorie-outlet" nicename="arredi-light"><![CDATA[arredi light]]></category>
            <category domain="categorie-prodotti" nicename="banconi"><![CDATA[Banconi]]></category>
            <category domain="categorie-outlet" nicename="banconi"><![CDATA[Banconi]]></category>
            <category domain="categorie-outlet" nicename="luci"><![CDATA[luci]]></category>
            <wp:postmeta>
                <wp:meta_key><![CDATA[wpcf-codice]]></wp:meta_key>
                <wp:meta_value><![CDATA[cod.05-008]]></wp:meta_value>
            </wp:postmeta>
            <wp:postmeta>
                <wp:meta_key><![CDATA[wpcf-size]]></wp:meta_key>
                <wp:meta_value><![CDATA[145x80xh110 cm]]></wp:meta_value>
            </wp:postmeta>
        </item>
        <item>
            <title>Bancone Tetris Part2</title>
            <category domain="categorie-outlet" nicename="banconi"><![CDATA[Banconi]]></category>
            <category domain="categorie-outlet" nicename="luci"><![CDATA[luci]]></category>
            <wp:postmeta>
                <wp:meta_key><![CDATA[wpcf-codice]]></wp:meta_key>
                <wp:meta_value><![CDATA[cod.05-008]]></wp:meta_value>
            </wp:postmeta>
            <wp:postmeta>
                <wp:meta_key><![CDATA[wpcf-size]]></wp:meta_key>
                <wp:meta_value><![CDATA[145x80xh110 cm]]></wp:meta_value>
            </wp:postmeta>
        </item>
    </channel>
    </rss>' AS BulkColumn, GETDATE();
    

    此查询将为您提供所有产品和相关类别:

    -- One row per (item, category) pair: the first APPLY shreds the stored
    -- document into <item> nodes, the second shreds each item into its
    -- <category> children. title and category live in no namespace, so plain
    -- element names are sufficient in the XPath expressions.
    SELECT
        itm.item_node.value('(title/text())[1]', 'varchar(1000)') AS product,
        cat.category_node.value('(text())[1]', 'varchar(1000)')   AS category
    FROM #XMLwithOpenXML AS src
    CROSS APPLY src.XMLData.nodes('rss/channel/item') AS itm(item_node)
    CROSS APPLY itm.item_node.nodes('category')       AS cat(category_node);
    

    结果

    product                    category
    -------------------------- --------------
    Bancone Tetris             Arredi light
    Bancone Tetris             arredi light
    Bancone Tetris             Banconi
    Bancone Tetris             Banconi
    Bancone Tetris             luci
    Bancone Tetris Part2       Banconi
    Bancone Tetris Part2       luci
    

    对于您的 wp:postmeta 内容,您需要引用 wp 命名空间。以下是三种方法(注意我的注释):

    -- Option #1: use the "*:" wildcard, which matches the element name in any
    -- namespace, so the wp prefix never has to be declared (the answer
    -- author's favourite).
    -- The category and postmeta APPLYs both hang off the same item node, so
    -- every category of an item is paired with every postmeta entry of it.
    SELECT
        itm.item_node.value('(title/text())[1]', 'varchar(1000)')           AS product,
        cat.category_node.value('(text())[1]', 'varchar(1000)')             AS category,
        pm.postmeta_node.value('(*:meta_key/text())[1]', 'varchar(1000)')   AS meta_key,
        pm.postmeta_node.value('(*:meta_value/text())[1]', 'varchar(1000)') AS meta_val
    FROM #XMLwithOpenXML AS src
    CROSS APPLY src.XMLData.nodes('rss/channel/item') AS itm(item_node)
    CROSS APPLY itm.item_node.nodes('category')       AS cat(category_node)
    CROSS APPLY itm.item_node.nodes('*:postmeta')     AS pm(postmeta_node);
    
    -- Option #2: declare the wp prefix once for the whole statement with
    -- WITH XMLNAMESPACES; every XPath expression below can then use "wp:".
    -- (The statement preceding a WITH clause must be terminated with ";".)
    WITH XMLNAMESPACES ('http://wordpress.org/export/1.2/' AS wp)
    SELECT
        itm.item_node.value('(title/text())[1]', 'varchar(1000)')            AS product,
        cat.category_node.value('(text())[1]', 'varchar(1000)')              AS category,
        pm.postmeta_node.value('(wp:meta_key/text())[1]', 'varchar(1000)')   AS meta_key,
        pm.postmeta_node.value('(wp:meta_value/text())[1]', 'varchar(1000)') AS meta_val
    FROM #XMLwithOpenXML AS src
    CROSS APPLY src.XMLData.nodes('rss/channel/item') AS itm(item_node)
    CROSS APPLY itm.item_node.nodes('category')       AS cat(category_node)
    CROSS APPLY itm.item_node.nodes('wp:postmeta')    AS pm(postmeta_node);
    
    -- Option #3: declare the wp namespace inline, inside each XQuery
    -- expression that needs it. More verbose than the other two options, but
    -- requires nothing outside the individual expression.
    SELECT
        itm.item_node.value('(title/text())[1]', 'varchar(1000)') AS product,
        cat.category_node.value('(text())[1]', 'varchar(1000)')   AS category,
        pm.postmeta_node.value(
            'declare namespace wp="http://wordpress.org/export/1.2/"; (wp:meta_key/text())[1]',
            'varchar(1000)'
        ) AS meta_key,
        pm.postmeta_node.value(
            'declare namespace wp="http://wordpress.org/export/1.2/"; (wp:meta_value/text())[1]',
            'varchar(1000)'
        ) AS meta_val
    FROM #XMLwithOpenXML AS src
    CROSS APPLY src.XMLData.nodes('rss/channel/item') AS itm(item_node)
    CROSS APPLY itm.item_node.nodes('category')       AS cat(category_node)
    CROSS APPLY itm.item_node.nodes(
        'declare namespace wp="http://wordpress.org/export/1.2/"; wp:postmeta'
    ) AS pm(postmeta_node);
    

    所有三个解决方案都返回:

    product              category             meta_key             meta_val
    -------------------- -------------------- -------------------- --------------------
    Bancone Tetris       Arredi light         wpcf-codice          cod.05-008
    Bancone Tetris       Arredi light         wpcf-size            145x80xh110 cm
    Bancone Tetris       arredi light         wpcf-codice          cod.05-008
    Bancone Tetris       arredi light         wpcf-size            145x80xh110 cm
    Bancone Tetris       Banconi              wpcf-codice          cod.05-008
    Bancone Tetris       Banconi              wpcf-size            145x80xh110 cm
    Bancone Tetris       Banconi              wpcf-codice          cod.05-008
    Bancone Tetris       Banconi              wpcf-size            145x80xh110 cm
    Bancone Tetris       luci                 wpcf-codice          cod.05-008
    Bancone Tetris       luci                 wpcf-size            145x80xh110 cm
    Bancone Tetris Part2 Banconi              wpcf-codice          cod.05-008
    Bancone Tetris Part2 Banconi              wpcf-size            145x80xh110 cm
    Bancone Tetris Part2 luci                 wpcf-codice          cod.05-008
    Bancone Tetris Part2 luci                 wpcf-size            145x80xh110 cm
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2011-08-21
      • 1970-01-01
      相关资源
      最近更新 更多