【问题标题】:How to import XML files into SQL Table with their column names?如何将 XML 文件及其列名导入 SQL 表?
【发布时间】:2016-10-07 05:06:11
【问题描述】:

我有 12 个 XML 文件,我必须在 SQL Server 的单个表中导入,其中一个如下,请帮助我将这些文件放入带有列名的表格式中。

<?xml version="1.0" encoding="UTF-16"?>
<Data>
<!-- IntelliScan - USC  -->
<BatchNumber>1</BatchNumber>
<ComputerName>PC-XDS1</ComputerName>
<StartTime>2/16/2016 7:44:38 AM</StartTime>
<EndTime>2/16/2016 8:14:47 AM</EndTime>
<IdleTime>672</IdleTime>
<ImagesDroppedFront>0</ImagesDroppedFront>
<ImagesDroppedBack>205</ImagesDroppedBack>
<JobName>Landscape Production NO MONEY</JobName>
<JobNumber>001</JobNumber>
<Operator>abwhite</Operator>
<OutputFolderName>6604773004001</OutputFolderName>
<PagesOperatorReplaced>2</PagesOperatorReplaced>
<PagesOperatorDeleted>1</PagesOperatorDeleted>
<PagesOperatorInserted>0</PagesOperatorInserted>
<ProcessingDate>2/16/2016</ProcessingDate>
<ProfileName> Multis Landscape</ProfileName>
<RecoveredTime></RecoveredTime>
<Status></Status>
<SystemDate>2/16/2016</SystemDate>
<TotalItems>318</TotalItems>
<TotalLevel1>0</TotalLevel1>
<TotalLevel2>64</TotalLevel2>
<TotalLevel3>0</TotalLevel3>
<TotalImages>506</TotalImages>
<TotalFrontIJP>321</TotalFrontIJP>
<TotalBackIJP>0</TotalBackIJP>
<TotalPostIJP>0</TotalPostIJP>
<TransportType>DocuScan</TransportType>
<TotalAutoFeed>298</TotalAutoFeed>
<TotalManualFeed>23</TotalManualFeed>
<TotalFeedOnTime>0</TotalFeedOnTime>
<TotalSpeed1>0</TotalSpeed1>
<TotalSpeed2>0</TotalSpeed2>
<TotalSpeed3>321</TotalSpeed3>
<XPTNumber>73</XPTNumber>
<IR_Marked>0</IR_Marked>
<IR_Reviewed>39</IR_Reviewed>
<Start_DLN>6604773004001</Start_DLN>
<End_DLN>6604773004061</End_DLN>
<DocTypes>
<Doctype ID="400">
<!-- [ DocType400 ]  - DOCSEP -->
<Name>DOCSEP</Name>
<Count>64</Count>
</Doctype>
<Doctype ID="001">
<!-- [ DocType001 ]  - Page -->
<Name>Page</Name>
<Count>254</Count>
</Doctype>
</DocTypes>
<Jams>
<Jam>
<Message>5.10 Document Late To MultiPocket STK1 PKT1 Entrance Sensor</Message>
<Count>3</Count>
<Time>206</Time>
</Jam>
<Jam>
<Message>1.6 Document Late to PostScan IJP Entrance sensor.</Message>
<Count>2</Count>
<Time>13</Time>
</Jam>
<Jam>
<Message>1.4 Document Late to Reader On Doc sensor.</Message>
<Count>3</Count>
<Time>18</Time>
</Jam>
<Jam>
<Message>0.125 Double feed detected. Document held at the handfeed track</Message>
<Count>1</Count>
<Time>16</Time>
</Jam>
<Jam>
<Message>1.5 Document Late to PreScan IJP Entrance sensor.</Message>
<Count>1</Count>
<Time>3</Time>
</Jam>
<Jam>
<Message>3.2 Unexpected Item Detected at Dbl Feed exit sensor.</Message>
<Count>1</Count>
<Time>43</Time>
</Jam>
</Jams>
</Data>

我已经在运行一个查询,但问题是,在 Data/DoctTypes 中有多个 ID,在 Data/Jams/Jam 中有多个 Message、Count、time 列。我已经在表名 XMLFiles 和列名 XMLData 中加载了 xml 文件,现在需要从 xml 文件中检索 xml 文件中的列,并将它们的值放在一个表中。那么如何检索那些多列或数据呢?我的查询如下,

Select  XMLData.value('(/Data/BatchNumber)[1]','int' ) BatchNumber,
    XMLData.value('(/Data/ComputerName)[1]','varchar(50)' ) ComputerName,
    XMLData.value('(/Data/StartTime)[1]','DateTime' ) StartTime,
    XMLData.value('(/Data/EndTime)[1]','DateTime' ) EndTime,
    XMLData.value('(/Data/IdleTime)[1]','int' ) IdleTime,
    XMLData.value('(/Data/ImagesDroppedFront)[1]','int' ) ImagesDroppedFront,
    XMLData.value('(/Data/ImagesDroppedBack)[1]','int' ) ImagesDroppedBack,
    XMLData.value('(/Data/JobName)[1]','varchar(100)' ) JobName,
    XMLData.value('(/Data/JobNumber)[1]','varchar(10)' ) JobNumber,
    XMLData.value('(/Data/Operator)[1]','varchar(50)' ) Operator,
    XMLData.value('(/Data/OutputFolderName)[1]','varchar(20)' ) OutputFolderName,
    XMLData.value('(/Data/PagesOperatorReplaced)[1]','int' ) PagesOperatorReplaced,
    XMLData.value('(/Data/PagesOperatorDeleted)[1]','int' ) PagesOperatorDeleted,
    XMLData.value('(/Data/PagesOperatorInserted)[1]','int' ) PagesOperatorInserted,
    XMLData.value('(/Data/ProcessingDate)[1]','Date' ) ProcessingDate,
    XMLData.value('(/Data/ProfileName)[1]','varchar(50)' ) ProfileName,
    XMLData.value('(/Data/RecoveredTime)[1]','Time' ) RecoveredTime,
    XMLData.value('(/Data/Status)[1]','varchar(50)' ) [Status],
    XMLData.value('(/Data/SystemDate)[1]','Date' ) SystemDate,
    XMLData.value('(/Data/TotalItems)[1]','int' ) TotalItems,
    XMLData.value('(/Data/TotalLevel1)[1]','int' ) TotalLevel1,
    XMLData.value('(/Data/TotalLevel2)[1]','int' ) TotalLevel2,
    XMLData.value('(/Data/TotalLevel3)[1]','int' ) TotalLevel3,
    XMLData.value('(/Data/TotalImages)[1]','int' ) TotalImages,
    XMLData.value('(/Data/TotalFrontIJP)[1]','int' ) TotalFrontIJP,
    XMLData.value('(/Data/TotalBackIJP)[1]','int' ) TotalBackIJP,
    XMLData.value('(/Data/TotalPostIJP)[1]','int' ) TotalPostIJP,
    XMLData.value('(/Data/TransportType)[1]','varchar(50)' ) TransportType,
    XMLData.value('(/Data/TotalAutoFeed)[1]','int' ) TotalAutoFeed,
    XMLData.value('(/Data/TotalManualFeed)[1]','int' ) TotalManualFeed,
    XMLData.value('(/Data/TotalFeedOnTime)[1]','int' ) TotalFeedOnTime,
    XMLData.value('(/Data/TotalSpeed1)[1]','int' ) TotalSpeed1,
    XMLData.value('(/Data/TotalSpeed2)[1]','int' ) TotalSpeed2,
    XMLData.value('(/Data/TotalSpeed3)[1]','int' ) TotalSpeed3,
    XMLData.value('(/Data/XPTNumber)[1]','int' ) XPTNumber,
    XMLData.value('(/Data/IR_Marked)[1]','int' ) IR_Marked,
    XMLData.value('(/Data/IR_Reviewed)[1]','int' ) IR_Reviewed,
    ISNULL(XMLData.value('(/Data/Start_DLN)[1]','varchar(20)'),'' ) Start_DLN,
    ISNULL(XMLData.value('(/Data/End_DLN)[1]','varchar(20)'),'' ) End_DLN,
    XMLData.value('(/Data/DocTypes/Doctype/@ID)[1]','int') ID,
    XMLData.value('(/Data/DocTypes/Doctype/Name)[1]','varchar(50)' ) Name,
    XMLData.value('(/Data/DocTypes/Doctype/Count)[1]','int' ) [Count],
    ISNULL(XMLData.value('(/Data/Jams/Jam/Message)[1]','varchar(1000)'),'') [Message],
    ISNULL(XMLData.value('(/Data/Jams/Jam/Count)[1]','int' ),'') [Count],
    ISNULL(XMLData.value('(/Data/Jams/Jam/Time)[1]','int' ),'') [Time]
    From XMLFiles X

【问题讨论】:

    标签: sql-server xml sql-server-2008 xml-parsing


    【解决方案1】:

    您的 XML 包含具有 1:n 关系的嵌套数据。要将其放入适当设计的数据结构中,需要单独的表。

    使用此代码,您将获得三个带有生成 ID 的派生表来定义它们的关系:

    DECLARE @x XML=
    N'<Data>
      <!-- IntelliScan - USC  -->
      <BatchNumber>1</BatchNumber>
      <ComputerName>PC-XDS1</ComputerName>
      <StartTime>2/16/2016 7:44:38 AM</StartTime>
      <EndTime>2/16/2016 8:14:47 AM</EndTime>
      <IdleTime>672</IdleTime>
      <ImagesDroppedFront>0</ImagesDroppedFront>
      <ImagesDroppedBack>205</ImagesDroppedBack>
      <JobName>Landscape Production NO MONEY</JobName>
      <JobNumber>001</JobNumber>
      <Operator>abwhite</Operator>
      <OutputFolderName>6604773004001</OutputFolderName>
      <PagesOperatorReplaced>2</PagesOperatorReplaced>
      <PagesOperatorDeleted>1</PagesOperatorDeleted>
      <PagesOperatorInserted>0</PagesOperatorInserted>
      <ProcessingDate>2/16/2016</ProcessingDate>
      <ProfileName> Multis Landscape</ProfileName>
      <RecoveredTime />
      <Status />
      <SystemDate>2/16/2016</SystemDate>
      <TotalItems>318</TotalItems>
      <TotalLevel1>0</TotalLevel1>
      <TotalLevel2>64</TotalLevel2>
      <TotalLevel3>0</TotalLevel3>
      <TotalImages>506</TotalImages>
      <TotalFrontIJP>321</TotalFrontIJP>
      <TotalBackIJP>0</TotalBackIJP>
      <TotalPostIJP>0</TotalPostIJP>
      <TransportType>DocuScan</TransportType>
      <TotalAutoFeed>298</TotalAutoFeed>
      <TotalManualFeed>23</TotalManualFeed>
      <TotalFeedOnTime>0</TotalFeedOnTime>
      <TotalSpeed1>0</TotalSpeed1>
      <TotalSpeed2>0</TotalSpeed2>
      <TotalSpeed3>321</TotalSpeed3>
      <XPTNumber>73</XPTNumber>
      <IR_Marked>0</IR_Marked>
      <IR_Reviewed>39</IR_Reviewed>
      <Start_DLN>6604773004001</Start_DLN>
      <End_DLN>6604773004061</End_DLN>
      <DocTypes>
        <Doctype ID="400">
          <!-- [ DocType400 ]  - DOCSEP -->
          <Name>DOCSEP</Name>
          <Count>64</Count>
        </Doctype>
        <Doctype ID="001">
          <!-- [ DocType001 ]  - Page -->
          <Name>Page</Name>
          <Count>254</Count>
        </Doctype>
      </DocTypes>
      <Jams>
        <Jam>
          <Message>5.10 Document Late To MultiPocket STK1 PKT1 Entrance Sensor</Message>
          <Count>3</Count>
          <Time>206</Time>
        </Jam>
        <Jam>
          <Message>1.6 Document Late to PostScan IJP Entrance sensor.</Message>
          <Count>2</Count>
          <Time>13</Time>
        </Jam>
        <Jam>
          <Message>1.4 Document Late to Reader On Doc sensor.</Message>
          <Count>3</Count>
          <Time>18</Time>
        </Jam>
        <Jam>
          <Message>0.125 Double feed detected. Document held at the handfeed track</Message>
          <Count>1</Count>
          <Time>16</Time>
        </Jam>
        <Jam>
          <Message>1.5 Document Late to PreScan IJP Entrance sensor.</Message>
          <Count>1</Count>
          <Time>3</Time>
        </Jam>
        <Jam>
          <Message>3.2 Unexpected Item Detected at Dbl Feed exit sensor.</Message>
          <Count>1</Count>
          <Time>43</Time>
        </Jam>
      </Jams>
    </Data>';
    

    --从这里开始查询:

    WITH DataNode AS
    (
        SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS Data_ID
              ,Data.value('BatchNumber[1]','int') AS BatchNumber
              ,Data.value('ComputerName[1]','nvarchar(max)') AS ComputerName
              --add all columns of the first level below Data
              ,Data.query('DocTypes') AS Doctype
              ,Data.query('Jams') AS Jam
        FROM @x.nodes('Data') AS A(Data)
    )
    ,Doctypes AS
    (
        SELECT DataNode.Data_ID
              ,ROW_NUMBER() OVER(PARTITION BY Data_ID ORDER BY (SELECT NULL)) AS Doctype_ID
              ,dt.value('@ID','int') AS DoctypeID
              ,dt.value('Name[1]','nvarchar(max)') AS DoctypeName
              ,dt.value('Count[1]','int') AS DoctypeCount
        FROM DataNode
        CROSS APPLY Doctype.nodes('DocTypes/Doctype') AS A(dt)
    )
    ,Jams AS
    (
        SELECT DataNode.Data_ID
              ,ROW_NUMBER() OVER(PARTITION BY Data_ID ORDER BY (SELECT NULL)) AS Jam_ID
              ,dt.value('Message[1]','nvarchar(max)') AS JamMessage
              ,dt.value('Count[1]','int') AS JamCount
              ,dt.value('Time[1]','int') AS JamTime
        FROM DataNode
        CROSS APPLY Jam.nodes('Jams/Jam') AS A(dt)
    )
    SELECT dn.*
          --add all Data columns here
          ,dt.Doctype_ID
          ,dt.DoctypeCount
          ,dt.DoctypeID
          ,dt.DoctypeName
          ,jm.Jam_ID
          ,jm.JamCount
          ,jm.JamMessage
          ,jm.JamTime
    INTO #tmpResult
    FROM DataNode AS dn
    INNER JOIN DocTypes AS dt ON dt.Data_ID=dn.Data_ID
    INNER JOIN Jams AS jm ON dt.Data_ID=jm.Data_ID;
    
    SELECT DISTINCT Data_ID,BatchNumber,ComputerName INTO #MyDataTable FROM #tmpResult;
    SELECT DISTINCT Data_ID,Doctype_ID,DoctypeID,DoctypeName,DoctypeCount INTO #MyDoctypeTable FROM #tmpResult;
    SELECT DISTINCT Data_ID,Jam_ID,JamCount,JamMessage,JamTime INTO #MyJamTable FROM #tmpResult;
    GO
    
    SELECT * FROM #MyDataTable;
    SELECT * FROM #MyDoctypeTable;
    SELECT * FROM #MyJamTable;
    
    GO
    DROP TABLE #MyDataTable;
    DROP TABLE #MyDoctypeTable;
    DROP TABLE #MyJamTable;
    DROP TABLE #tmpResult;
    

    【讨论】:

    • 谢谢Shnugo,它的工作..但我无法将所有12个Xml文件加载到一个表中..你能帮我吗?
    • 在我的第二个 XML 文件中,没有 Jam 标签和消息,所以无法得到输出,我们怎样才能得到这样的结果?还有一些没有 DoctypeID 和 Jams 的 xml,请建议一些 Query,@Shnugo。
    • @Ruhaan 请用减少的样本和清晰的描述开始一个新问题。在这里添加问题的链接,我很快就会在那里弹出。
    • 是的,发布了新问题。请提出一些解决方案。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2019-09-23
    • 1970-01-01
    • 2019-06-03
    • 1970-01-01
    • 2015-03-12
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多