【问题标题】:XML to import data frame(将 XML 导入数据框)
【发布时间】:2021-10-17 17:05:52
【问题描述】:

这是一个xml文件,需要在python中解析转换成数据框,保存为csv

    <?xml version="1.0" encoding="UTF-8"?>
   <m2:M006 xmlns:m2="http://www.mym2.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mym2.com http://www.mym2.com/schemas/M2RequestSchema.xsd"><M001><tkn /><m2xmlReleaseNo>020</m2xmlReleaseNo><mvmtHdr mvmtTp="CRT">
       <cmt>Reduction of nominal</cmt>
     </mvmtHdr><mvmtSrc>
       <ctlr>SAR</ctlr>
       <ctlrMvmtRef>464082138</ctlrMvmtRef>
       <srcRecType>Cash Transaction</srcRecType>
       <srcTransType>Reduction of Nominal</srcTransType>
     </mvmtSrc><extSys>
       <extSysID>SAR</extSysID>
       <extSysRef>464082138</extSysRef>
     </extSys><hldCpt1 amtScheme="m2">
       <hldId>
         <ctlrOwnr>6.00595-6</ctlrOwnr>
         <ctlrAcct>6005956.6000</ctlrAcct>
         <ownrRefCcy>CHF</ownrRefCcy>
         <ccy>CHF</ccy>
         <asst>CH0013841017</asst>
         <asstIDs>
           <asstRef refType="ISIN">CH0013841017</asstRef>
           <asstRef refType="Valor">1384101</asstRef>
           <asstRef refType="SARA">000000426547</asstRef>
         </asstIDs>
         <asstDetail>
           <isin>CH0013841017</isin>
           <type>Shares</type>
           <cntry>CH/Switzerland</cntry>
           <desc>Lonza Group AG Nam.</desc>
           <issueCcy>CHF</issueCcy>
           <latestPrc ccy="CHF">126.50</latestPrc>
           <latestPrcDate>2015-04-14</latestPrcDate>
         </asstDetail>
       </hldId>
       <effDt>2015-04-14</effDt>
       <settleDt>2015-04-14</settleDt>
       <qty>5000</qty>
       <prc ccy="CHF">2.5</prc>
       <grTrdAmt ccy="CHF">-12500</grTrdAmt>
       <grAmt ccy="CHF">-12500</grAmt>
       <netAmt ccy="CHF">-12500</netAmt>
     </hldCpt1><hldCpt2 amtScheme="m2">
       <hldId>
         <ctlrOwnr>6.00595-6</ctlrOwnr>
         <ctlrAcct>6005956.4000/CHF/KK</ctlrAcct>
         <ccy>CHF</ccy>
         <asst>CHF</asst>
         <asstIDs>
           <asstRef refType="CURRENCY">CHF</asstRef>
         </asstIDs>
       </hldId>
       <effDt>2015-04-14</effDt>
       <grAmt ccy="CHF">12500</grAmt>
       <netAmt ccy="CHF">12500</netAmt>
     </hldCpt2></M001><M001><tkn /><m2xmlReleaseNo>020</m2xmlReleaseNo><mvmtHdr mvmtTp="CCL">
       <cmt>Variation Margin</cmt>
     </mvmtHdr><mvmtSrc>
       <ctlr>SAR</ctlr>
       <ctlrMvmtRef>464068243</ctlrMvmtRef>
       <srcRecType>Cash Transaction</srcRecType>
       <srcTransType>Variation Margin</srcTransType>
     </mvmtSrc><extSys>
       <extSysID>SAR</extSysID>
       <extSysRef>464068243</extSysRef>
     </extSys><hldCpt1 amtScheme="m2">
       <hldId>
         <ctlrOwnr>6.00767-3</ctlrOwnr>
         <ctlrAcct>6007673.6000</ctlrAcct>
         <ownrRefCcy>EUR</ownrRefCcy>
         <ccy>EUR</ccy>
         <asst>73896793</asst>
         <asstIDs>
           <asstRef refType="Valor">73896793</asstRef>
           <asstRef refType="SARA">000125009930</asstRef>
         </asstIDs>
         <asstDetail>
           <type>Future</type>
           <cntry>DE/Germany</cntry>
           <desc>FGBL - BUND FUTURES Jun15 EUX</desc>
           <issueCcy>EUR</issueCcy>
           <latestPrc ccy="EUR">159.54</latestPrc>
           <latestPrcDate>2015-04-14</latestPrcDate>
           <asstDerivDetails>
             <contractSize>100000</contractSize>
             <expDate>2015-06-08</expDate>
             <underlyingAsstIDs>
               <asstRef refType="Valor">0000004209</asstRef>
             </underlyingAsstIDs>
           </asstDerivDetails>
         </asstDetail>
       </hldId>
       <effDt>2015-04-14</effDt>
       <settleDt>2015-04-14</settleDt>
       <qty>-10</qty>
       <prc ccy="EUR">989898</prc>
       <grTrdAmt ccy="EUR">100</grTrdAmt>
       <grAmt ccy="EUR">100</grAmt>
       <netAmt ccy="EUR">100</netAmt>
     </hldCpt1><hldCpt2 amtScheme="m2">
       <hldId>
         <ctlrOwnr>6.00767-3</ctlrOwnr>
         <ctlrAcct>6007673.4004/EUR/KK 'Variation Margin'</ctlrAcct>
         <ccy>EUR</ccy>
         <asst>EUR</asst>
         <asstIDs>
           <asstRef refType="CURRENCY">EUR</asstRef>
         </asstIDs>
       </hldId>
       <effDt>2015-04-14</effDt>
       <grAmt ccy="EUR">-100</grAmt>
       <netAmt ccy="EUR">-100</netAmt>
     </hldCpt2></M001><M001><tkn /><m2xmlReleaseNo>020</m2xmlReleaseNo><mvmtHdr mvmtTp="AOU">
       <cmt>Variation Margin</cmt>
     </mvmtHdr><mvmtSrc>
       <ctlr>SAR</ctlr>
       <ctlrMvmtRef>464068485</ctlrMvmtRef>
       <srcRecType>Cash Transaction</srcRecType>
       <srcTransType>Variation Margin</srcTransType>
     </mvmtSrc><extSys>
       <extSysID>SAR</extSysID>
       <extSysRef>464068485</extSysRef>
     </extSys><hldCpt1 amtScheme="m2">
       <hldId>
         <ctlrOwnr>6.01279-4</ctlrOwnr>
         <ctlrAcct>6012794.6000</ctlrAcct>
         <ownrRefCcy>EUR</ownrRefCcy>
         <ccy>CHF</ccy>
         <asst>74040383</asst>
         <asstIDs>
           <asstRef refType="Valor">74040383</asstRef>
           <asstRef refType="SARA">000125132191</asstRef>
         </asstIDs>
         <asstDetail>
           <type>Future</type>
           <cntry>CH/Switzerland</cntry>
           <desc>FSMI FUTURES Jun15 EUX</desc>
           <issueCcy>CHF</issueCcy>
           <latestPrc ccy="CHF">9288.00</latestPrc>
           <latestPrcDate>2015-04-14</latestPrcDate>
           <asstDerivDetails>
             <contractSize>10</contractSize>
             <expDate>2015-06-19</expDate>
             <underlyingAsstIDs>
               <asstRef refType="Valor">0000004208</asstRef>
             </underlyingAsstIDs>
           </asstDerivDetails>
         </asstDetail>
       </hldId>
       <effDt>2015-04-14</effDt>
       <settleDt>2015-04-14</settleDt>
       <qty>-14</qty>
       <prc ccy="CHF">989898</prc>
       <grTrdAmt ccy="CHF">-6440</grTrdAmt>
       <grAmt ccy="CHF">-6440</grAmt>
       <netAmt ccy="CHF">-6440</netAmt>
     </hldCpt1><hldCpt2 amtScheme="m2">
       <hldId>
         <ctlrOwnr>6.01279-4</ctlrOwnr>
         <ctlrAcct>6012794.4019/CHF/KK 'Variation Margin'</ctlrAcct>
         <ccy>CHF</ccy>
         <asst>CHF</asst>
         <asstIDs>
           <asstRef refType="CURRENCY">CHF</asstRef>
         </asstIDs>
       </hldId>
       <effDt>2015-04-14</effDt>
       <grAmt ccy="CHF">6440</grAmt>
       <netAmt ccy="CHF">6440</netAmt>
     </hldCpt2></M001></m2:M006>

要将其转换为数据框,我编写了以下代码:

import pandas as pd
import xml.etree.ElementTree as et


def parse_XML(xml_file, df_cols):
    """Parse an XML file into a DataFrame with one row per child of the root.

    Args:
        xml_file: Path or file object accepted by ``ElementTree.parse``.
        df_cols: Column names. The first name is read from each record's
            XML attributes; every remaining name is passed to
            ``Element.find`` relative to the record, so a bare tag name
            only matches a direct child. Nested values need a path such
            as ``"mvmtSrc/ctlr"``.

    Returns:
        A ``pandas.DataFrame`` whose columns are ``df_cols``. A cell is
        ``None`` when the attribute or element is not found.
    """
    xroot = et.parse(xml_file).getroot()
    rows = []

    for node in xroot:
        # The first column is an attribute of the record element itself.
        record = {df_cols[0]: node.attrib.get(df_cols[0])}
        for col in df_cols[1:]:
            # Look the element up once; compare against None explicitly
            # rather than relying on Element truthiness.
            child = node.find(col)
            record[col] = child.text if child is not None else None
        rows.append(record)

    return pd.DataFrame(rows, columns=df_cols)

但是数据框没有加载出数据:我无法获取各个标签下的值(But unable to get output)。想要的输出(output wanted):

【问题讨论】:

    标签: python-3.x pandas xml numpy elementtree


    【解决方案1】:

    也许下面的方法会有所帮助。您需要安装并导入 xmltodict 和 flatten_json,然后就可以创建所需的数据框和列。这里我假设 xmlst = ''' 你的 xml ''',即把 XML 内容保存在字符串 xmlst 中。

        import json

        import pandas as pd
        import xmltodict

        # xmlst is expected to hold the XML document as a string.
        # The JSON round trip presumably turns xmltodict's result into
        # plain dicts and lists -- confirm against the xmltodict version in use.
        data = json.loads(json.dumps(xmltodict.parse(xmlst)))
        # The prefixed root element is the only top-level key; the M001
        # records sit beneath it, so index through 'm2:M006' first.
        data['m2:M006']['M001']
        pd.DataFrame(data)
        
        from flatten_json import flatten
        # Flatten each M001 record into a single-level dict with
        # '.'-joined keys, giving one DataFrame row per record.
        dic_flattened = (flatten(d, '.') for d in data['m2:M006']['M001'])
        df = pd.DataFrame(dic_flattened)
        df
    
        tkn m2xmlReleaseNo  ... hldCpt1.hldId.asstDetail.asstDerivDetails.underlyingAsstIDs.asstRef.@refType hldCpt1.hldId.asstDetail.asstDerivDetails.underlyingAsstIDs.asstRef.#text
    0  None            020  ...                                                NaN                                                                          NaN
    1  None            020  ...                                              Valor                                                                   0000004209
    2  None            020  ...                                              Valor                                                                   0000004208
    
    [3 rows x 57 columns]
    
    df.iloc[2]
    
    tkn                                                                                                               None
    m2xmlReleaseNo                                                                                                     020
    mvmtHdr.@mvmtTp                                                                                                    AOU
    mvmtHdr.cmt                                                                                           Variation Margin
    mvmtSrc.ctlr                                                                                                       SAR
    mvmtSrc.ctlrMvmtRef                                                                                          464068485
    mvmtSrc.srcRecType                                                                                    Cash Transaction
    mvmtSrc.srcTransType                                                                                  Variation Margin
    extSys.extSysID                                                                                                    SAR
    extSys.extSysRef                                                                                             464068485
    hldCpt1.@amtScheme                                                                                                  m2
    hldCpt1.hldId.ctlrOwnr                                                                                       6.01279-4
    hldCpt1.hldId.ctlrAcct                                                                                      6012794.60
    hldCpt1.hldId.ownrRefCcy                                                                                           EUR
    hldCpt1.hldId.ccy                                                                                                  CHF
    hldCpt1.hldId.asst                                                                                            74040383
    hldCpt1.hldId.asstIDs.asstRef.0.@refType                                                                         Valor
    hldCpt1.hldId.asstIDs.asstRef.0.#text                                                                         74040383
    hldCpt1.hldId.asstIDs.asstRef.1.@refType                                                                          SARA
    hldCpt1.hldId.asstIDs.asstRef.1.#text                                                                     000125132191
    hldCpt1.hldId.asstIDs.asstRef.2.@refType                                                                           NaN
    hldCpt1.hldId.asstIDs.asstRef.2.#text                                                                              NaN
    hldCpt1.hldId.asstDetail.isin                                                                                      NaN
    hldCpt1.hldId.asstDetail.type                                                                                   Future
    hldCpt1.hldId.asstDetail.cntry                                                                          CH/Switzerland
    hldCpt1.hldId.asstDetail.desc                                                                   FSMI FUTURES Jun15 EUX
    hldCpt1.hldId.asstDetail.issueCcy                                                                                  CHF
    hldCpt1.hldId.asstDetail.latestPrc.@ccy                                                                            CHF
    hldCpt1.hldId.asstDetail.latestPrc.#text                                                                        9288.0
    hldCpt1.hldId.asstDetail.latestPrcDate                                                                      2015-04-14
    hldCpt1.effDt                                                                                               2015-04-14
    hldCpt1.settleDt                                                                                            2015-04-14
    hldCpt1.qty                                                                                                        -14
    hldCpt1.prc.@ccy                                                                                                   CHF
    hldCpt1.prc.#text                                                                                               989898
    hldCpt1.grTrdAmt.@ccy                                                                                              CHF
    hldCpt1.grTrdAmt.#text                                                                                           -6440
    hldCpt1.grAmt.@ccy                                                                                                 CHF
    hldCpt1.grAmt.#text                                                                                              -6440
    hldCpt1.netAmt.@ccy                                                                                                CHF
    hldCpt1.netAmt.#text                                                                                             -6440
    hldCpt2.@amtScheme                                                                                                  m2
    hldCpt2.hldId.ctlrOwnr                                                                                       6.01279-4
    hldCpt2.hldId.ctlrAcct                                                          6012794.4019/CHF/KK 'Variation Margin'
    hldCpt2.hldId.ccy                                                                                                  CHF
    hldCpt2.hldId.asst                                                                                                 CHF
    hldCpt2.hldId.asstIDs.asstRef.@refType                                                                        CURRENCY
    hldCpt2.hldId.asstIDs.asstRef.#text                                                                                CHF
    hldCpt2.effDt                                                                                               2015-04-14
    hldCpt2.grAmt.@ccy                                                                                                 CHF
    hldCpt2.grAmt.#text                                                                                               6440
    hldCpt2.netAmt.@ccy                                                                                                CHF
    hldCpt2.netAmt.#text                                                                                              6440
    hldCpt1.hldId.asstDetail.asstDerivDetails.contractSize                                                              10
    hldCpt1.hldId.asstDetail.asstDerivDetails.expDate                                                           2015-06-19
    hldCpt1.hldId.asstDetail.asstDerivDetails.underlyingAsstIDs.asstRef.@refType                                     Valor
    hldCpt1.hldId.asstDetail.asstDerivDetails.underlyingAsstIDs.asstRef.#text                                   0000004208
    Name: 2, dtype: object
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 2019-03-09
      • 1970-01-01
      • 1970-01-01
      • 2021-10-08
      • 1970-01-01
      • 1970-01-01
      • 2013-06-01
      相关资源
      最近更新 更多