【问题标题】:compare array of hashes and print expected & actual results比较哈希数组并打印预期和实际结果
【发布时间】:2016-11-11 13:01:16
【问题描述】:

我有 2 个哈希数组:

actual = [{"column_name"=>"NONINTERESTINCOME", "column_data_type"=>"NUMBER"},
 {"column_name"=>"NONINTERESTEXPENSE", "column_data_type"=>"VARCHAR"},
 {"column_name"=>"TRANSACTIONDATE", "column_data_type"=>"TIMESTAMP"},
 {"column_name"=>"UPDATEDATE", "column_data_type"=>"TIMESTAMP"}]
expected = [{"column_name"=>"NONINTERESTINCOME", "column_data_type"=>"NUMBER"},
 {"column_name"=>"NONINTERESTEXPENSE", "column_data_type"=>"NUMBER"},
 {"column_name"=>"TRANSACTIONDATE", "column_data_type"=>"NUMBER"},
 {"column_name"=>"UPDATEDATE", "column_data_type"=>"TIMESTAMP"}]

我需要比较这 2 个哈希数组,并找出 column_data_type 不一致的列。

比较我们可以直接使用:

diff = actual -   expected

这会将输出打印为:

{"column_name"=>"NONINTERESTEXPENSE", "column_data_type"=>"VARCHAR"}
{"column_name"=>"TRANSACTIONDATE", "column_data_type"=>"TIMESTAMP"}

我期望的输出是:在结果中同时打印实际的和预期的数据类型;也就是说,对于 actual 与 expected 两个哈希数组中数据类型不一致的每个 “column_name”,同时给出它在两边的数据类型,例如:

{"column_name"=>"NONINTERESTEXPENSE", "expected_column_data_type"=>"NUMBER", "actual_column_data_type" => "VARCHAR"}
{"column_name"=>"TRANSACTIONDATE", "expected_column_data_type"=>"NUMBER","actual_column_data_type" => "TIMESTAMP" }

【问题讨论】:

    标签: ruby hash


    【解决方案1】:

    无论您的数组中哈希的顺序如何,这都会起作用。

    # Collect every column whose data type differs between `expected` and
    # `actual`, pairing both types in a single hash per column, then print
    # the resulting array.

    # Index the actual types by column name once, so each expected column
    # costs a single hash lookup instead of a scan of `actual`. The first
    # occurrence of a name wins, like a first-match linear search would.
    actual_types = actual.each_with_object({}) do |column, types|
      name = column['column_name']
      types[name] = column['column_data_type'] unless types.key?(name)
    end

    diff = expected.each_with_object([]) do |column, mismatches|
      name = column['column_name']
      expected_type = column['column_data_type']
      # A column that is absent from `actual` yields nil here and is reported
      # as a mismatch rather than raising NoMethodError on nil.
      actual_type = actual_types[name]
      next if expected_type == actual_type

      mismatches << { 'column_name' => name,
                      'expected_column_data_type' => expected_type,
                      'actual_column_data_type' => actual_type }
    end

    p diff
    

    【讨论】:

    • 考虑使用 each_with_object,而不是冗余的局部变量。这是 Ruby,不是 PHP。
    【解决方案2】:
    # Evaluates to an array of { name:, actual:, expected: } hashes, one per
    # column whose data type differs between `actual` and `expected`.

    # Index each side as { column_name => column_data_type }. The fields are
    # looked up by key, so the result does not depend on the order in which
    # the keys happen to appear inside each column hash.
    actual_types, expected_types = [actual, expected].map do |columns|
      columns.map { |column| column.values_at('column_name', 'column_data_type') }.to_h
    end

    # Walk the union of column names: a column present on only one side is
    # reported with nil for the missing side, instead of leaking a bare type
    # string into the result.
    (actual_types.keys | expected_types.keys).each_with_object([]) do |name, differences|
      actual_type = actual_types[name]
      expected_type = expected_types[name]
      next if actual_type == expected_type

      differences << { name: name, actual: actual_type, expected: expected_type }
    end
    
    #⇒ [
    #    [0] {
    #            :name => "NONINTERESTEXPENSE",
    #          :actual => "VARCHAR",
    #        :expected => "NUMBER"
    #    },
    #    [1] {
    #            :name => "TRANSACTIONDATE",
    #          :actual => "TIMESTAMP",
    #        :expected => "NUMBER"
    #    }
    # ]
    

    上面的方法很容易扩展为合并 N 个数组(使用 reduce.with_index 和 merge,键为 "value_from_#{idx}")。

    【讨论】:

      【解决方案3】:
      # Evaluates to one hash per column that differs between `expected` and
      # `actual`, carrying the column name plus both data types.

      # Tag every differing row with the side it came from before grouping.
      # Relying on the position of rows inside each group would raise on nil
      # when a column exists on only one side, and would mislabel an
      # actual-only row as the expected one.
      mismatched = (expected - actual).map { |column| [:expected, column] } +
                   (actual - expected).map { |column| [:actual, column] }

      mismatched.group_by { |_side, column| column['column_name'] }.map do |name, entries|
        # Keep the first data type seen for each side; a side with no row
        # for this column stays nil.
        types = entries.each_with_object({}) do |(side, column), by_side|
          by_side[side] = column['column_data_type'] unless by_side.key?(side)
        end

        {
          'column_name'               => name,
          'expected_column_data_type' => types[:expected],
          'actual_column_data_type'   => types[:actual],
        }
      end
      

      【讨论】:

      • 这要求:对于任意数组 a,若 h = a.group_by { ... },则 h 的每个值(数组)中元素的顺序必须与它们在 a 中的顺序一致。虽然我预计确实如此,但据我所知,文档并未对此作出保证。
      • @CarySwoveland,我明白你的意思。这里有几点:(1) 如果不是这样,Ruby 规范测试(the ruby spec)就无法通过,而 CRuby 正式使用它已经有一段时间了。(2) Ruby 文档(ruby-doc.org)并不是由 Ruby 核心团队的成员维护的;Ruby 的定义就是 Matz 头脑中的那个东西。(3) 当前的实现已经是最优的。我怀疑我们很难找到一个更好、却又恰好不保证顺序的算法。
      【解决方案4】:

      这个怎么样?

      # Returns the first hash in +hashes_array+ whose "column_name" entry
      # equals +column_name+, or nil when no hash matches.
      #
      # Uses +find+ so the scan stops at the first match instead of building
      # an intermediate array of every match.
      #
      # NOTE(review): when defined at the top level, this method takes
      # precedence over Kernel#select for bare `select(...)` calls.
      #
      # @param hashes_array [Array<Hash>] hashes indexed with "column_name"
      # @param column_name [Object] value compared with == against each entry
      # @return [Hash, nil] the first matching hash, or nil
      def select(hashes_array, column_name)
        hashes_array.find { |h| h["column_name"] == column_name }
      end
      
      # Build one summary hash per expected column that has no identical
      # counterpart in `actual`, carrying the column name and both data types.
      diff = (expected - actual).map do |h|
        # nil when `actual` has no column with this name; guarded below so a
        # missing column is reported with a nil actual type instead of
        # raising NoMethodError.
        actual_column = select(actual, h["column_name"])

        {
          "column_name" => h["column_name"],
          "expected_column_data_type" => select(expected, h["column_name"])["column_data_type"],
          "actual_column_data_type" => actual_column && actual_column["column_data_type"],
        }
      end
      

      PS:这段代码当然还可以改进,使其看起来更优雅。

      【讨论】:

        【解决方案5】:

        代码

        # Summarises the rows that differ between +actual+ and +expected+.
        #
        # Rows found only in +actual+ are indexed under "actual_data_type" and
        # rows found only in +expected+ under "expected_data_type"; entries
        # that share a column name are merged into a single hash.
        #
        # @param actual [Array<Hash>] rows passed to hashify after subtracting +expected+
        # @param expected [Array<Hash>] rows passed to hashify after subtracting +actual+
        # @return [Array<Hash>] the merged per-column hashes
        def convert(actual, expected)
          actual_side = hashify(actual - expected, "actual_data_type")
          expected_side = hashify(expected - actual, "expected_data_type")

          # The block only runs for column names present on both sides.
          combined = actual_side.merge(expected_side) do |_name, actual_row, expected_row|
            actual_row.merge(expected_row)
          end
          combined.values
        end
        
        # Indexes +arr+ by column name.
        #
        # Each value is a two-entry hash holding the column name and, under
        # +key+, the row's "column_data_type". When several rows share a
        # column name, the last one determines the stored value.
        #
        # @param arr [Array<Hash>] rows read via "column_name" and "column_data_type"
        # @param key [Object] key under which each row's data type is stored
        # @return [Hash] column name => { "column_name" => name, key => data type }
        def hashify(arr, key)
          pairs = arr.map do |row|
            name = row["column_name"]
            [name, { "column_name" => name, key => row["column_data_type"] }]
          end
          pairs.to_h
        end
        

        示例

        actual = [
          {"column_name"=>"TRANSACTIONDATE", "column_data_type"=>"TIMESTAMP"},
          {"column_name"=>"NONINTERESTEXPENSE", "column_data_type"=>"VARCHAR"},
          {"column_name"=>"NONINTERESTINCOME", "column_data_type"=>"NUMBER"},
          {"column_name"=>"UPDATEDATE", "column_data_type"=>"TIMESTAMP"}
        ]
        
        expected = [
          {"column_name"=>"NONINTERESTINCOME", "column_data_type"=>"NUMBER"},
          {"column_name"=>"NONINTERESTEXPENSE", "column_data_type"=>"NUMBER"},
          {"column_name"=>"TRANSACTIONDATE", "column_data_type"=>"NUMBER"},
          {"column_name"=>"UPDATEDATE", "column_data_type"=>"TIMESTAMP"}
        ]
        
        convert(actual, expected)
          #=> [{"column_name"=>"TRANSACTIONDATE",
          #     "actual_data_type"=>"TIMESTAMP", "expected_data_type"=>"NUMBER"},
          #    {"column_name"=>"NONINTERESTEXPENSE",
          #     "actual_data_type"=>"VARCHAR", "expected_data_type"=>"NUMBER"}] 
        

        说明

        对于上面的例子,步骤如下。

        首先,对 actual 和 expected(各自减去对方之后的差集)调用 hashify:

        f = actual-expected
          #=> [{"column_name"=>"TRANSACTIONDATE", "column_data_type"=>"TIMESTAMP"},
          #    {"column_name"=>"NONINTERESTEXPENSE", "column_data_type"=>"VARCHAR"}]
        
        g = hashify(f, "actual_data_type")
          #=> {"TRANSACTIONDATE"=>{"column_name"=>"TRANSACTIONDATE",
          #      "actual_data_type"=>"TIMESTAMP"},
          #    "NONINTERESTEXPENSE"=>{ "column_name"=>"NONINTERESTEXPENSE",
          #      "actual_data_type"=>"VARCHAR"}}
        
        h = expected-actual
          #=> [{"column_name"=>"NONINTERESTEXPENSE", "column_data_type"=>"NUMBER"},
          #    {"column_name"=>"TRANSACTIONDATE", "column_data_type"=>"NUMBER"}]
        
        i = hashify(h, "expected_data_type")
          #=> {"NONINTERESTEXPENSE"=>{"column_name"=>"NONINTERESTEXPENSE",
          #      "expected_data_type"=>"NUMBER"},
          #    "TRANSACTIONDATE"=>{"column_name"=>"TRANSACTIONDATE",
          #      "expected_data_type"=>"NUMBER"}}
        

        接下来使用带块形式的 Hash#merge 合并 g 和 i;该块用于确定两个待合并哈希中都存在的键所对应的值。三个块变量的定义见文档(第一个是公共键,我用下划线表示,说明它在块的计算中没有被用到)。

        j = g.merge(i) { |_,a,e| a.merge(e) }
          #=> {"TRANSACTIONDATE"=>{"column_name"=>"TRANSACTIONDATE",
          #      "actual_data_type"=>"TIMESTAMP", "expected_data_type"=>"NUMBER"},
          #    "NONINTERESTEXPENSE"=>{"column_name"=>"NONINTERESTEXPENSE",
          #      "actual_data_type"=>"VARCHAR", "expected_data_type"=>"NUMBER"}}
        

        最后,丢弃键,只保留值。

        k = j.values
          #=> [{"column_name"=>"TRANSACTIONDATE", "actual_data_type"=>"TIMESTAMP",
          #     "expected_data_type"=>"NUMBER"},
          #    {"column_name"=>"NONINTERESTEXPENSE", "actual_data_type"=>"VARCHAR",
          #     "expected_data_type"=>"NUMBER"}]
        

        【讨论】:

          猜你喜欢
          • 2014-07-28
          • 1970-01-01
          • 1970-01-01
          • 2022-01-06
          • 2018-05-08
          • 2012-07-22
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          相关资源
          最近更新 更多