使用给定数据集实现深度学习架构答案

【问题标题】：Implementing Deep Learning architecture with a given dataset使用给定数据集实现深度学习架构
【发布时间】：2017-03-14 11:18:55
【问题描述】：

我是 caffe 和深度学习的新手。我只是想实现深度学习架构。

这是我正在尝试实现的架构。架构和 Parse27k 数据集由 亚琛工业大学视觉计算研究所的计算机视觉小组创建和构建。

您可以在下面看到我需要改进的模型：

Train_val.prototxt

name: "Parse27"
# Training-phase input layer: produces the "crops" and "labels" blobs
# from the HDF5 source, 256 samples per batch.
layer {
  name: "data"
  type: "HDF5Data"
  top: "crops"
  top: "labels"
  include {
    phase: TRAIN  # only instantiated in the TRAIN phase
  }

  hdf5_data_param {
    # NOTE(review): Caffe's HDF5Data expects `source` to be a text file that
    # lists the .h5 files, one per line — confirm train.txt is such a list.
    source: "/home/nail/caffe/caffe/examples/hdf5_classification/data/train.txt"
    batch_size: 256
  }
}
# Test-phase input layer: same name and tops ("crops", "labels") as the
# TRAIN data layer, but reads from test.txt. The `include` rule keeps the
# two layers from coexisting in one phase.
layer {
  name: "data"
  type: "HDF5Data"
  top: "crops"
  top: "labels"
  include {
    phase: TEST  # only instantiated in the TEST phase
  }
  hdf5_data_param {
    # NOTE(review): as with the TRAIN layer, `source` is presumably a text
    # file listing .h5 files — confirm.
    source: "/home/nail/caffe/caffe/examples/hdf5_classification/data/test.txt"
    batch_size: 256
  }
}
# conv1: first convolution over the "crops" input.
# 96 output channels, 11x11 kernel, stride 4, no padding specified.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "crops"
  top: "conv1"
  # By Caffe convention the first param block applies to the weights and the
  # second to the bias: here the bias learns at 2x the base rate and is
  # excluded from weight decay.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# relu1: ReLU applied in place (bottom and top are both "conv1").
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
# pool1: 3x3 max pooling with stride 2 over "conv1".
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# norm1: local response normalization of "pool1"
# (local_size 5, alpha 1e-4, beta 0.75).
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# conv2: grouped convolution over "norm1".
# 256 output channels, 5x5 kernel, pad 2, split into 2 groups.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  # Weights (first block) / bias (second block) multipliers, per Caffe's
  # param ordering: bias learns at 2x and is not weight-decayed.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    # Unlike conv1, the bias here is initialised to 1 rather than 0.
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# relu2: ReLU applied in place on "conv2".
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
# pool2: 3x3 max pooling with stride 2 over "conv2".
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# norm2: local response normalization of "pool2"
# (same settings as norm1: local_size 5, alpha 1e-4, beta 0.75).
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# conv3: convolution over "norm2".
# 384 output channels, 3x3 kernel, pad 1, no grouping specified.
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  # Weights (first block) / bias (second block) multipliers, per Caffe's
  # param ordering: bias learns at 2x and is not weight-decayed.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# relu3: ReLU applied in place on "conv3".
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# conv4: grouped convolution over "conv3" (no pooling/LRN in between).
# 384 output channels, 3x3 kernel, pad 1, 2 groups.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  # Weights (first block) / bias (second block) multipliers, per Caffe's
  # param ordering: bias learns at 2x and is not weight-decayed.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    # Bias initialised to 1.
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# relu4: ReLU applied in place on "conv4".
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# conv5: last convolution, over "conv4".
# 256 output channels, 3x3 kernel, pad 1, 2 groups.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  # Weights (first block) / bias (second block) multipliers, per Caffe's
  # param ordering: bias learns at 2x and is not weight-decayed.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    # Bias initialised to 1.
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# relu5: ReLU applied in place on "conv5".
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
# pool5: 3x3 max pooling with stride 2 over "conv5"; its output feeds fc6.
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# fc6: fully connected layer over "pool5" with 4096 outputs.
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  # Weights (first block) / bias (second block) multipliers, per Caffe's
  # param ordering: bias learns at 2x and is not weight-decayed.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    # Narrower weight init (std 0.005) than the conv layers (std 0.01).
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# relu6: ReLU applied in place on "fc6".
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
# drop6: dropout applied in place on "fc6" with ratio 0.5.
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# fc7: second fully connected layer, over "fc6", with 4096 outputs.
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  # Weights (first block) / bias (second block) multipliers, per Caffe's
  # param ordering: bias learns at 2x and is not weight-decayed.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
# relu7: ReLU applied in place on "fc7".
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
# drop7: dropout applied in place on "fc7" with ratio 0.5.
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# fc8: final fully connected layer over "fc7"; its output feeds both the
# accuracy and the loss layers.
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  # Weights (first block) / bias (second block) multipliers, per Caffe's
  # param ordering: bias learns at 2x and is not weight-decayed.
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    # NOTE(review): 1000 does not obviously match anything in this dataset
    # (labels has 12 columns). Per the answer's discussion, num_output should
    # equal the number of discrete states of the attribute being predicted,
    # with a separate fc8 per attribute — confirm the intended value.
    num_output: 1000
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# accuracy: compares "fc8" predictions against "labels"; TEST phase only.
# NOTE(review): "labels" has 12 columns per sample according to the dataset
# listing in the question, while this layer is wired as a single-label
# classifier — the discussion suggests one accuracy layer per attribute.
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "labels"
  top: "accuracy"
  include {
    phase: TEST
  }
}
# loss: softmax + multinomial logistic loss of "fc8" against "labels".
# No `include` rule, so the layer is not restricted to a single phase.
# NOTE(review): this is the layer the question wants to replace with
# per-attribute losses; as wired it consumes the whole 12-column "labels" blob.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "labels"  
  top: "loss"
}

Solver.prototxt

# Solver settings for the Parse27 net defined in train_val.prototxt.
net: "models/Parse27/train_val.prototxt"
# NOTE(review): if test_iter counts test batches, 1000 x batch_size 256 is
# 256,000 samples per test pass, far more than the 27,482 training samples
# listed in the question — confirm against the actual test-set size.
test_iter: 1000
# Run a test pass every 1000 training iterations.
test_interval: 1000
# "step" policy: start at base_lr and scale by gamma every `stepsize` iterations.
base_lr: 0.01
lr_policy: "step"
gamma: 0.1
stepsize: 100000
# Log every 20 iterations; stop after 450000 iterations.
display: 20
max_iter: 450000
momentum: 0.9
weight_decay: 0.0005
# Write a snapshot every 10000 iterations using the prefix below.
snapshot: 10000
snapshot_prefix: "models/Parse27/Parse27_train"
solver_mode: GPU

在实施此架构时，我遇到了两个主要困难。

如上所示，我的模型不包括自定义损失层。我的模型几乎是 caffeNet 架构。但是我应该用自定义损失层（绿色框）替换我在红色框内的最后一层。
我的训练数据集具有以下结构。

crops       Dataset {27482, 3, 128, 192}
labels      Dataset {27482, 12}
mean        Dataset {3, 128, 192}
pids        Dataset {27482}

如上所示，crops 和 labels 的行数（样本数）相同，都是 27482。但是我的 labels 数据集有 12 列。当只有 1 个标签时，我的模型可以正常工作。我怎样才能用全部 12 个标签进行训练？

我在 Train_val.prototxt 中的模型现在看起来像这样：

我们将不胜感激任何形式的帮助或建议。

【问题讨论】：

标签： machine-learning neural-network deep-learning caffe protocol-buffers

【解决方案1】：

如果我理解正确，您是想为每个输入样本预测 12 个离散标签（属性）。在这种情况下，您应该使用 "Slice" 层把标签切分开：

# slice_labels: splits the 12-column label blob into 12 single-column blobs
# (attr_00 .. attr_11), one per attribute, so that each attribute can be fed
# to its own loss/accuracy layer.
layer {
  type: "Slice"
  name: "slice_labels"
  # Must match the blob name produced by the HDF5Data layers, which is
  # "labels" (plural) in the question's train_val.prototxt.
  bottom: "labels"
  top: "attr_00"
  top: "attr_01"
  top: "attr_02"
  top: "attr_03"
  top: "attr_04"
  top: "attr_05"
  top: "attr_06"
  top: "attr_07"
  top: "attr_08"
  top: "attr_09"
  top: "attr_10"
  top: "attr_11"
  slice_param {
    axis: -1 # slice the last dimension
    # 11 slice points for 12 tops: each top receives exactly one column.
    slice_point: 1
    slice_point: 2
    slice_point: 3
    slice_point: 4
    slice_point: 5
    slice_point: 6
    slice_point: 7
    slice_point: 8
    slice_point: 9
    slice_point: 10
    slice_point: 11
  }
}

现在，每个属性都有一个“标量”标签。我相信接下来您可以自己完成。

【讨论】：

我在执行时收到这样的错误消息：F1101 14:11:12.764832 7023 layer.hpp:374] Check failed: ExactNumBottomBlobs() == bottom.size() (2 vs. 13) Accuracy Layer takes 2 bottom blob(s) as input.（即 Accuracy 层只接受 2 个 bottom blob 作为输入。）我甚至尝试只把其中一个属性连接到 loss 和 fc8，仍然是同样的错误。图片链接：postimg.org/image/3yh66h8fp ；模型 prototxt 文件：codepad.org/GfsLcWqn
@MIRMIX 看看你发布的模型：i.stack.imgur.com/B7Cmq.png 。如您所见，每个属性都有各自独立的损失层（和准确率层）。您不能让所有属性共用同一个层。请按照您想要实现的方案来搭建网络。
我明白了，但正如我所提到的，即使我只对一个属性这样做，仍然会报错：postimg.org/image/xrqzhgzz7 。它不应该至少对一个标签有效吗？
@MIRMIX 您的模型中没有显示准确率层。该层同样只能用于一个属性。您的 fc8 的 num_output 是否对应于 attr_00 的离散标签（“状态”）的数量？请注意，每个属性都应该有各自不同的 fc8 层。（如果您仔细看 i.stack.imgur.com/B7Cmq.png ，会发现每个属性都需要各自的 fc8_hidden 层和 fc8 层。）