【问题标题】:pandas.errors.ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'(pandas.errors.ParserError:数据标记化出错。C 错误:对源调用 read(nbytes) 失败。请尝试 engine='python')
【发布时间】:2020-03-13 08:10:03
【问题描述】:

我试图从 CSV 文件生成 TFRecord,以便使用 TensorFlow 对象检测 API(Object Detection API)进行对象检测。以下是我使用的代码:

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
from PIL import Image
from collections import namedtuple, OrderedDict
from models.research.object_detection.utils import dataset_util
import tensorflow as tf

# Command-line flags for this script, registered through `tf.app.flags`.
# All three are string flags whose default is the empty string.
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('image_dir', '', 'Path to the Image directory')
# Flag values are read through this object (FLAGS.csv_input, etc.).
FLAGS = flags.FLAGS

# Textual class label -> integer class id. Note that label '0' maps to id 8,
# not 0 (presumably because id 0 is reserved by the detection API -- confirm).
_LABEL_TO_ID = {
    '1': 1,
    '2': 2,
    '3': 3,
    '4': 4,
    '5': 5,
    '6': 6,
    '7': 7,
    '0': 8,
}


def class_text_to_int(row_label):
    """Return the integer class id for a class label from the CSV.

    Args:
        row_label: The label value; it is converted with ``str()`` before the
            lookup, so both ``1`` and ``'1'`` are accepted.

    Returns:
        The integer class id, or ``None`` when the label is not one of
        ``'0'`` .. ``'7'``.
    """
    # dict.get returns None for unknown labels, which makes explicit the
    # result the original fall-through `else: None` produced implicitly.
    return _LABEL_TO_ID.get(str(row_label))
def split(df, group):
    """Partition *df* into one record per distinct value of column *group*.

    Args:
        df: DataFrame to partition.
        group: Column name (or other ``groupby`` key) to group on.

    Returns:
        A list of ``data(filename, object)`` namedtuples, where ``filename``
        is the group key and ``object`` is the sub-DataFrame returned by
        ``get_group`` for that key.
    """
    record = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(record(key, grouped.get_group(key)))
    return records

def create_tf_example(example, path=''):
    """Build a ``tf.train.Example`` for one image and its bounding boxes.

    Args:
        example: Record with a ``filename`` attribute (the image file name)
            and an ``object`` attribute (DataFrame of that image's annotation
            rows with ``xmin``, ``xmax``, ``ymin``, ``ymax`` and ``class``
            columns).
        path: Directory that contains the image file. Defaults to ``''``, in
            which case ``example.filename`` is opened exactly as given.

    Returns:
        A ``tf.train.Example`` holding the raw image bytes, the image size,
        the box coordinates divided by the image width/height, and the class
        text/ids of every annotation row.
    """
    image_path = os.path.join(path, '{}'.format(example.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image_data = fid.read()

    # Decode only to learn the pixel dimensions; the undecoded bytes are what
    # gets stored in the record.
    with Image.open(io.BytesIO(encoded_image_data)) as image:
        width, height = image.size

    filename = example.filename.encode('utf8')
    # NOTE(review): the format is hard-coded to PNG -- confirm that the
    # dataset images really are PNG files.
    image_format = b'png'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for _, row in example.object.iterrows():
        # Box corners are stored as fractions of the image size.
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        # str() first: pandas parses purely numeric class columns as numbers,
        # which have no .encode(); class_text_to_int does the same conversion.
        classes_text.append(str(row['class']).encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    return tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))


def main(_):
    """Convert the annotation CSV into a TFRecord file.

    Reads the CSV named by ``--csv_input``, groups its rows by the
    ``filename`` column and writes one serialized example per image to
    ``--output_path``. Images are looked up in ``--image_dir``, resolved
    against the current working directory.

    Args:
        _: Unused positional argument supplied by the caller.

    Raises:
        IOError: If ``--csv_input`` points at a directory instead of a file.
    """
    # Passing a directory here (e.g. --csv_input and --image_dir swapped)
    # otherwise surfaces as the opaque pandas error "C error: Calling
    # read(nbytes) on source failed", so fail early with a clear message.
    if os.path.isdir(FLAGS.csv_input):
        raise IOError(
            '--csv_input must be a CSV file, but a directory was given: '
            '{}'.format(FLAGS.csv_input))

    path = os.path.join(os.getcwd(), FLAGS.image_dir)
    # Read and group the CSV before opening the writer so that a bad input
    # does not leave an empty output file behind.
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')

    # The context manager closes the writer even if an example fails.
    with tf.python_io.TFRecordWriter(FLAGS.output_path) as writer:
        for group in grouped:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())

    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))



# Script entry point: hand control to tf.app.run() when executed directly
# (presumably it parses the flags and then invokes main -- confirm against
# the TensorFlow 1.x documentation).
if __name__ == '__main__':
   tf.app.run()

当我运行它时,我收到以下错误:

pandas.errors.ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

谁能帮帮我?提前感谢。

【问题讨论】:

    标签: python-3.x pandas tensorflow


    【解决方案1】:

    当我不小心像下面这样把参数弄反时,我遇到了同样的错误:

    python generate_tfrecord.py --image_dir=CrowdHumanTrain --csv_input=crowd_human_train_anno.csv --output_path=ch_train.record --label "head"
    

    我把目录路径传给了 CSV 路径参数,又把 CSV 文件路径传给了目录参数。

    请仔细检查传递给 generate_tfrecord.py 的各个参数。

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2011-11-29
      • 2017-01-10
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多