【问题标题】:Skip first row - when reading the Object using get_object API(使用 get_object API 读取对象时跳过第一行)
【发布时间】:2018-10-11 17:05:25
【问题描述】:

如何跳过第一行 - 使用 get_object API 读取对象时

import os
import boto3
import json
import logging

def lambda_handler(event, context):
    """Copy the CSV object named in an S3 event into a DynamoDB table.

    The bucket and key are taken from the first record of ``event``; the
    target table name is read from the ``DB_TABLE_NAME`` environment
    variable.  Each non-blank line of the object is split on ``,`` and its
    first four fields are stored under the attributes ``x``, ``c``, ``w``
    and ``f``.

    NOTE: every non-blank line is written, including the first line of the
    file, so a CSV header row ends up in the table as an item.

    Args:
        event: S3 notification event (dict with a ``Records`` list).
        context: Lambda context object (unused).

    Returns:
        ``None`` on success, an error string when ``DB_TABLE_NAME`` is not
        set, or a ``(message, table_name)`` tuple when the table is missing.

    Raises:
        KeyError: if ``event`` does not have the expected S3 structure.
        IndexError: if a non-blank line has fewer than four fields.
    """
    # Imported locally so the snippet does not depend on an extra
    # top-of-file import.
    from urllib.parse import unquote_plus

    # The original referenced an undefined global `log` (NameError).
    log = logging.getLogger(__name__)

    # Fetch the bucket name and the file
    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # Object keys are URL-encoded in S3 event notifications
    # (e.g. a space arrives as '+'), so decode before calling S3.
    key = unquote_plus(s3_info['object']['key'])

    # Resolve configuration before touching any AWS service.  Keeping the
    # try body this small means a KeyError raised elsewhere is no longer
    # misreported as a missing environment variable.
    try:
        tableName = os.environ['DB_TABLE_NAME']
    except KeyError as dynamoDBKeyError:
        msg = 'ERROR: Need DynamoDB Environment Var: DB_TABLE_NAME'
        print(dynamoDBKeyError)
        return msg

    # Declare S3 bucket and DynamoDB Boto3 Clients
    s3_client = boto3.client('s3')
    dynamodb = boto3.resource('dynamodb')
    # Need client just to access the Exception
    dynamodb_client = boto3.client('dynamodb')

    # Read the Object using get_object API
    obj = s3_client.get_object(Bucket=bucket, Key=key)
    rows = obj['Body'].read().decode("utf-8").split('\n')

    table = dynamodb.Table(tableName)
    log.info("TableName: %s", tableName)

    try:
        # Write the CSV file to the DynamoDB Table
        with table.batch_writer() as batch:
            for row in rows:
                # Drop the '\r' left behind by CRLF line endings.
                row = row.rstrip('\r')
                # A file ending in a newline yields a trailing empty
                # string; indexing its fields would raise IndexError.
                if not row.strip():
                    continue
                fields = row.split(',')
                batch.put_item(Item={
                    'x': fields[0],
                    'c': fields[1],
                    'w': fields[2],
                    'f': fields[3],
                })

        print('Finished Inserting into TableName: ' + tableName)
    except dynamodb_client.exceptions.ResourceNotFoundException:
        return ('ERROR: Unable to locate DynamoDB table: ', tableName)

以上代码读取 CSV 并将其插入 DynamoDB。这里的问题是:标题行(列名,column names)也被插入到了表中。如何跳过第一行并从第二行开始解析?`next` 对我不起作用。

【问题讨论】:

    标签: python python-3.x amazon-web-services api lambda


    【解决方案1】:

    也许不是最好的解决方案,但这应该可以解决问题:

    import os
    import boto3
    import json
    import logging
    
    def lambda_handler(event, context):
        """Copy the CSV object named in an S3 event into a DynamoDB table.

        The bucket and key are taken from the first record of ``event``; the
        target table name is read from the ``DB_TABLE_NAME`` environment
        variable.  The first line of the object (the CSV header) is skipped;
        every other non-blank line is split on ``,`` and its first four
        fields are stored under the attributes ``x``, ``c``, ``w`` and ``f``.

        Args:
            event: S3 notification event (dict with a ``Records`` list).
            context: Lambda context object (unused).

        Returns:
            ``None`` on success, an error string when ``DB_TABLE_NAME`` is
            not set, or a ``(message, table_name)`` tuple when the table is
            missing.

        Raises:
            KeyError: if ``event`` does not have the expected S3 structure.
            IndexError: if a non-blank data line has fewer than four fields.
        """
        # Imported locally so the snippet does not depend on an extra
        # top-of-file import.
        from urllib.parse import unquote_plus

        # The original referenced an undefined global `log` (NameError).
        log = logging.getLogger(__name__)

        # Fetch the bucket name and the file
        s3_info = event['Records'][0]['s3']
        bucket = s3_info['bucket']['name']
        # Object keys are URL-encoded in S3 event notifications
        # (e.g. a space arrives as '+'), so decode before calling S3.
        key = unquote_plus(s3_info['object']['key'])

        # Resolve configuration before touching any AWS service.  Keeping
        # the try body this small means a KeyError raised elsewhere is no
        # longer misreported as a missing environment variable.
        try:
            tableName = os.environ['DB_TABLE_NAME']
        except KeyError as dynamoDBKeyError:
            msg = 'ERROR: Need DynamoDB Environment Var: DB_TABLE_NAME'
            print(dynamoDBKeyError)
            return msg

        # Declare S3 bucket and DynamoDB Boto3 Clients
        s3_client = boto3.client('s3')
        dynamodb = boto3.resource('dynamodb')
        # Need client just to access the Exception
        dynamodb_client = boto3.client('dynamodb')

        # Read the Object using get_object API
        obj = s3_client.get_object(Bucket=bucket, Key=key)
        rows = obj['Body'].read().decode("utf-8").split('\n')

        table = dynamodb.Table(tableName)
        log.info("TableName: %s", tableName)

        try:
            # Write the CSV file to the DynamoDB Table
            with table.batch_writer() as batch:
                # rows[0] is the header line; slicing skips it without a flag.
                for row in rows[1:]:
                    # Drop the '\r' left behind by CRLF line endings.
                    row = row.rstrip('\r')
                    # A file ending in a newline yields a trailing empty
                    # string; indexing its fields would raise IndexError.
                    if not row.strip():
                        continue
                    fields = row.split(',')
                    batch.put_item(Item={
                        'x': fields[0],
                        'c': fields[1],
                        'w': fields[2],
                        'f': fields[3],
                    })

            print('Finished Inserting into TableName: ' + tableName)
        except dynamodb_client.exceptions.ResourceNotFoundException:
            return ('ERROR: Unable to locate DynamoDB table: ', tableName)
    

    使用 `for i in range(1, len(rows))` 循环可能会更好,但上面的写法对原有代码的改动更少。

    【讨论】:

      猜你喜欢
      • 2012-02-23
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2012-03-25
      • 1970-01-01
      • 2014-01-26
      • 1970-01-01
      相关资源
      最近更新 更多