Logstash 读取 CSV 及其中的 JSON 对象（答案）

【问题标题】：Logstash Read CSV and its JSON object（Logstash 读取 CSV 及其中的 JSON 对象）
【发布时间】：2018-03-13 07:44:06
【问题描述】：

我有一个 CSV 文件，其中的记录格式如下（最后一列是一个跨多行的 JSON 对象）。我该如何用 Logstash 读取这个 CSV 文件，并解析其中的 JSON 对象？

"2017-09-29 10:08:28.982","20170929100828_x9RqJ","20170929100828_x9RqJ","20170929100828_x9RqJ","JOB_STARTED","JOB:MF_ATM_Ingestion_Demo","","DEMO_INGESTION","MF_ATM_Ingestion_Demo","Default","0.2","","","","","","{""context_parameters"": {
    ""acc_table_name"": ""testingestion"",
    ""aws_bucket_name"": ""cst-ssb-demo"",
    ""csv_field_delimeter"": "","",
    ""csv_file_path"": """",
    ""csv_filtered_file"": """",
    ""csv_included_header"": 1,
    ""csv_row_delimiter"": ""\n"",
    ""mf_conversion_root_folder"": ""/cst_ssb_demo/talend/mf_atm_ingestion/mf_vsam_conversion/"",
    ""mf_cpy_file_name"": ""SSBCPYBK.TXT"",
    ""mf_file_name"": ""D101012"",
    ""ouput_encrypted_root_folder"": ""/cst_ssb_demo/talend/"",
    ""schena_validation_root_folder"": ""/cst_ssb_demo/talend/mf_atm_ingestion/schema_validation/""
}}"

我可以直接读取这个 CSV 文件吗？另外，该如何读取其中的 JSON 对象？

请帮忙。

【问题讨论】：

标签： json elasticsearch logstash kibana elastic-stack

【解决方案1】：

使用此配置：

# Pipeline: read a CSV file whose last column holds a multi-line JSON document,
# split each record into columns, then expand the JSON column into event fields.
input {
    file {
        path => "/path/to/logstash_input.csv"
        # Read from the first line of the file when it is first discovered.
        start_position => "beginning"
        # One CSV record spans several physical lines because the quoted JSON
        # column contains newlines; stitch those lines back into one event.
        codec => multiline {
            # A record starts with a quoted date at the START of a line.
            # The "^" anchor matters: without it, a continuation line holding a
            # CSV-escaped date value (e.g. ""run_date"": ""2017-09-29"") would
            # also match and wrongly split the record in two.
            pattern => "^\"\d{4}-\d{2}-\d{2}"
            # Lines that do NOT match the pattern are appended to the previous line.
            negate => true
            what => "previous"
            # Emit the buffered record after 1 second without new data; otherwise
            # the last record of the file waits for a following record to arrive.
            auto_flush_interval => 1
        }
    }
}
filter {
    # No column names are configured, so fields are auto-named column1..column17.
    csv {
    }
    # column17 carries the JSON text; parse it into fields on the event root.
    json {
        source => "column17"
    }
}
output {
    # Print each event as JSON for inspection.
    stdout {
        codec => json
    }
}

我得到这个输出：

{
    "column1": "2017-09-29 10:08:28.982",
    "column12": "",
    "column11": "0.2",
    "column10": "Default",
    "column5": "JOB_STARTED",
    "column4": "20170929100828_x9RqJ",
    "column3": "20170929100828_x9RqJ",
    "column2": "20170929100828_x9RqJ",
    "column17": "{\"context_parameters\": {\n    \"acc_table_name\": \"testingestion\",\n    \"aws_bucket_name\": \"cst-ssb-demo\",\n    \"csv_field_delimeter\": \",\",\n    \"csv_file_path\": \"\",\n    \"csv_filtered_file\": \"\",\n    \"csv_included_header\": 1,\n    \"csv_row_delimiter\": \"\\n\",\n    \"mf_conversion_root_folder\": \"/cst_ssb_demo/talend/mf_atm_ingestion/mf_vsam_conversion/\",\n    \"mf_cpy_file_name\": \"SSBCPYBK.TXT\",\n    \"mf_file_name\": \"D101012\",\n    \"ouput_encrypted_root_folder\": \"/cst_ssb_demo/talend/\",\n    \"schena_validation_root_folder\": \"/cst_ssb_demo/talend/mf_atm_ingestion/schema_validation/\"\n}}",
    "message": "\"2017-09-29 10:08:28.982\",\"20170929100828_x9RqJ\",\"20170929100828_x9RqJ\",\"20170929100828_x9RqJ\",\"JOB_STARTED\",\"JOB:MF_ATM_Ingestion_Demo\",\"\",\"DEMO_INGESTION\",\"MF_ATM_Ingestion_Demo\",\"Default\",\"0.2\",\"\",\"\",\"\",\"\",\"\",\"{\"\"context_parameters\"\": {\n    \"\"acc_table_name\"\": \"\"testingestion\"\",\n    \"\"aws_bucket_name\"\": \"\"cst-ssb-demo\"\",\n    \"\"csv_field_delimeter\"\": \"\",\"\",\n    \"\"csv_file_path\"\": \"\"\"\",\n    \"\"csv_filtered_file\"\": \"\"\"\",\n    \"\"csv_included_header\"\": 1,\n    \"\"csv_row_delimiter\"\": \"\"\\n\"\",\n    \"\"mf_conversion_root_folder\"\": \"\"/cst_ssb_demo/talend/mf_atm_ingestion/mf_vsam_conversion/\"\",\n    \"\"mf_cpy_file_name\"\": \"\"SSBCPYBK.TXT\"\",\n    \"\"mf_file_name\"\": \"\"D101012\"\",\n    \"\"ouput_encrypted_root_folder\"\": \"\"/cst_ssb_demo/talend/\"\",\n    \"\"schena_validation_root_folder\"\": \"\"/cst_ssb_demo/talend/mf_atm_ingestion/schema_validation/\"\"\n}}\"",
    "column16": "",
    "column15": "",
    "column14": "",
    "tags": ["multiline"],
    "column13": "",
    "path": "/path/to/logstash_input.csv",
    "context_parameters": {
        "csv_row_delimiter": "\n",
        "csv_file_path": "",
        "mf_file_name": "D101012",
        "aws_bucket_name": "cst-ssb-demo",
        "csv_included_header": 1,
        "ouput_encrypted_root_folder": "/cst_ssb_demo/talend/",
        "mf_conversion_root_folder": "/cst_ssb_demo/talend/mf_atm_ingestion/mf_vsam_conversion/",
        "schena_validation_root_folder": "/cst_ssb_demo/talend/mf_atm_ingestion/schema_validation/",
        "csv_field_delimeter": ",",
        "acc_table_name": "testingestion",
        "csv_filtered_file": "",
        "mf_cpy_file_name": "SSBCPYBK.TXT"
    },
    "@timestamp": "2017-10-02T10:22:37.202Z",
    "@version": "1",
    "host": "my_laptop",
    "column9": "MF_ATM_Ingestion_Demo",
    "column8": "DEMO_INGESTION",
    "column7": "",
    "column6": "JOB:MF_ATM_Ingestion_Demo"
}

从这里，您可以：

在 CSV 过滤器中配置 CSV 列名称
删除无用字段（message、column17 等）
添加日期过滤器以解析字段 column1

【讨论】：