Grok 是正则表达式的包装器。如果你可以用正则表达式解析数据,你可以用 grok 来实现它。
即使您的范围特定于 CommandLine 字段,解析大多数 key=value 日志中的每个字段都非常简单,并且可以通过一些 grok 过滤器对每个字段使用单个正则表达式。如果您打算存储、查询和可视化日志 - 数据越多越好。
正则表达式:
首先我们从以下内容开始:
(.*?(?=\s\w+=|\<|$))
-
.*? - 匹配除行终止符以外的任何字符
-
(?=\s\w+=|\<|$)) - 断言模式的正向前瞻必须与以下内容匹配
-
\s\w+= - 前面有空格的任何单词字符,后跟 =
-
|\<|$ - 或者可以匹配 < 或行尾,以免将它们包含在匹配组中。
这意味着每个字段都可以像下面这样解析:
CommandLine=(.*?(?=\s\w+=|\<|$))
Grok:
现在这意味着我们可以开始创建 grok 过滤器了。它的强大之处在于,可重用组件可以应用语义语言。
/etc/logstash/patterns/powershell.grok:
# Patterns
PS_KEYVALUE (.*?(?=\s\w+=|\<|$))
# Fields
PS_NEWCOMMANDSTATE NewCommandState=%{PS_KEYVALUE:NewCommandState}
PS_SEQUENCENUMBER SequenceNumber=%{PS_KEYVALUE:SequenceNumber}
PS_HOSTNAME HostName=%{PS_KEYVALUE:HostName}
PS_HOSTVERSION HostVersion=%{PS_KEYVALUE:HostVersion}
PS_HOSTID HostId=%{PS_KEYVALUE:HostId}
PS_HOSTAPPLICATION HostApplication=%{PS_KEYVALUE:HostApplication}
PS_ENGINEVERSION EngineVersion=%{PS_KEYVALUE:EngineVersion}
PS_RUNSPACEID RunspaceId=%{PS_KEYVALUE:RunspaceId}
PS_PIPELINEID PipelineId=%{PS_KEYVALUE:PipelineId}
PS_COMMANDNAME CommandName=%{PS_KEYVALUE:CommandName}
PS_COMMANDTYPE CommandType=%{PS_KEYVALUE:CommandType}
PS_SCRIPTNAME ScriptName=%{PS_KEYVALUE:ScriptName}
PS_COMMANDPATH CommandPath=%{PS_KEYVALUE:CommandPath}
PS_COMMANDLINE CommandLine=%{PS_KEYVALUE:CommandLine}
%{PATTERN:label} 将使用 PS_KEYVALUE 正则表达式,匹配组将在 JSON 中使用该值进行标记。在这里您可以灵活地命名您知道的字段。
/etc/logstash/conf.d/powershell.conf:
input {
...
}
filter {
grok {
patterns_dir => "/etc/logstash/patterns"
break_on_match => false
match => [
"message", "%{PS_NEWCOMMANDSTATE}",
"message", "%{PS_SEQUENCENUMBER}",
"message", "%{PS_HOSTNAME}",
"message", "%{PS_HOSTVERSION}",
"message", "%{PS_HOSTID}",
"message", "%{PS_HOSTAPPLICATION}",
"message", "%{PS_ENGINEVERSION}",
"message", "%{PS_RUNSPACEID}",
"message", "%{PS_PIPELINEID}",
"message", "%{PS_COMMANDNAME}",
"message", "%{PS_COMMANDTYPE}",
"message", "%{PS_SCRIPTNAME}",
"message", "%{PS_COMMANDPATH}",
"message", "%{PS_COMMANDLINE}"
]
}
}
output {
stdout { codec => "rubydebug" }
}
结果:
{
"HostApplication" => "C:\\WINDOWS\\system32\\WindowsPowerShell\\v1.0\\powershell.exe",
"EngineVersion" => "5.1.14409.1005",
"RunspaceId" => "bd4224a9-ce42-43e3-b8bb-53a302c342c9",
"message" => "<'Data'>NewCommandState=Stopped SequenceNumber=1463 HostName=ConsoleHost HostVersion=5.1.14409.1005 HostId=b99970c6-0f5f-4c76-9fb0-d5f7a8427a2a HostApplication=C:\\WINDOWS\\system32\\WindowsPowerShell\\v1.0\\powershell.exe EngineVersion=5.1.14409.1005 RunspaceId=bd4224a9-ce42-43e3-b8bb-53a302c342c9 PipelineId=167 CommandName=Import-Module CommandType=Cmdlet ScriptName= CommandPath= CommandLine=Import-Module -Verbose.\\nishang.psm1<'/Data'>",
"HostId" => "b99970c6-0f5f-4c76-9fb0-d5f7a8427a2a",
"HostVersion" => "5.1.14409.1005",
"CommandLine" => "Import-Module -Verbose.\\nishang.psm1",
"@timestamp" => 2017-05-12T23:49:24.130Z,
"port" => 65134,
"CommandType" => "Cmdlet",
"@version" => "1",
"host" => "10.0.2.2",
"SequenceNumber" => "1463",
"NewCommandState" => "Stopped",
"PipelineId" => "167",
"CommandName" => "Import-Module",
"HostName" => "ConsoleHost"
}