Fork me on GitHub
Suzf  Blog

How to exploit Apache access logs with Rsyslog, Elasticsearch and Kibana

前言 本文主要简述如何使用 Rsyslog 实现日志的中心化，并利用 ELK Stack 实现图形搜索与数据统计。

Use Rsyslog to Collect Apache Logs

# diff -ruN /etc/rsyslog.conf_170420  /etc/rsyslog.conf
--- /etc/rsyslog.conf_170420	2017-04-20 11:02:43.477549484 +0800
+++ /etc/rsyslog.conf	2017-04-21 16:49:43.879806067 +0800
@@ -8,17 +8,22 @@
 # The imjournal module bellow is now used as a message source instead of imuxsock.
 $ModLoad imuxsock # provides support for local system logging (e.g. via logger command)
 $ModLoad imjournal # provides access to the systemd journal
-#$ModLoad imklog # reads kernel messages (the same are read from journald)
-#$ModLoad immark  # provides --MARK-- message capability
+$ModLoad imklog # reads kernel messages (the same are read from journald)
+$ModLoad immark  # provides --MARK-- message capability
 
 # Provides UDP syslog reception
-#$ModLoad imudp
-#$UDPServerRun 514
+$ModLoad imudp
+$UDPServerRun 514
 
 # Provides TCP syslog reception
 #$ModLoad imtcp
 #$InputTCPServerRun 514
 
+# Provides File syslog reception
+# $ModLoad imfile
+
+# Provides parser syslog reception
+$ModLoad mmnormalize
 
 #### GLOBAL DIRECTIVES ####
 
@@ -26,7 +31,9 @@
 $WorkDirectory /var/lib/rsyslog
 
 # Use default timestamp format
-$ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat
+#$ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat
+$ActionFileDefaultTemplate RSYSLOG_FileFormat
+
 
 # File syncing capability is disabled by default. This feature is usually not required,
 # not useful and an extreme performance hit
@@ -51,7 +58,7 @@
 
 # Log anything (except mail) of level info or higher.
 # Don't log private authentication messages!
-*.info;mail.none;authpriv.none;cron.none                /var/log/messages
+#*.info;mail.none;authpriv.none;cron.none                /var/log/messages
 
 # The authpriv file has restricted access.
 authpriv.*                                              /var/log/secure
@@ -73,6 +80,16 @@
 local7.*                                                /var/log/boot.log
 
 
+if $syslogfacility-text == 'local6' and $programname == 'access_test' then /var/log/httpd/access_test.log
+
+# Python Apps logging test
+if $programname == 'logger_test' then /var/log/logger_test.log
+
+if $syslogfacility-text == 'local6' then /var/log/local6.log
+
+
+
 # ### begin forwarding rule ###
 # The statement between the begin ... end define a SINGLE forwarding
 # rule. They belong together, do NOT split them. If you create multiple
@@ -92,3 +109,52 @@
 
 local4.* /var/log/ldap.log
 
+
+# Elasticsearch output module
+module(load="omelasticsearch")
+
+# this is for index names to be like: rsyslog-YYYY.MM.DD
+template(name="rsyslog-index"
+  type="list") {
+    constant(value="rsyslog-")
+    property(name="timereported" dateFormat="rfc3339" position.from="1" position.to="4")
+    constant(value=".")
+    property(name="timereported" dateFormat="rfc3339" position.from="6" position.to="7")
+    constant(value=".")
+    property(name="timereported" dateFormat="rfc3339" position.from="9" position.to="10")
+}
+
+
+# template to generate JSON documents for Elasticsearch in Logstash format
+template(name="plain-syslog"
+  type="list") {
+    constant(value="{")
+    constant(value="\"@timestamp\":\"")         property(name="timereported" dateFormat="rfc3339")
+    constant(value="\",\"host\":\"")            property(name="hostname")
+    constant(value="\",\"severity-num\":")      property(name="syslogseverity")
+    constant(value=",\"facility-num\":")        property(name="syslogfacility")
+    constant(value=",\"severity\":\"")          property(name="syslogseverity-text")
+    constant(value="\",\"facility\":\"")        property(name="syslogfacility-text")
+    constant(value="\",\"syslogtag\":\"")       property(name="syslogtag" format="json")
+    constant(value="\",\"message\":\"")         property(name="msg" format="json")
+    constant(value="\"}")
+  }
+# if $programname ==  'httpd' then {
+if $syslogfacility-text == 'local6' then {
+    # action(type="mmjsonparse")
+    action(type="omelasticsearch"
+      server="172.16.9.50"
+      serverport="9200"
+      template="plain-syslog"  # use the template defined earlier
+      searchIndex="rsyslog-index"
+      dynSearchIndex="on"
+      searchType="events"
+      bulkmode="on"                   # use the Bulk API
+      queue.dequeuebatchsize="5000"   # ES bulk size
+      queue.size="100000"   # capacity of the action queue
+      queue.workerthreads="5"   # 5 workers for the action
+      action.resumeretrycount="-1"  # retry indefinitely if ES is unreachable
+      errorfile="/var/log/omelasticsearch.log"
+    )
+}
+

重启服务

# systemctl  restart rsyslog.service

Apache Syslog Setting

# diff -ruN /etc/httpd/conf/httpd.conf{.old,}
--- /etc/httpd/conf/httpd.conf.old	2017-04-21 17:02:57.456763558 +0800
+++ /etc/httpd/conf/httpd.conf	2017-04-21 17:09:51.803741363 +0800
@@ -196,6 +196,9 @@
     LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
     LogFormat "%h %l %u %t \"%r\" %>s %b" common
 
+    # Set for Rsyslog
+    LogFormat "%a %D %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" rsyslog_fmt
+
 
     <IfModule logio_module>
       # You need to enable mod_logio.c to use %I and %O
@@ -215,7 +218,8 @@
     # If you prefer a logfile with access, agent, and referer information
     # (Combined Logfile Format) you can use the following directive.
     #
-    CustomLog "logs/access_log" combined
+    # CustomLog "logs/access_log" combined
+    CustomLog "|/usr/bin/logger -t access_test -p local6.notice" rsyslog_fmt
 </IfModule>
 
 <IfModule alias_module>

重启服务

# systemctl  restart httpd.service

验证

# curl -I http://172.16.9.50
HTTP/1.1 403 Forbidden
Date: Fri, 21 Apr 2017 09:16:16 GMT
Server: Apache/2.4.6 (CentOS) PHP/5.4.16
Last-Modified: Thu, 16 Oct 2014 13:20:58 GMT
ETag: "1321-5058a1e728280"
Accept-Ranges: bytes
Content-Length: 4897
Keep-Alive: timeout=5, max=100
Connection: keep-alive
Content-Type: text/html; charset=UTF-8
Proxy-Connection: keep-alive

# tail -1 /var/log/httpd/access_test.log 
2017-04-21T17:16:16.872066+08:00 monkey access_test: 172.16.9.11 440 - - [21/Apr/2017:17:16:16 +0800] "HEAD / HTTP/1.1" 403 - "-" "curl/7.29.0"

然后在 Kibana 上创建索引 ...

Python App Log Test

# cat logger_test.py 
import logging
from logging.handlers import SysLogHandler


class Logger(object):
    """Small wrapper that sends log records to the local syslog socket.

    Records are written to ``/dev/log`` on facility ``local6`` and are
    prefixed with ``<ident>[<pid>]:`` so that a syslog daemon can filter on
    the program name.
    """

    def __init__(self, ident='logger_test', *args, **kwargs):
        """Create the underlying logger and attach a syslog handler.

        :param ident: name of the logger; it is emitted at the start of every
            message by the formatter below. Defaults to ``'logger_test'``,
            the value the example script uses.
        :param args: accepted for backward compatibility, unused.
        :param kwargs: accepted for backward compatibility, unused.
        """
        # Use the identifier passed by the caller instead of relying on a
        # module-level global that may not exist when the class is imported.
        self.logger = logging.getLogger(ident)
        # INFO is the only level this wrapper emits; set it once here rather
        # than on every call to info().
        self.logger.setLevel(logging.INFO)
        self.handler = SysLogHandler(address='/dev/log', facility=SysLogHandler.LOG_LOCAL6)
        self.formatter = logging.Formatter('%(name)s[%(process)d]: [%(levelname)s] %(message)s')
        # Configure the handler completely before registering it on the logger.
        self.handler.setFormatter(self.formatter)
        self.logger.addHandler(self.handler)

    def info(self, msg):
        """Echo ``msg`` to stdout and log it at INFO level.

        :param msg: text of the message (must be a string, it is concatenated
            to the ``"info: "`` prefix for the console echo).
        """
        # Parenthesized single-argument form prints identically on Python 2 and 3.
        print("info: " + msg)
        self.logger.info(msg)


# Logger name; the formatter places it at the start of each message, which is
# what the rsyslog rule `$programname == 'logger_test'` filters on.
ident='logger_test'
# Build the syslog-backed logger and send a single INFO-level test message.
logger = Logger(ident)
logger.info("test syslog messages for python app")

# 执行测试 查看结果
# python logger_test.py 
info: test syslog messages for python app

# tail -1 /var/log/logger_test.log 
2017-04-21T16:30:10.656753+08:00 monkey logger_test[29055]: [INFO] test syslog messages for python app

Kibana 验证

rsyslog_to_es

Reference

[0] https://www.freeipa.org/page/Howto/Centralised_Logging_with_Logstash/ElasticSearch/Kibana

[1] https://www.loggly.com/ultimate-guide/centralizing-apache-logs/

[2] http://wiki.rsyslog.com/index.php/Working_Apache_and_Rsyslog_configuration

[3] https://askubuntu.com/questions/186592/how-do-i-configure-rsyslog-to-send-logs-from-a-specific-program-to-a-remote-sysl

[4] http://www.rsyslog.com/doc/v8-stable/configuration/filters.html

[5] https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/7/html/System_Administrators_Guide/ch-Viewing_and_Managing_Log_Files.html