インストール
# yum install epel-release
# yum install python2-pip
# pip install apache-log-parser
バグ(?)修正
# sed -i -r -e "s/make_regex\('%I'\), '\.\*\?'/make_regex('%I'), '\\\d+'/" -e "s/make_regex\('%O'\), '\.\*\?'/make_regex('%O'), '\\\d+'/" /usr/lib/python2.7/site-packages/apache_log_parser/__init__.py
これをしないと%Iや%Oがログ最後尾に出力される場合に、出力がおかしくなる。
サンプルコード(a.py)
# -*- coding:utf-8 -*-
"""Parse an Apache access log and print every parsed field of each entry.

Each line of LOG is parsed with apache_log_parser; a separator line of
'#' characters is printed, followed by one key/value pair per parsed field.
"""
import apache_log_parser

# Path of the access log to read.
LOG = '/var/log/httpd/access_log'

# Combined log format with %I and %O appended at the end of the line
# (they show up as 'bytes_rx' and 'bytes_tx' in the parsed result).
parser = apache_log_parser.make_parser('%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O')

# Use a with-block so the log file is closed even if parsing a line raises.
with open(LOG, 'r') as log_file:
    for line in log_file:
        log_data = parser(line)
        print('#' * 50)
        for k, v in log_data.items():
            print(k, v)
テストログ(/var/log/httpd/access_log)
192.168.56.101 - - [08/Jul/2018:15:52:11 +0900] "POST /wp/wp-cron.php?doing_wp_cron=1531032731.9390690326690673828125 HTTP/1.1" 200 - "http://192.168.56.101/wp/wp-cron.php?doing_wp_cron=1531032731.9390690326690673828125" "WordPress/4.9.7; http://192.168.56.101/wp" 385 202
192.168.56.1 - - [08/Jul/2018:19:32:59 +0900] "GET /wp/ HTTP/1.1" 200 61156 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36" 694 61611
実行結果
################################################## ('status', '200') ('request_url_query', 'doing_wp_cron=1531032731.9390690326690673828125') ('request_header_user_agent__browser__family', 'WordPress') ('request_url_username', None) ('request_first_line', 'POST /wp/wp-cron.php?doing_wp_cron=1531032731.9390690326690673828125 HTTP/1.1') ('time_received_utc_isoformat', '2018-07-08T06:52:11+00:00') ('time_received_utc_datetimeobj', datetime.datetime(2018, 7, 8, 6, 52, 11, tzinfo='0000')) ('request_url_hostname', None) ('request_url_query_simple_dict', {'doing_wp_cron': '1531032731.9390690326690673828125'}) ('response_bytes_clf', '-') ('request_header_user_agent__os__family', 'Other') ('request_url_scheme', '') ('request_url', '/wp/wp-cron.php?doing_wp_cron=1531032731.9390690326690673828125') ('request_url_query_dict', {'doing_wp_cron': ['1531032731.9390690326690673828125']}) ('request_url_query_list', [('doing_wp_cron', '1531032731.9390690326690673828125')]) ('request_http_ver', '1.1') ('request_header_referer', 'http://192.168.56.101/wp/wp-cron.php?doing_wp_cron=1531032731.9390690326690673828125') ('remote_user', '-') ('request_url_port', None) ('request_header_user_agent__is_mobile', False) ('request_header_user_agent__browser__version_string', '4.9.7') ('request_url_fragment', '') ('remote_host', '192.168.56.101') ('request_header_user_agent', 'WordPress/4.9.7; http://192.168.56.101/wp') ('request_header_user_agent__os__version_string', '') ('time_received_tz_datetimeobj', datetime.datetime(2018, 7, 8, 15, 52, 11, tzinfo='0900')) ('remote_logname', '-') ('request_url_path', '/wp/wp-cron.php') ('request_url_password', None) ('time_received_datetimeobj', datetime.datetime(2018, 7, 8, 15, 52, 11)) ('bytes_tx', '202') ('request_method', 'POST') ('time_received_tz_isoformat', '2018-07-08T15:52:11+09:00') ('time_received_isoformat', '2018-07-08T15:52:11') ('request_url_netloc', '') ('bytes_rx', '385') ('time_received', '[08/Jul/2018:15:52:11 +0900]') 
################################################## ('status', '200') ('request_url_query', '') ('request_header_user_agent__browser__family', 'Chrome') ('request_url_username', None) ('request_first_line', 'GET /wp/ HTTP/1.1') ('time_received_utc_isoformat', '2018-07-08T10:32:59+00:00') ('time_received_utc_datetimeobj', datetime.datetime(2018, 7, 8, 10, 32, 59, tzinfo='0000')) ('request_url_hostname', None) ('request_url_query_simple_dict', {}) ('response_bytes_clf', '61156') ('request_header_user_agent__os__family', u'Mac OS X') ('request_url_scheme', '') ('request_url', '/wp/') ('request_url_query_dict', {}) ('request_url_query_list', []) ('request_http_ver', '1.1') ('request_header_referer', '-') ('remote_user', '-') ('request_url_port', None) ('request_header_user_agent__is_mobile', False) ('request_header_user_agent__browser__version_string', '65.0.3325') ('request_url_fragment', '') ('remote_host', '192.168.56.1') ('request_header_user_agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36') ('request_header_user_agent__os__version_string', '10.13.3') ('time_received_tz_datetimeobj', datetime.datetime(2018, 7, 8, 19, 32, 59, tzinfo='0900')) ('remote_logname', '-') ('request_url_path', '/wp/') ('request_url_password', None) ('time_received_datetimeobj', datetime.datetime(2018, 7, 8, 19, 32, 59)) ('bytes_tx', '61611') ('request_method', 'GET') ('time_received_tz_isoformat', '2018-07-08T19:32:59+09:00') ('time_received_isoformat', '2018-07-08T19:32:59') ('request_url_netloc', '') ('bytes_rx', '694') ('time_received', '[08/Jul/2018:19:32:59 +0900]')
出力項目とApacheのログ対応表は以下の通り
参考URLより引用