ためしに作ってみる!
参考URL
GeoIPでIPアドレスから位置情報を取得する[Python] - sonickun.log
GeoLite2 Free Downloadable Databases « Maxmind Developer Site
http://geoip2.readthedocs.io/en/latest/
GeoIPセットアップ
# rpm -ivh ftp://195.220.108.108/linux/centos/7.2.1511/os/x86_64/Packages/GeoIP-devel-1.5.0-9.el7.x86_64.rpm
# pip install geoip2
# wget http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz
# gunzip GeoLite2-City.mmdb.gz
機能
ユーザー単位で解析する。ログイン傾向から不正ログインを検知する
入力データは以下とする。
ログイン時間,接続元IP
入力データをプログラムで以下データに変換し学習データとする。
①接続元IP(ip_int)
②接続元IPの地域情報(geoid)
③接続元IPのDNSブラックリスト(Spamhaus)登録件数(dns_info)
④該当IPが新規接続元IPであるかの有無(first)
⑤対象アカウントへの過去M分以内のログイン回数(num_of_login_in_near_time)
⑥対象アカウントへの過去M分以内の接続元地域数(num_of_location_in_near_time)
別の場所からのログインは怪しく見えるから。
誤検知をできるだけ防ぐべく、多次元データで外れ値検知をしてみる。
まだ軽くしか動かしてないので、しっかり動くか分かりません!!
プログラム
login_analyze.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Feed one user's login history into a Jubatus anomaly server.

Reads ``login_time,source_ip`` lines from standard input, derives six
numeric features per login and adds them to the anomaly model named after
the user.

Usage: login_analyze.py CONFIG_JSON USER_NAME
"""
import gzip
import json
import os
import signal
import sys
import syslog
import time

import dns.resolver
import geoip2.database
import wget
from jubatus.anomaly import client
from jubatus.common import Datum

DEBUG = True

# Width of the "recent logins" window, in seconds.
NEAR_TIME_SECONDS = 60 * 60 * 3

# Shell command used to stop the jubaanomaly server.
STOP_COMMAND = "pkill jubaanomaly > /dev/null 2>&1"

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3


def exec_study(uname, data, mes):
    """Add one login record to the anomaly model and log the result.

    Args:
        uname: Model name passed to the Jubatus client.
        data: Six values in this order: ip_int, geoid, dns_info, first,
            num_of_login_in_near_time, num_of_location_in_near_time.
        mes: Human-readable description of the record, appended to the log.
    """
    keys = (
        "ip_int",
        "geoid",
        "dns_info",
        "first",
        "num_of_login_in_near_time",
        "num_of_location_in_near_time",
    )
    datum = Datum()
    for key, value in zip(keys, data):
        datum.add_number(key, float(value))

    anom = client.Anomaly("127.0.0.1", 9199, uname)
    ret = anom.add(datum)
    output_log(str(ret) + ": " + mes)


def op_srv(OP):
    """Start or stop the jubaanomaly server.

    Args:
        OP: ``["start", config_path]`` to start the server in the
            background; any other first element stops it.

    Exits through err_fin() when the shell command returns non-zero.
    """
    if OP[0] == "start":
        com = "jubaanomaly -f " + OP[1] + "> /dev/null 2>&1 &"
    else:
        com = STOP_COMMAND
    if os.system(com):
        err_fin("jubaanomaly " + OP[0] + " failed")


def ip2int(ip):
    """Convert a dotted-quad IPv4 string to its integer value.

    Raises:
        ValueError: If an octet is not an integer.
        IndexError: If fewer than four octets are given.
    """
    # A list (not a bare map object) so indexing also works on Python 3.
    o = [int(part) for part in ip.split('.')]
    return (16777216 * o[0]) + (65536 * o[1]) + (256 * o[2]) + o[3]


def output_log(mes):
    """Write ``mes`` to syslog, and to stdout as well when DEBUG is set."""
    syslog.openlog()
    syslog.syslog(mes)
    syslog.closelog()
    if DEBUG:
        print(mes)


def err_fin(mes):
    """Report a fatal error, stop the server and exit with status 1."""
    print("Error: " + mes)
    output_log("Error " + mes)
    # Best-effort stop. This must not go through op_srv(): op_srv() calls
    # err_fin() when pkill returns non-zero (e.g. no server is running),
    # which would recurse without end.
    os.system(STOP_COMMAND)
    sys.exit(1)


def get_dnsinfo(ip):
    """Return the number of A records found for ``ip`` on the blacklists.

    Looks up the reversed address under each Spamhaus zone. Any lookup
    failure other than NXDOMAIN ends the program through err_fin().
    """
    bls = ["zen.spamhaus.org", "xbl.spamhaus.org", "pbl.spamhaus.org"]
    my_resolver = dns.resolver.Resolver()
    reversed_ip = '.'.join(reversed(str(ip).split(".")))
    answers = list()
    for bl in bls:
        query = reversed_ip + "." + bl
        try:
            answers += my_resolver.query(query, "A")
        except dns.resolver.NXDOMAIN:
            # No record in this zone for the address.
            pass
        except Exception:
            # Not a bare except: SystemExit raised by the SIGINT handler
            # must pass through instead of being reported as a DNS error.
            err_fin("dns loolup failed")
    return len(answers)


def get_location(ip):
    """Return ``(geoid, country)`` for ``ip`` from the GeoLite2 City DB.

    Downloads the database into the current directory when it is missing.
    Returns ``(0, "UNKNOWN")`` parts for anything that cannot be resolved.
    """
    db_file = "./GeoLite2-City.mmdb"
    if not os.path.isfile(db_file):
        ret = os.system("wget -q http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz; gunzip -f ./GeoLite2-City.mmdb.gz")
        if ret:
            # Without the database the Reader below cannot be opened.
            err_fin("wget failed")

    reader = geoip2.database.Reader(db_file)
    try:
        response = reader.city(ip)
    except Exception:
        geoid = 0
        country = "UNKNOWN"
    else:
        geoid = response.city.geoname_id
        country = response.country.name
    finally:
        # A Reader is opened per call, so release it every time.
        reader.close()

    return geoid or 0, country or "UNKNOWN"


def get_args():
    """Return ``(config, uname)`` from the command line, or exit on error."""
    if len(sys.argv) != 3:
        err_fin(" Invalid args")
    config = sys.argv[1]
    uname = sys.argv[2]
    if not os.path.isfile(config):
        err_fin(config + " does not exist")
    return config, uname


def do_exit(sig, stack):
    """SIGINT handler: print a notice and exit with status 0."""
    print('You pressed Ctrl+C.')
    print('Stop running the job.')
    sys.exit(0)


def main():
    """Start the server, then learn from every login line on stdin."""
    signal.signal(signal.SIGINT, do_exit)
    config, uname = get_args()
    ips = set()
    login_data = list()

    op_srv(["start", config])
    # Presumably gives the background server time to come up.
    time.sleep(5)

    while True:
        try:
            line = read_line()
        except EOFError:
            op_srv(["stop"])
            sys.exit()

        if not line.strip():
            continue

        try:
            ltime_text, ip = line.split(",")[:2]
            ltime = int(ltime_text)
            ip = ip.strip()
            ip_int = ip2int(ip)
        except (ValueError, IndexError):
            # Missing field, non-numeric time, or not a dotted-quad IPv4.
            output_log("skip invalid input line: " + line)
            continue

        geoid, country = get_location(ip)
        dns_info = get_dnsinfo(ip)

        if ip in ips:
            first = 0
        else:
            first = 1
            ips.add(ip)

        login_data.append([ltime, geoid])
        near_time_info = [
            i for i in login_data
            if ltime - NEAR_TIME_SECONDS <= i[0] <= ltime
        ]
        num_of_login_in_near_time = len(near_time_info)
        num_of_location_in_near_time = len({i[1] for i in near_time_info})

        mes = "uname: " + uname + " "
        mes += "ltime: " + str(ltime) + " "
        mes += "ip: " + ip + " "
        mes += "ip_int: " + str(ip_int) + " "
        mes += "country: " + country + " "
        mes += "geoid: " + str(geoid) + " "
        mes += "dns_info: " + str(dns_info) + " "
        mes += "first: " + str(first) + " "
        mes += "num_of_login_in_near_time: " + str(num_of_login_in_near_time) + " "
        mes += "num_of_location_in_near_time: " + str(num_of_location_in_near_time) + " "

        data = [
            ip_int,
            geoid,
            dns_info,
            first,
            num_of_login_in_near_time,
            num_of_location_in_near_time,
        ]
        exec_study(uname, data, mes)


if __name__ == '__main__':
    main()
コンフィグファイル
login_analyze.json
{ "method" : "lof", "parameter" : { "nearest_neighbor_num" : 10, "reverse_nearest_neighbor_num" : 30, "method" : "euclid_lsh", "ignore_kth_same_point" : true, "parameter" : { "hash_num" : 8, "table_num" : 16, "probe_num" : 64, "bin_width" : 10, "seed" : 1234 } }, "converter" : { "string_filter_types": {}, "string_filter_rules": [], "num_filter_types": {}, "num_filter_rules": [], "string_types": {}, "string_rules": [{"key":"*", "type":"str", "global_weight" : "bin", "sample_weight" : "bin"}], "num_types": {}, "num_rules": [ {"key" : "ip_int", "type" : "str"}, {"key" : "geoid", "type" : "str"}, {"key" : "dns_info", "type" : "num"}, {"key" : "first", "type" : "num"}, {"key" : "num_of_login_in_near_time", "type" : "num"}, {"key" : "num_of_location_in_near_time", "type" : "num"} ] } }
入力データ作成用ちょいスクリプト
make_data.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Extract SSH public-key logins from syslog files as ``time,ip`` lines.

Each argument is a log file whose extension starts with the four-digit year
of its entries (e.g. ``/var/log/secure.20161010``); syslog lines themselves
carry no year. Output is sorted by time, one ``unix_time,ip`` per line.
"""
import datetime
import os
import re
import signal
import sys
import time
from operator import itemgetter

# "<month> <day> <HH:MM:SS><rest of line>"
SYSLOG_LINE = re.compile(r'^(\S+)\s+(\d+)\s+(\d{2}:\d{2}:\d{2})(.*)$')
# SSH login
SSH_LOGIN = re.compile(r'^.*Accepted publickey for .* from (\S+) .*$')


def do_exit(sig=None, stack=None):
    """SIGINT handler: exit with status 1.

    Signal handlers are called with (signal number, stack frame), so both
    parameters must be accepted.
    """
    sys.exit(1)


def log_year(path):
    """Return the four-digit year taken from the extension of ``path``.

    Only the file name is inspected, so dots elsewhere in the path (such as
    a leading ``./``) do not matter. Returns None when the extension does
    not start with four digits.
    """
    suffix = os.path.splitext(os.path.basename(path))[1][1:]
    year = suffix[0:4]
    if len(year) == 4 and year.isdigit():
        return year
    return None


def parse_login(line, year):
    """Return ``[unix_time, ip]`` for an SSH public-key login line.

    Returns None when ``line`` is not such a login or its date is invalid.
    """
    head = SYSLOG_LINE.match(line.strip())
    if not head:
        return None
    month, day, clock, mes = head.groups()

    login = SSH_LOGIN.match(mes)
    if not login:
        return None

    try:
        # Fields are space-separated on purpose: without separators a
        # single-digit day runs into the hour ("Oct" "1" "12:34:56" would
        # be read as day 11, hour 2).
        obj = datetime.datetime.strptime(
            ' '.join((year, month, day, clock)), '%Y %b %d %H:%M:%S')
    except ValueError:
        return None
    return [int(time.mktime(obj.timetuple())), login.group(1)]


def collect_logins(paths):
    """Return every login found in ``paths``, sorted by time.

    Paths that are not regular files, or whose year cannot be determined,
    are skipped.
    """
    data = list()
    for log in paths:
        if not os.path.isfile(log):
            continue
        year = log_year(log)
        if year is None:
            continue
        with open(log, 'r') as handle:
            for line in handle:
                record = parse_login(line, year)
                # To ingest logins from other applications, match further
                # patterns in parse_login() and return [utime, ip] likewise.
                if record is not None:
                    data.append(record)
    data.sort(key=itemgetter(0))
    return data


def main():
    """Print ``unix_time,ip`` for every login in the files given as args."""
    signal.signal(signal.SIGINT, do_exit)
    for utime, ip in collect_logins(sys.argv[1:]):
        print(str(utime) + "," + ip)


if __name__ == '__main__':
    main()
実行方法
# python ./make_data.py /var/log/secure.20161010 /var/log/secure.20160918 | python ./login_analyze.py ./login_analyze.json hoge