pikesaku’s blog

個人的な勉強メモです。記載内容について一切の責任は持ちません。

Jubatus外れ値検知機能を使い不正ログイン検知

ためしに作ってみる!

GeoIPセットアップ

# rpm -ivh ftp://195.220.108.108/linux/centos/7.2.1511/os/x86_64/Packages/GeoIP-devel-1.5.0-9.el7.x86_64.rpm
# pip install geoip2
# wget http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz
# gunzip GeoLite2-City.mmdb.gz

機能

ユーザー単位で解析する。ログイン傾向から不正ログインを検知する
入力データは以下とする。
ログイン時刻(UNIX時間),接続元IP

入力データをプログラムで以下データに変換し学習データとする。

①接続元IP(ip_int)

②接続元IPの地域情報(geoid)

③接続元IPのDNSBL登録状況(dns_info)

④該当IPが新規接続元IPであるかどうか(first)

⑤対象アカウントへの過去M分以内のログイン回数(num_of_login_in_near_time)※下記プログラムでは M=180(過去3時間)で固定

⑥対象アカウントへの過去M分以内の接続元地域数(num_of_location_in_near_time)
 別の場所からのログインは怪しく見えるから。

誤検知をできるだけ防ぐべく、多次元データで外れ値検知をしてみる。

まだ軽くしか動かしてないので、しっかり動くか分かりません!!

プログラム

login_analyze.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import signal
import sys
import os
import json
from jubatus.anomaly import client
from jubatus.common import Datum
import time
import geoip2.database
import wget
import gzip
import dns.resolver
import syslog

DEBUG = True


def exec_study(uname, data, mes):
    """Send one login observation to the local Jubatus anomaly server.

    uname -- name passed as the third argument of client.Anomaly.
    data  -- sequence whose first six items are, in order: ip_int, geoid,
             dns_info, first, num_of_login_in_near_time and
             num_of_location_in_near_time. Each is converted with float().
    mes   -- human-readable description of the record; logged after the
             value returned by add().
    """
    feature_names = (
        "ip_int",
        "geoid",
        "dns_info",
        "first",
        "num_of_login_in_near_time",
        "num_of_location_in_near_time",
    )

    datum = Datum()
    for position, key in enumerate(feature_names):
        datum.add_number(key, float(data[position]))

    # A new client is created for every record, talking to 127.0.0.1:9199.
    result = client.Anomaly("127.0.0.1", 9199, uname).add(datum)
    output_log(str(result) + ": " + mes)


def op_srv(OP):
    """Start or stop the jubaanomaly server process.

    OP -- list. When OP[0] is "start", OP[1] is the config file path and the
          server is launched in the background with its output discarded.
          Any other OP[0] runs ``pkill jubaanomaly``.

    A failing start is fatal (err_fin). A failing stop is only logged:
    err_fin itself calls op_srv(["stop"]), so treating a non-zero pkill
    status (e.g. no process matched) as fatal would recurse without end.
    """
    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps quote() in the pipes module
        from pipes import quote

    if OP[0] == "start":
        # Quote the path so spaces or shell metacharacters cannot break
        # (or extend) the command line.
        com = "jubaanomaly -f " + quote(OP[1]) + " > /dev/null 2>&1 &"
        if os.system(com):
            err_fin("jubaanomaly " + OP[0] + " failed")
        return

    status = os.system("pkill jubaanomaly > /dev/null 2>&1")
    if status:
        output_log("jubaanomaly " + OP[0] + " failed (status " + str(status) + ")")


def ip2int(ip):
    """Convert a dotted-quad IPv4 string to its 32-bit integer value.

    ip -- string such as "192.168.0.1".

    Returns the integer (first octet is the most significant byte).
    Raises ValueError when *ip* is not four decimal octets in 0..255
    (this includes IPv6 addresses).
    """
    # A list (not map()) so the octets can be indexed on Python 3 as well.
    octets = [int(part) for part in ip.split('.')]
    if len(octets) != 4 or any(not 0 <= octet <= 255 for octet in octets):
        raise ValueError("invalid IPv4 address: %r" % (ip,))
    return (octets[0] << 24) | (octets[1] << 16) | (octets[2] << 8) | octets[3]


def output_log(mes):
    """Write *mes* to syslog and, when DEBUG is set, also print it."""
    # The syslog connection is opened and closed around every message.
    syslog.openlog()
    syslog.syslog(mes)
    syslog.closelog()

    if not DEBUG:
        return
    print(mes)


def err_fin(mes):
    """Report a fatal error, stop the server and exit with status 1.

    mes -- error text; printed with an "Error: " prefix and logged with an
           "Error " prefix.
    """
    console_text = "Error: " + mes
    log_text = "Error " + mes

    print(console_text)
    output_log(log_text)
    op_srv(["stop"])
    sys.exit(1)


def get_dnsinfo(ip):
    """Count the DNSBL "A" records returned for *ip*.

    ip -- dotted IPv4 address. Its octets are reversed and looked up under
          each blocklist zone in ``bls``.

    Returns the total number of answer records over all zones; NXDOMAIN
    from a zone contributes nothing. Any other lookup error is fatal
    (err_fin).
    """
    bls = ["zen.spamhaus.org", "xbl.spamhaus.org", "pbl.spamhaus.org"]

    # Both are loop-invariant, so build them once.
    my_resolver = dns.resolver.Resolver()
    reversed_ip = '.'.join(reversed(str(ip).split(".")))

    answers = list()
    for bl in bls:
        query = reversed_ip + "." + bl
        try:
            answers += my_resolver.query(query, "A")
        except dns.resolver.NXDOMAIN:
            continue
        except Exception:
            # Not a bare except: SystemExit / KeyboardInterrupt (e.g. from the
            # Ctrl+C handler) must not be reported as a DNS failure.
            err_fin("dns lookup failed")
    return len(answers)


def get_location(ip):
    """Look up the GeoLite2 city id and country name for *ip*.

    The database file ./GeoLite2-City.mmdb is downloaded with wget/gunzip
    when it is missing; if it is still missing afterwards the program ends
    through err_fin.

    Returns (geoid, country). geoid is 0 and country is "UNKNOWN" when the
    lookup fails or the database has no value for that field.
    """
    db_file = "./GeoLite2-City.mmdb"
    if not os.path.isfile(db_file):
        ret = os.system("wget -q http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz; gunzip -f ./GeoLite2-City.mmdb.gz")
        if ret or not os.path.isfile(db_file):
            # Without the database the Reader below cannot be opened, so
            # stop cleanly instead of crashing with a traceback.
            err_fin("wget failed")

    reader = geoip2.database.Reader(db_file)
    try:
        response = reader.city(ip)
    except Exception:
        # Best-effort lookup: unknown or invalid addresses map to the
        # placeholder values rather than aborting the run.
        geoid = 0
        country = "UNKNOWN"
    else:
        geoid = response.city.geoname_id or 0
        country = response.country.name or "UNKNOWN"
    finally:
        # This function runs once per input line; release the database
        # handle every time so handles do not accumulate.
        reader.close()
    return geoid, country


def get_args():
    """Read and validate the command-line arguments.

    Exactly two arguments are expected: the config file path and the user
    name. A wrong argument count or a config path that is not an existing
    file is reported through err_fin.

    Returns (config, uname).
    """
    argc = len(sys.argv)
    if argc != 3:
        err_fin(" Invalid args")

    config, uname = sys.argv[1], sys.argv[2]

    if os.path.isfile(config):
        return config, uname

    err_fin(config + " does not exist")
    return config, uname


def do_exit(sig, stack):
    """Signal handler: print two notice lines, then exit with status 0."""
    for notice in ('You pressed Ctrl+C.', 'Stop running the job.'):
        print(notice)
    raise SystemExit(0)


def main():
    """Read "login_time,ip" lines from stdin and feed them to Jubatus.

    For every input line the function derives six features (integer IP,
    GeoIP city id, DNSBL hit count, first-seen flag, number of logins and
    number of distinct locations inside the recent time window) and passes
    them to exec_study. Blank lines are skipped; lines that cannot be
    parsed are logged and skipped. At end of input the server is stopped
    and the process exits.
    """
    signal.signal(signal.SIGINT, do_exit)
    config, uname = get_args()

    near_window = 60 * 60 * 3  # look-back window in seconds (3 hours)

    ips = set()
    login_data = list()

    op_srv(["start", config])
    # NOTE(review): presumably gives jubaanomaly time to start listening.
    time.sleep(5)

    # readline() works on both Python 2 and 3 and returns "" at EOF.
    for raw in iter(sys.stdin.readline, ""):
        line = raw.strip()
        if not line:
            continue

        fields = line.split(",")
        try:
            ltime = int(fields[0])
            ip = fields[1].strip()
            ip_int = ip2int(ip)
        except (ValueError, IndexError):
            # e.g. a missing field, a non-numeric time or a non-IPv4 source.
            output_log("skip malformed input line: " + line)
            continue

        geoid, country = get_location(ip)

        dns_info = get_dnsinfo(ip)

        first = 0 if ip in ips else 1
        ips.add(ip)

        login_data.append([ltime, geoid])

        near_time_info = [
            entry for entry in login_data
            if ltime - near_window <= entry[0] <= ltime
        ]
        num_of_login_in_near_time = len(near_time_info)
        num_of_location_in_near_time = len(set(entry[1] for entry in near_time_info))

        mes = "uname: " + uname + " "
        mes += "ltime: " + str(ltime) + " "
        mes += "ip: " + ip + " "
        mes += "ip_int: " + str(ip_int) + " "
        mes += "country: " + country + " "
        mes += "geoid: " + str(geoid) + " "
        mes += "dns_info: " + str(dns_info) + " "
        mes += "first: " + str(first) + " "
        mes += "num_of_login_in_near_time: " + str(num_of_login_in_near_time) + " "
        mes += "num_of_location_in_near_time: " + str(num_of_location_in_near_time) + " "

        data = [ip_int, geoid, dns_info, first, num_of_login_in_near_time, num_of_location_in_near_time]

        exec_study(uname, data, mes)

    # End of input: shut the server down before leaving.
    op_srv(["stop"])
    sys.exit()


# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()

コンフィグファイル

login_analyze.json

{
 "method" : "lof",
 "parameter" : {
  "nearest_neighbor_num" : 10,
  "reverse_nearest_neighbor_num" : 30,
  "method" : "euclid_lsh",
  "ignore_kth_same_point" : true,
  "parameter" : {
   "hash_num" : 8,
   "table_num" : 16,
   "probe_num" : 64,
   "bin_width" : 10,
   "seed" : 1234
  }
 },

 "converter" : {
  "string_filter_types": {},
  "string_filter_rules": [],
  "num_filter_types": {},
  "num_filter_rules": [],
  "string_types": {},
  "string_rules": [{"key":"*", "type":"str", "global_weight" : "bin", "sample_weight" : "bin"}],
  "num_types": {},
  "num_rules": [
    {"key" : "ip_int", "type" : "str"},
    {"key" : "geoid", "type" : "str"},
    {"key" : "dns_info", "type" : "num"},
    {"key" : "first", "type" : "num"},
    {"key" : "num_of_login_in_near_time", "type" : "num"},
    {"key" : "num_of_location_in_near_time", "type" : "num"}
  ]
 }
}

入力データ作成用ちょいスクリプト

make_data.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import signal
import datetime
import time
import sys
import os
from operator import itemgetter


def do_exit(signum=None, frame=None):
    """SIGINT handler: exit with status 1.

    Signal handlers are called with (signal number, stack frame); both are
    accepted (and ignored) so delivery of SIGINT no longer raises TypeError.
    """
    sys.exit(1)

signal.signal(signal.SIGINT, do_exit)

# Syslog line layout: "<month> <day> <HH:MM:SS><rest of message>".
SYSLOG_LINE = re.compile(r'^(\S+)\s+(\d+)\s+(\d{2}:\d{2}:\d{2})(.*)$')
# SSH public-key login; group 1 is the source address.
SSH_LOGIN = re.compile(r'^.*Accepted publickey for .* from (\S+) .*$')


def year_from_log_name(path):
    """Return the 4-digit year from a name like "secure.20161010", else None.

    Only the file name is inspected, so "./secure.20161010" works too. The
    first dot-separated suffix that starts with four digits is used.
    """
    for part in os.path.basename(path).split(".")[1:]:
        year = part[0:4]
        if len(year) == 4 and year.isdigit():
            return year
    return None


def parse_login_events(lines, year):
    """Extract [unix_time, ip] pairs for SSH public-key logins.

    lines -- iterable of syslog lines (they carry no year).
    year  -- 4-digit year string to combine with each line's date.

    Lines that do not look like syslog, or whose date cannot be parsed,
    are ignored.
    """
    events = []
    for line in lines:
        header = SYSLOG_LINE.match(line.strip())
        if not header:
            continue
        month, day, clock, mes = header.groups()
        try:
            # Fields are space-separated on purpose: glued together,
            # "Oct" + "1" + "17:00:00" parses as Oct 11 07:00:00.
            stamp = datetime.datetime.strptime(
                " ".join((year, month, day, clock)), '%Y %b %d %H:%M:%S')
        except ValueError:
            continue
        utime = int(time.mktime(stamp.timetuple()))

        # SSH login
        login = SSH_LOGIN.match(mes)
        if login:
            events.append([utime, login.group(1)])

        # Login patterns for other applications can be added here.
    return events


data = list()
for log in sys.argv[1:]:
    if not os.path.isfile(log):
        continue
    y = year_from_log_name(log)
    if y is None:
        sys.stderr.write("skip " + log + ": no year in file name\n")
        continue
    with open(log, 'r') as log_file:
        data.extend(parse_login_events(log_file, y))


data.sort(key=itemgetter(0))

for i in data:
    print(str(i[0]) + "," + i[1])

実行方法

# python ./make_data.py  /var/log/secure.20161010 /var/log/secure.20160918 | python ./login_analyze.py ./login_analyze.json hoge