わかったこと
内包表記の方が処理が速い
ファイルを open してファイルハンドルを作成しただけでは、ファイル内容分のメモリは確保されない。
ファイルハンドルの内容を、フィルタせずに不必要にリストオブジェクトへ格納しないこと。
※フィルタしない場合は全データ分のメモリが使われる。フィルタすれば、フィルタ結果の分だけメモリが使われる。
forループにファイルハンドルを直接渡すと、イテレータとして1行ずつ読み込まれる。一方、readlines() を呼んだり list() でリスト化したりすると、全データ分のメモリが確保される。
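リスト内包表記の方が、forループで append するより処理が速い(今回の計測では約2〜2.5倍)。ただし、計測時間には memory_profiler の行単位プロファイルによるオーバーヘッドが含まれていると考えられる点に注意(同じ全行リスト化でも list(fh) は 0.05 秒で終わっている)。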
調査スクリプト
# -*- coding:utf-8 -*-
"""Compare memory use and run time of several ways to load a file into a list.

Each ``list_opN`` function reads ``/tmp/5.mbfile`` in a different way and is
decorated with memory_profiler's ``@profile`` so a line-by-line memory report
is printed when it runs.  ``get_perf_info`` additionally reports the elapsed
time and the process RSS before/after each function.

NOTE: the bodies of the ``list_opN`` functions are the subjects of the
experiment.  They are intentionally left exactly as measured (explicit
``open``/``close``, unused locals) because the line-by-line report quotes
them verbatim; they are documented with ``#`` comments above each function
instead of docstrings so the profiled bodies stay unchanged.
"""
import os
import subprocess
import sys
import time

try:
    from memory_profiler import profile
except ImportError:
    # The RSS/time harness does not depend on memory_profiler, so keep the
    # script usable without it; only the line-by-line report is lost.
    sys.stderr.write('warning: memory_profiler is not installed; '
                     'line-by-line memory reports are disabled\n')

    def profile(func):
        """Fallback no-op decorator used when memory_profiler is missing."""
        return func


def get_rss():
    """Return the resident set size of this process as reported by ``ps``.

    The value is whatever ``ps -o rss`` prints for the current PID, parsed
    as an integer (the report labels it as kb).

    Raises:
        subprocess.CalledProcessError: if ``ps`` exits with a non-zero status.
        ValueError: if the output of ``ps`` is not an integer.
    """
    # Argument list instead of a concatenated shell string: no shell needed.
    cmd = ['ps', '-p', str(os.getpid()), '-o', 'rss', '--no-header']
    return int(subprocess.check_output(cmd))


def clear_cache():
    """Ask the kernel to drop its caches by writing 3 to drop_caches.

    This is best-effort: when the control file cannot be written (e.g. the
    script is not run with sufficient privileges) a warning is printed to
    stderr and the benchmark continues.
    """
    try:
        with open('/proc/sys/vm/drop_caches', 'w') as ctl:
            ctl.write('3\n')
    except (IOError, OSError) as err:
        sys.stderr.write(
            'warning: could not drop caches: {}\n'.format(err))


def format_perf_line(obj_name, etime, pre, aft):
    """Build the one-line report printed for a single benchmark run.

    Args:
        obj_name: name of the measured callable.
        etime: elapsed time to display.
        pre: RSS value measured before the call.
        aft: RSS value measured after the call.

    Returns:
        The report string; the "Mem Leak" column is ``aft - pre``.
    """
    leak = aft - pre
    return ''.join([
        'Obj: {:<10}'.format(obj_name), ' ',
        'Etime: {:>5}'.format(str(etime)), ' ',
        'MemUsed Pre(kb): {:<10}'.format(str(pre)),
        'MemUsed Aft(kb): {:<10}'.format(str(aft)),
        'Mem Leak(kb): {:<10}'.format(str(leak)),
    ])


def get_perf_info(obj):
    """Run ``obj`` once and print its elapsed time and RSS before/after.

    Args:
        obj: a zero-argument callable with a ``__name__`` attribute.
    """
    obj_name = obj.__name__
    clear_cache()
    pre = get_rss()
    # Time only the call itself; the ``ps`` invocations made by get_rss()
    # are kept outside the timed window so they do not inflate Etime.
    start = time.time()
    ret = obj()
    etime = round(time.time() - start, 2)
    # RSS is sampled while the return value is still referenced.
    aft = get_rss()
    mes = format_perf_line(obj_name, etime, pre, aft)
    del ret
    print(mes)
    print('\n\n')


# Open and close the file without reading anything.
@profile
def list_op1():
    fh = open('/tmp/5.mbfile', 'r')
    fh.close()


# Materialize every line with list().
@profile
def list_op2():
    fh = open('/tmp/5.mbfile', 'r')
    a = list(fh)
    fh.close()


# Materialize every line with list(), then take a reference to one element.
@profile
def list_op3():
    fh = open('/tmp/5.mbfile', 'r')
    a = list(fh)
    aa = a[0]
    fh.close()


# Same as list_op3, but explicitly delete the references afterwards.
@profile
def list_op4():
    fh = open('/tmp/5.mbfile', 'r')
    a = list(fh)
    aa = a[0]
    del aa
    del a
    fh.close()


# Materialize every line with a list comprehension.
@profile
def list_op5():
    fh = open('/tmp/5.mbfile', 'r')
    a = [ i for i in fh ]
    fh.close()


# Materialize every line with a for loop and append().
@profile
def list_op6():
    fh = open('/tmp/5.mbfile', 'r')
    a = list()
    for i in fh:
        a.append(i)
    fh.close()


# Keep only lines containing 'a', using a list comprehension.
@profile
def list_op7():
    fh = open('/tmp/5.mbfile', 'r')
    a = [ i for i in fh if 'a' in i]
    fh.close()


# Keep only lines containing 'a', using a for loop over the file object.
@profile
def list_op8():
    fh = open('/tmp/5.mbfile', 'r')
    a = list()
    for i in fh:
        if 'a' in i:
            a.append(i)
    fh.close()


# Keep only lines containing 'a', but iterate over readlines().
@profile
def list_op9():
    fh = open('/tmp/5.mbfile', 'r')
    a = list()
    for i in fh.readlines():
        if 'a' in i:
            a.append(i)
    fh.close()


# Benchmarks in the order they are run and reported.
BENCHMARKS = (
    list_op1,
    list_op2,
    list_op3,
    list_op4,
    list_op5,
    list_op6,
    list_op7,
    list_op8,
    list_op9,
)


def main():
    """Run every benchmark once, in order."""
    for bench in BENCHMARKS:
        get_perf_info(bench)


if __name__ == '__main__':
    main()
実行結果
Filename: ./a.py Line # Mem usage Increment Line Contents ================================================ 41 10.4 MiB 10.4 MiB @profile 42 def list_op1(): 43 10.4 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 44 10.4 MiB 0.0 MiB fh.close() Obj: list_op1 Etime: 0.07 MemUsed Pre(kb): 10636 MemUsed Aft(kb): 10740 Mem Leak(kb): 104 Filename: ./a.py Line # Mem usage Increment Line Contents ================================================ 47 10.5 MiB 10.5 MiB @profile 48 def list_op2(): 49 10.5 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 50 19.5 MiB 9.0 MiB a = list(fh) 51 19.5 MiB -0.0 MiB fh.close() Obj: list_op2 Etime: 0.05 MemUsed Pre(kb): 10740 MemUsed Aft(kb): 11060 Mem Leak(kb): 320 Filename: ./a.py Line # Mem usage Increment Line Contents ================================================ 54 10.8 MiB 10.8 MiB @profile 55 def list_op3(): 56 10.8 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 57 19.4 MiB 8.6 MiB a = list(fh) 58 19.4 MiB 0.0 MiB aa = a[0] 59 19.4 MiB -0.0 MiB fh.close() Obj: list_op3 Etime: 0.06 MemUsed Pre(kb): 11060 MemUsed Aft(kb): 11568 Mem Leak(kb): 508 Filename: ./a.py Line # Mem usage Increment Line Contents ================================================ 62 11.3 MiB 11.3 MiB @profile 63 def list_op4(): 64 11.3 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 65 19.4 MiB 8.1 MiB a = list(fh) 66 19.4 MiB 0.0 MiB aa = a[0] 67 19.4 MiB 0.0 MiB del aa 68 11.3 MiB -8.1 MiB del a 69 11.3 MiB -0.0 MiB fh.close() Obj: list_op4 Etime: 0.06 MemUsed Pre(kb): 11568 MemUsed Aft(kb): 11568 Mem Leak(kb): 0 Filename: ./a.py Line # Mem usage Increment Line Contents ================================================ 72 11.3 MiB 11.3 MiB @profile 73 def list_op5(): 74 11.3 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 75 19.4 MiB 8.1 MiB a = [ i for i in fh ] 76 19.4 MiB -0.0 MiB fh.close() Obj: list_op5 Etime: 7.4 MemUsed Pre(kb): 11568 MemUsed Aft(kb): 11576 Mem Leak(kb): 8 Filename: ./a.py Line # Mem usage Increment Line Contents 
================================================ 79 11.3 MiB 11.3 MiB @profile 80 def list_op6(): 81 11.3 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 82 11.3 MiB 0.0 MiB a = list() 83 19.4 MiB 8.1 MiB for i in fh: 84 19.4 MiB 0.0 MiB a.append(i) 85 19.4 MiB -0.0 MiB fh.close() Obj: list_op6 Etime: 14.78 MemUsed Pre(kb): 11576 MemUsed Aft(kb): 11576 Mem Leak(kb): 0 Filename: ./a.py Line # Mem usage Increment Line Contents ================================================ 88 11.3 MiB 11.3 MiB @profile 89 def list_op7(): 90 11.3 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 91 16.4 MiB 5.1 MiB a = [ i for i in fh if 'a' in i] 92 16.4 MiB -0.0 MiB fh.close() Obj: list_op7 Etime: 7.91 MemUsed Pre(kb): 11576 MemUsed Aft(kb): 11576 Mem Leak(kb): 0 Filename: ./a.py Line # Mem usage Increment Line Contents ================================================ 95 11.3 MiB 11.3 MiB @profile 96 def list_op8(): 97 11.3 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 98 11.3 MiB 0.0 MiB a = list() 99 16.4 MiB 5.1 MiB for i in fh: 100 16.4 MiB -0.0 MiB if 'a' in i: 101 16.4 MiB -0.0 MiB a.append(i) 102 16.4 MiB -0.0 MiB fh.close() Obj: list_op8 Etime: 19.56 MemUsed Pre(kb): 11576 MemUsed Aft(kb): 11576 Mem Leak(kb): 0 Filename: ./a.py Line # Mem usage Increment Line Contents ================================================ 105 11.3 MiB 11.3 MiB @profile 106 def list_op9(): 107 11.3 MiB 0.0 MiB fh = open('/tmp/5.mbfile', 'r') 108 11.3 MiB 0.0 MiB a = list() 109 19.8 MiB 8.1 MiB for i in fh.readlines(): 110 19.8 MiB 0.0 MiB if 'a' in i: 111 19.8 MiB 0.4 MiB a.append(i) 112 19.8 MiB -0.0 MiB fh.close() Obj: list_op9 Etime: 19.26 MemUsed Pre(kb): 11576 MemUsed Aft(kb): 11040 Mem Leak(kb): -536