Python 获取 Apache 日志并推送数据到 Elasticsearch
import collections
import os
import re
import time

from elasticsearch import Elasticsearch
# Field names of the bracketed "FB.." token, in the order the pattern expects.
_FB_FIELDS = ('FBAN', 'FBDV', 'FBMD', 'FBSN', 'FBSV', 'FBSS', 'FBID', 'FBLC', 'FBOP')
# Matches "[FBAN/..;FBDV/..; ... ;FBOP/..]" with one lazy capture group per field.
_FB_PATTERN = re.compile(
    r'\[' + ';'.join(name + r'/(.*?)' for name in _FB_FIELDS) + r'\]'
)


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or "" if none."""
    found = re.search(pattern, text)
    return found.group(1) if found is not None else ""


def get_all_info(log):
    """Extract the interesting fields of one access-log line.

    The regexes expect a line shaped like:
    ``IP - - [timestamp] "METHOD url HTTP/x.y" status size "referer" "agent"``.
    Any field that cannot be found is returned as an empty string, so the
    function never raises on malformed or empty input.

    Args:
        log: A single log line (a trailing newline is tolerated).

    Returns:
        A dict with the keys:
            "datetime": text inside the first ``[...]`` of the line.
            "url":      request target of the quoted request line.
            "ip":       first dotted IPv4 address found in the line.
            "status":   three-digit status code following the protocol token.
            "browser":  last quoted field of the line (the user agent).
            "client":   text inside the first ``[...]`` of the user agent.
            "fb":       dict of the nine FB* fields when the bracketed FB
                        token is present, otherwise an empty list.
    """
    ip_address = _first_group(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', log)
    print('IP地址:', ip_address)

    # Accept any upper-case HTTP method, not only GET, so POST/HEAD/... requests
    # keep their URL instead of being recorded with an empty one.
    url = _first_group(r'"[A-Z]+ (.+?) ', log)

    timestamp = _first_group(r'\[(.+?)\]', log)

    # Accept every protocol version (HTTP/1.0, HTTP/1.1, HTTP/2.0, ...); a
    # literal "HTTP/1.1" would drop the status of all other requests.
    status = _first_group(r'HTTP/\d(?:\.\d)?" (\d{3}) ', log)

    # Everything between the first '" ' (end of the request line) and the final
    # quote of the line; the user agent is whatever follows the last '" "'
    # separator inside that tail.
    tail = _first_group(r'" (.+?)"$', log)
    browser = _first_group(r'" "(.+?)$', tail)

    # Search only the user agent: on the whole line the first bracket is the
    # timestamp, which would make this field a copy of "datetime".
    client = _first_group(r'\[(.+?)\]', browser)

    # Stays an empty list when the token is absent (the historical return
    # shape); becomes a field-name -> value dict when it is present.
    fb_data = []
    fb_match = _FB_PATTERN.search(log)
    if fb_match:
        fb_data = dict(zip(_FB_FIELDS, fb_match.groups()))

    return {
        "datetime": timestamp,
        "url": url,
        "ip": ip_address,
        'status': status,
        "browser": browser,
        "client": client,
        "fb": fb_data,
    }
# Follow the access_log file and push every newly appended entry to Elasticsearch.
filename = '/var/log/httpd/access_log'
es = Elasticsearch(hosts=[{'host':'137.184.90.17','port':9200,'scheme':'http'}])
# Seconds to wait when the log has no new data; keeps the loop from spinning.
POLL_INTERVAL = 1.0


def _follow(path, poll_interval):
    """Yield each complete line appended to *path* after this call starts.

    Starts at the current end of the file (like ``tail -f``), so existing
    entries are not re-sent on every restart. The file is read in binary mode
    so byte positions are exact, and each line is decoded as UTF-8 with
    undecodable bytes replaced rather than raising.

    Args:
        path: File to follow.
        poll_interval: Seconds to sleep whenever no new data is available.

    Yields:
        One decoded line at a time, including its trailing newline.
    """
    stream = open(path, 'rb')
    try:
        stream.seek(0, os.SEEK_END)
        partial = b''
        while True:
            chunk = stream.readline()
            if chunk:
                # A writer may be mid-line; hold the fragment until its newline arrives.
                partial += chunk
                if partial.endswith(b'\n'):
                    yield partial.decode('utf-8', errors='replace')
                    partial = b''
                continue

            # At end of file: check whether the log was rotated (different
            # inode) or truncated (smaller than our position) and reopen if so.
            try:
                on_disk = os.stat(path)
                if (on_disk.st_ino != os.fstat(stream.fileno()).st_ino
                        or on_disk.st_size < stream.tell()):
                    fresh = open(path, 'rb')
                    stream.close()
                    stream = fresh
                    partial = b''
                    continue
            except FileNotFoundError:
                # Mid-rotation: the new file does not exist yet; retry after the sleep.
                pass
            time.sleep(poll_interval)
    finally:
        stream.close()


for line in _follow(filename, POLL_INTERVAL):
    # Blank lines would only produce an all-empty document.
    if not line.strip():
        continue
    bodyData = get_all_info(line)
    print(bodyData)
    es.index(index="216service_access_log_url", body=bodyData)
版权属于:BLOG DEWEBSTUDIO 本文作者:狒狒
原文地址: http://blog.dewebstudio.com/?post=126
版权声明:转载时必须以链接形式注明原始出处及本声明。
继续浏览: python elasticsearch apache日志
下一篇:openssl升级到3.1版本
发表评论