- 解决日志统计可视化问题
- 输出词云、词频文件
- 解析 JSON 日志,并结合 pandas 进行数据分析
- 基于 pyecharts 实现词云可视化
依赖安装(在命令行中执行):
pip install pandas
pip install pyecharts
import json
import os

import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import WordCloud
class Tool:
    """Word-frequency statistics and word-cloud rendering for JSON debug logs.

    Relevant log lines look like ``[2020-11-11 02:45:05] DEBUG {...json...}``
    and the JSON payload carries its records under ``data -> list``.
    """

    # Text that separates the log prefix from the JSON payload.
    LOG_MARKER = 'DEBUG '

    def __init__(self):
        pass

    @classmethod
    def _parse_lines(cls, lines):
        """Collect the ``data.list`` records from an iterable of log lines.

        Returns:
            A ``(records, skipped)`` tuple: the concatenated records, and the
            number of marker lines that had no usable JSON payload.
        """
        records = []
        skipped = 0
        for line in lines:
            # Split on the FIRST marker only, so a payload that itself
            # contains the marker text is not truncated.
            _, marker, payload = line.partition(cls.LOG_MARKER)
            if not marker:
                continue
            try:
                items = json.loads(payload)['data']['list']
            except (ValueError, KeyError, TypeError):
                # Not JSON, or JSON without the expected data -> list node.
                skipped += 1
                continue
            if not isinstance(items, list):
                skipped += 1
                continue
            records.extend(items)
        return records, skipped

    def stat(self, log_path='log.txt', item_names=('item1', 'content')):
        """Parse the log file and produce one report per requested column.

        Args:
            log_path: Path of the log file to analyse.
            item_names: Record keys (columns) to analyse; ``out`` is called
                once for each of them.

        Raises:
            OSError: If the log file cannot be opened.
        """
        # Iterate the file object directly instead of loading all lines first.
        with open(log_path, 'r', encoding='utf-8') as f_r:
            data, skipped = self._parse_lines(f_r)
        if skipped:
            print(f'skipped {skipped} DEBUG line(s) without a usable JSON payload')
        for item_name in item_names:
            self.out(data=data, item_name=item_name)

    def out(self, data, item_name, out_dir='./data'):
        """Print, save and visualise the value counts of one column.

        Writes ``<out_dir>/<item_name>.txt`` (CSV with word and frequency
        columns) and ``<out_dir>/<item_name>_cloud.html`` (pyecharts word
        cloud). ``out_dir`` is created when it does not exist yet.

        Args:
            data: List of record dicts, as collected by ``stat``.
            item_name: Key (column) whose values are counted.
            out_dir: Directory that receives the output files.
        """
        df = pd.DataFrame(data)
        print(f'{item_name}词频统计:')
        if item_name not in df.columns:
            # Empty input or a column no record carries: nothing to count.
            print(f'no records contain "{item_name}", nothing to output')
            return

        counts = df[item_name].value_counts()
        words = counts.index.tolist()
        freqs = counts.tolist()

        # The output directory may not exist on a fresh checkout.
        os.makedirs(out_dir, exist_ok=True)
        wc_df = pd.DataFrame({'词': words, '词频': freqs})
        wc_df.to_csv(os.path.join(out_dir, f'{item_name}.txt'), index=False)
        print(counts)

        cloud = WordCloud(init_opts=opts.InitOpts(width='900px', height='400px'))
        cloud.add("", list(zip(words, freqs)))
        cloud.set_global_opts(
            title_opts=opts.TitleOpts(
                title=f"{item_name}词频统计",
                subtitle=f"总量:{sum(freqs)}",
            )
        )
        cloud.render(os.path.join(out_dir, f'{item_name}_cloud.html'))
if __name__ == '__main__':
    # Script entry point: run the log analysis with the default settings.
    Tool().stat()
版权声明:本文为xiaoc100200原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。