Python 日志统计可视化极简教程

  • 解决日志统计可视化问题
  • 输出词云、词频文件
  • 解析 JSON 并结合 pandas 进行数据分析
  • 基于pyecharts词云可视化
pip install pandas
pip install pyecharts
import json
import os

import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import WordCloud


class Tool:
    """Turn JSON payloads embedded in a debug log into word-frequency reports.

    For each analysed column the tool prints the frequency table, saves it as
    a CSV-formatted ``<column>.txt`` file and renders a pyecharts word cloud
    to ``<column>_cloud.html``.
    """

    # Marker that separates the log prefix from the JSON payload, e.g.
    # "[2020-11-11 02:45:05] DEBUG {...}".
    _MARKER = 'DEBUG '

    def __init__(self):
        pass

    def stat(self, log_path='log.txt', item_names=('item1', 'content')):
        """Collect the records found in a log file and report on them.

        Every line containing the ``DEBUG `` marker is expected to carry a
        JSON object whose ``data.list`` entry is a list of records. Lines
        without the marker are ignored; marked lines whose payload is not
        valid JSON or has no ``data.list`` list are skipped and counted.

        Args:
            log_path: Path of the UTF-8 encoded log file to analyse.
            item_names: Names of the record fields (columns) to report on;
                ``out`` is called once per name.

        Raises:
            OSError: If ``log_path`` cannot be opened.
        """
        data = []
        skipped = 0
        # Iterate lazily so the whole log never has to sit in memory, and
        # close the file before the (slower) analysis starts.
        with open(log_path, 'r', encoding='utf-8') as f_r:
            for line in f_r:
                # Split on the FIRST marker only: a payload that itself
                # contains "DEBUG " must stay intact.
                _, marker, payload = line.partition(self._MARKER)
                if not marker:
                    continue
                try:
                    records = json.loads(payload)['data']['list']
                except (json.JSONDecodeError, KeyError, TypeError):
                    records = None
                if not isinstance(records, list):
                    skipped += 1
                    continue
                data.extend(records)
        if skipped:
            print(f'Skipped {skipped} DEBUG line(s) without a usable data.list payload')
        for item_name in item_names:
            self.out(data=data, item_name=item_name)

    def out(self, data, item_name, out_dir='./data'):
        """Print, save and visualise the value frequencies of one column.

        Args:
            data: Records to analyse, in any form ``pandas.DataFrame``
                accepts (``stat`` passes a list of decoded JSON records).
            item_name: Column whose values are counted.
            out_dir: Directory receiving ``<item_name>.txt`` and
                ``<item_name>_cloud.html``; created when missing.

        Returns:
            None. When ``item_name`` is not a column of ``data`` (including
            the empty-data case) a notice is printed and nothing is written.
        """
        df = pd.DataFrame(data)
        if item_name not in df.columns:
            print(f'No "{item_name}" values found; nothing to export')
            return

        print(f'{item_name}词频统计:')
        counts = df[item_name].value_counts()
        words = list(counts.index)
        freqs = counts.tolist()

        # to_csv does not create missing directories, so make sure it exists.
        os.makedirs(out_dir, exist_ok=True)
        wc_df = pd.DataFrame({'词': words, '词频': freqs})
        wc_df.to_csv(os.path.join(out_dir, f'{item_name}.txt'), index=False)
        print(counts)

        cloud = WordCloud(init_opts=opts.InitOpts(width='900px', height='400px'))
        cloud.add("", list(zip(words, freqs)))
        cloud.set_global_opts(
            title_opts=opts.TitleOpts(
                title=f"{item_name}词频统计",
                subtitle=f"总量:{sum(freqs)}",
            )
        )
        cloud.render(os.path.join(out_dir, f'{item_name}_cloud.html'))


if __name__ == '__main__':
    # Script entry point: analyse the default log file when run directly.
    log_tool = Tool()
    log_tool.stat()

版权声明:本文为xiaoc100200原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。