绘制电影分类统计条形图
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# Count how many movies fall into each genre and draw the counts as a bar chart.
file_path = "DataAnalysis/IMDB-Movie-Data.csv"
df = pd.read_csv(file_path)

# Each "Genre" cell is split on ","; the result is one list of genre names per
# movie, i.e. a list of lists.
temp_list = df["Genre"].str.split(",").tolist()

# Flatten and de-duplicate to get every distinct genre. Sorting keeps the
# column order reproducible instead of depending on set iteration order.
genre_list = sorted({genre for genres in temp_list for genre in genres})

# One row per movie, one column per genre, initialised to zero.
zero_df = pd.DataFrame(np.zeros((df.shape[0], len(genre_list))), columns=genre_list)

# Mark with 1 every genre a movie belongs to. zero_df uses the default
# 0..n-1 row labels, so the position in temp_list is also the row label.
for i, genres in enumerate(temp_list):
    zero_df.loc[i, genres] = 1

# Column sums give the number of movies per genre.
genre_count = zero_df.sum(axis=0)

# Sort ascending so the bars grow from left to right.
genre_count = genre_count.sort_values()
_x = genre_count.index
_y = genre_count.values

# Draw the bars at integer positions and label each position with its genre.
plt.bar(range(len(_x)), _y)
plt.xticks(range(len(_x)), _x)
plt.show()

统计星巴克数量
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Starbucks store counts: totals for China and the US, followed by a
# per-province breakdown of the Chinese stores.
file_path = "DataAnalysis/starbucks_store_worldwide.csv"
df = pd.read_csv(file_path)

# Number of non-null "Brand" entries per country code, computed once and
# looked up for each country of interest.
stores_per_country = df.groupby("Country")["Brand"].count()
print(stores_per_country["CN"])
print(stores_per_country["US"])

# Keep only the rows whose country code is "CN", then count per province.
is_china = df["Country"] == "CN"
china_data = df[is_china]
grouped = china_data.groupby("State/Province")["Brand"].count()
统计中国每个城市的店铺数量
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Bar chart of the 50 cities in China with the most Starbucks stores.

# Font used for the tick labels; presumably a CJK-capable face so that the
# city names render correctly -- the path is machine-specific.
font = font_manager.FontProperties(
    fname="/usr/share/fonts/truetype/arphic-gbsn00lp/gbsn00lp.ttf"
)

file_path = "DataAnalysis/starbucks_store_worldwide.csv"
all_stores = pd.read_csv(file_path)

# Only the rows whose country code is "CN" are of interest here.
df = all_stores[all_stores["Country"] == "CN"]

# Count non-null "Brand" entries per city, largest first, and keep the top 50.
stores_per_city = df.groupby(by="City")["Brand"].count()
data1 = stores_per_city.sort_values(ascending=False).head(50)

_x = data1.index
_y = data1.values

# Bars sit at integer positions; each position is labelled with its city name.
positions = range(len(_x))
plt.bar(positions, _y)
plt.xticks(positions, _x, fontproperties=font, rotation=45)
plt.show()

统计不同年份书的数量
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import font_manager
from pandas.core import groupby
# Number of books per original publication year.
file_path = "DataAnalysis/books.csv"
books = pd.read_csv(file_path)

# Rows without a publication year cannot be assigned to a year, so drop them.
has_year = books["original_publication_year"].notna()
df = books[has_year]

# Count the non-null titles within each publication year.
grouped = df.groupby(by="original_publication_year")["title"].count()
统计不同年份书的平均评分情况
# Mean "average_rating" of the books in each original publication year,
# drawn as a line plot. Relies on `df` having been loaded earlier in the file.
grouped = df.groupby(by="original_publication_year")["average_rating"].mean()

_x = grouped.index
_y = grouped.values

# Plot against integer positions rather than the year values themselves.
plt.plot(range(len(_x)), _y)

# Label only every tenth position to keep the axis readable; the years are
# cast to int for display (presumably they are parsed as floats).
tick_positions = list(range(0, len(_x), 10))
tick_labels = _x[::10].astype(int)
plt.xticks(tick_positions, tick_labels, rotation=45)
plt.show()

版权声明:本文为caoDanLife原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。