import pandas as pd
import numpy as np
import time
import jieba
import datetime
from pyecharts import options as opts
from pyecharts.charts import *
from pyecharts.commons.utils import JsCode
from pyecharts.components import Table
from pyecharts.options import ComponentTitleOpts
# Load the ranking spreadsheet and discard the '头像' column in one step.
df = pd.read_excel('B站新榜_总粉丝数榜单.xlsx').drop(columns='头像')
# Preview the first two rows.
df.head(2)
|
数据更新时间 |
创作领域 |
等级 |
获赞数 |
mid |
up主 |
up主标签 |
播放数 |
性别 |
类型 |
平均获赞数 |
平均播放数 |
充电人数 |
总粉丝人数 |
作品数 |
0 |
2022-05-15 14:14:58 |
生活 |
6 |
6104495 |
321173469 |
哔哩哔哩大会员 |
哔哩哔哩大会员官方账号 |
2939318 |
男 |
生活 |
290690.2 |
139967.5 |
351 |
24620688 |
21 |
1 |
2022-05-15 14:14:58 |
生活 |
6 |
91718101 |
9824766 |
敬汉卿 |
bilibili 2020百大UP主、2019年度弹幕人气奖UP主 |
1588762824 |
男 |
生活 |
77793.1 |
1347551.2 |
20379 |
9272243 |
1179 |
# Distinct values of the '创作领域' column, in the order value_counts() yields them.
# NOTE(review): the name `type` shadows the builtin; it is kept because the
# table-building loop further down refers to it by this name.
area_counts = df['创作领域'].value_counts()
type = area_counts.index.tolist()
type
['娱乐',
'游戏',
'国创',
'动物圈',
'动画',
'影视',
'番剧',
'纪录片',
'资讯',
'鬼畜',
'数码',
'舞蹈',
'美食',
'知识',
'运动',
'科技',
'时尚',
'汽车',
'音乐',
'生活',
'电视剧',
'电影']
# Table headers: every column name except the first one.
headers = df.columns[1:].tolist()
headers
['创作领域',
'等级',
'获赞数',
'mid',
'up主',
'up主标签',
'播放数',
'性别',
'类型',
'平均获赞数',
'平均播放数',
'充电人数',
'总粉丝人数',
'作品数']
# Build one table page per '创作领域' value and collect the pages in a Tab.
tab = Tab()
for category in type:
    # Rows belonging to the current category, restricted to the header columns.
    area_frame = df[df['创作领域'] == category]
    rows = area_frame[headers].apply(list, axis=1).values.tolist()

    page_title = ComponentTitleOpts(
        title=f'{category} - 总粉丝排行榜单Top50',
        subtitle='更新时间:2022-05-15 充电人数为负是数据本身问题',
    )

    table = Table()
    table.add(
        headers,
        rows,
        attributes={'class': 'fl_table', 'style': 'margin: 0 auto'},
    )
    table.set_global_opts(title_opts=page_title)

    # The tab label is the category itself.
    tab.add(table, category)
tab.render_notebook()
def bar_chart(desc, title_pos, num):
    """Build a bar chart of the top ``num`` rows of ``df`` ranked by ``desc``.

    The module-level ``df`` is sorted by ``desc`` in descending order and the
    first ``num`` rows are plotted: 'up主' values on the x axis, ``desc``
    values on the y axis.

    Args:
        desc: Name of the column to rank by; also appended to the title text.
        title_pos: Two-item sequence ``(left, top)`` positioning the title.
        num: Number of leading rows to plot.

    Returns:
        The configured ``Bar`` chart.
    """
    top_rows = df.sort_values(by=[desc], ascending=False).head(num)

    def dashed_splitline():
        # Both axes use the same visible, dashed split lines.
        return opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(type_='dashed'),
        )

    x_axis = opts.AxisOpts(
        is_scale=True,
        axislabel_opts={'rotate': '90'},
        splitline_opts=dashed_splitline(),
    )
    y_axis = opts.AxisOpts(
        is_scale=True,
        name='',
        type_='value',
        splitline_opts=dashed_splitline(),
    )
    tooltip = opts.TooltipOpts(trigger='axis', axis_pointer_type='shadow')
    left, top = title_pos[0], title_pos[1]
    title = opts.TitleOpts(
        title='up主-' + desc,
        subtitle='👇👇👇👇',
        pos_left=left,
        pos_top=top,
        title_textstyle_opts=opts.TextStyleOpts(color='#42B983', font_size=16),
    )

    chart = Bar()
    chart.add_xaxis(top_rows['up主'].tolist())
    chart.add_yaxis('', top_rows[desc].tolist())
    chart.set_global_opts(
        xaxis_opts=x_axis,
        yaxis_opts=y_axis,
        tooltip_opts=tooltip,
        title_opts=title,
    )
    return chart
def transform_fans(x):
    """Map a follower count to the label of the bucket it falls into.

    Buckets are checked in ascending order and the first one whose inclusive
    upper bound is >= ``x`` wins; anything above 10,000,000 is '>1000w'.

    Args:
        x: Follower count.

    Returns:
        The bucket label as a string.
    """
    # (inclusive upper bound, label), in ascending order.
    buckets = (
        (100000, '10w'),
        (500000, '10w~50w'),
        (1000000, '50w~100w'),
        (2000000, '100w~200w'),
        (3000000, '200w~300w'),
        (4000000, '300w~400w'),
        (5000000, '400w~500w'),
        (6000000, '500w~600w'),
        (7000000, '600w~700w'),
        (8000000, '700w~800w'),
        (9000000, '800w~900w'),
        (10000000, '900w~1000w'),
    )
    for upper, label in buckets:
        if x <= upper:
            return label
    return '>1000w'
def transform_work(x):
    """Map a work count to the label of the bucket it falls into.

    Buckets are 100 wide with inclusive upper bounds; anything above 500 is
    labelled '>500'.

    Args:
        x: Number of works.

    Returns:
        The bucket label as a string.
    """
    # (inclusive upper bound, label), in ascending order.
    buckets = (
        (100, '0~100'),
        (200, '100~200'),
        (300, '200~300'),
        (400, '300~400'),
        (500, '400~500'),
    )
    for upper, label in buckets:
        if x <= upper:
            return label
    return '>500'
def pie_chart():
    """Build a chart with two ring pies: follower buckets and work-count buckets.

    Side effect: adds (or overwrites) the ``fans_cut`` and ``works_cut``
    columns on the module-level ``df``, holding the bucket label produced by
    ``transform_fans`` / ``transform_work`` for each row.

    Returns:
        The configured ``Pie`` chart containing both series and both titles.
    """
    # Bucket every row, then count rows per bucket as [label, count] pairs.
    df['fans_cut'] = df['总粉丝人数'].apply(transform_fans)
    fans_counts = df['fans_cut'].value_counts()
    fans_pairs = [
        list(z)
        for z in zip(fans_counts.index.tolist(), fans_counts.values.tolist())
    ]

    df['works_cut'] = df['作品数'].apply(transform_work)
    works_counts = df['works_cut'].value_counts()
    works_pairs = [
        list(z)
        for z in zip(works_counts.index.tolist(), works_counts.values.tolist())
    ]

    pie = (
        Pie()
        .add(
            '',
            fans_pairs,
            radius=['55', '100'],
            center=['30%', '90%'],
        )
        .add(
            '',
            works_pairs,
            radius=['55', '100'],
            center=['75%', '90%'],
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {c} {d}%'))
        .set_global_opts(
            legend_opts=opts.LegendOpts(is_show=False),
            # Two raw title dicts, one per pie. Each title sits 15% to the
            # left of its pie's centre (30% and 75%).
            title_opts=[
                dict(
                    text='总粉丝人数区间分布',
                    # Was '15': a bare number is a pixel offset, which put this
                    # title at the far left edge instead of next to its pie.
                    left='15%',
                    top='80%',
                    textStyle=dict(
                        color='#334B5C',
                        fontSize=16,
                    ),
                ),
                dict(
                    text='作品数区间分布',
                    left='60%',
                    top='80%',
                    textStyle=dict(
                        color='#334B5C',
                        fontSize=16,
                    ),
                ),
            ],
        )
    )
    return pie
# Dashboard canvas that hosts all bar charts plus the pie chart.
grid = Grid(
    init_opts=opts.InitOpts(width='1000px', height='2000px', theme='light')
)

# One entry per panel, in the order they are added to the grid:
# (chart, pos_top, pos_bottom, pos_left, pos_right)
panels = [
    (bar_chart('获赞数', ['15%', '1%'], 8), '5%', '85%', '15%', '50%'),
    (bar_chart('平均获赞数', ['60%', '1%'], 8), '5%', '85%', '60%', '5%'),
    (bar_chart('播放数', ['15%', '21%'], 8), '25%', '65%', '15%', '50%'),
    (bar_chart('平均播放数', ['60%', '21%'], 8), '25%', '65%', '60%', '5%'),
    (bar_chart('总粉丝人数', ['15%', '40%'], 15), '44%', '46%', '15%', '5%'),
    (bar_chart('作品数', ['15%', '58%'], 15), '61%', '29%', '15%', '5%'),
    (pie_chart(), '25%', '65%', '5%', '5%'),
]

for panel, top, bottom, left, right in panels:
    grid.add(
        panel,
        is_control_axis_index=False,
        grid_opts=opts.GridOpts(
            pos_top=top,
            pos_bottom=bottom,
            pos_left=left,
            pos_right=right,
        ),
    )

grid.render_notebook()