文章目录
1 简介
🔥 Hi,大家好,这里是丹成学长的毕设系列文章!
🔥 对毕设有任何疑问都可以问学长哦!
这两年开始,各个学校对毕设的要求越来越高,难度也越来越大… 毕业设计耗费时间,耗费精力,甚至有些题目即使是专业的老师或者硕士生也需要很长时间,所以一旦发现问题,一定要提前准备,避免到后面措手不及,草草了事。
为了让大家能够顺利并以最少的精力通过毕设,学长会分享优质的毕业设计项目。今天要分享的新项目是:
🚩 奥运会数据集分析
🥇学长这里给一个题目综合评分(每项满分5分)
- 难度系数:4分
- 工作量:4分
- 创新点:3分
🧿 选题指导, 项目分享:
https://gitee.com/yaa-dc/BJH/blob/master/gg/cc/README.md
2 导入包+基本的数据处理
# Notebook setup: plotting libraries, offline Plotly rendering, and the dataset.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import colorlover as cl
from plotly import __version__
from plotly.offline import init_notebook_mode, iplot
# The wildcard supplies Bar, Scatter, Layout and Figure used by the sections below.
from plotly.graph_objs import *

# import seaborn as sns
# color = sns.color_palette()

# Show which Plotly release this notebook is running against.
print(__version__)

# Enable inline Plotly output in the notebook; connected=True is passed through
# unchanged from the original call.
init_notebook_mode(connected=True)

# Location of the athlete-events CSV on the hosting platform.
# Columns read by the later sections: Sex, Sport, Team, Medal, Year.
f_p = '/home/kesci/input/olympic/athlete_events.csv'
athlete_events = pd.read_csv(f_p)
3 生成奥运会运动项目的词云
from wordcloud import WordCloud, STOPWORDS

# wordcloud's built-in stop-word list, as a set, passed to every cloud drawn below.
stopwords = set(STOPWORDS)


def show_wordcloud(data, title=None):
    """Draw a word cloud built from ``str(data)`` and display it with matplotlib.

    Args:
        data: Object whose string form supplies the words.
        title: Optional figure title. When falsy, no title is drawn and the
            subplot layout is left untouched.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # NOTE(review): when `data` is a pandas Series (as in the call below),
    # str(data) is the Series' printed repr. With default display options pandas
    # truncates long Series and appends a Name/Length/dtype line, so the cloud
    # may reflect only a sample of the rows. Consider passing pre-joined text.
    wordcloud = WordCloud(
        background_color='white',
        stopwords=stopwords,
        max_words=200,
        max_font_size=40,
        scale=3,
        random_state=1,  # chosen at random by flipping a coin; it was heads
    ).generate(str(data))

    fig = plt.figure(1, figsize=(15, 15))
    plt.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        fig.subplots_adjust(top=2.3)

    plt.imshow(wordcloud)
    plt.show()


show_wordcloud(athlete_events['Sport'], title="往届奥运比赛项目词云")
4 查看参赛者的男女基本信息
# Donut chart: share of rows per value of the 'Sex' column.
sex_counts = athlete_events['Sex'].value_counts()

# value_counts() orders its result by frequency, so each label is looked up from
# its own index entry instead of assuming which category comes first.
# assumes the column is coded 'M' / 'F' — TODO confirm; any other code is shown as-is.
sex_labels = {'M': "男性", 'F': "女性"}

fig = {
    "data": [
        {
            "values": sex_counts,
            "labels": [sex_labels.get(code, code) for code in sex_counts.index],
            "marker": {'colors': cl.scales['5']['div']['PuOr']},
            "name": "参赛者的男女比例",
            "hoverinfo": "label+percent+name",
            "hole": .4,  # a non-zero hole turns the pie into a donut
            "type": "pie",
        }
    ],
    "layout": {"title": "参赛者的男女比例"},
}
iplot(fig, filename='donut')
5 120年来金牌数最多的 Top 20 国家
# Group by medal type and count rows per team inside each group; the resulting
# count column is named 'Medal_Count'.
rows_by_medal = athlete_events.groupby(by=['Medal'])
country_medal = rows_by_medal.Team.value_counts().reset_index(name='Medal_Count')

# Keep only the gold rows, order teams by Medal_Count (largest first) and take
# the first 20 records.
gold_only = country_medal[country_medal.Medal == 'Gold']
gold_ranked = gold_only.sort_values(by=['Medal_Count'], ascending=False)
top20_country_medal = gold_ranked.head(20)

# Bar chart: one bar per team, bar height = gold count.
bar_marker = dict(color=cl.scales['11']['div']['PuOr'], reversescale=True)
trace = Bar(
    x=top20_country_medal.Team,
    y=top20_country_medal.Medal_Count,
    marker=bar_marker,
)

# Figure layout (title only).
layout = Layout(title="Top 20 的金牌数🏅最多的国家")

data = [trace]
fig = Figure(data=data, layout=layout)
iplot(fig)
6 中国历届奥运会运动员获得奖牌人数
# Rows whose Team is 'China', then the number of rows for every (Year, Medal)
# pair, stored in the 'medal_count' column.
china = athlete_events[athlete_events.Team == 'China']
china_medal = china.groupby(by='Year').Medal.value_counts().reset_index(name="medal_count")


def _medal_columns(medal):
    """Return the (Year, medal_count) columns of ``china_medal`` for one medal type."""
    rows = china_medal[china_medal.Medal == medal]
    return rows.Year, rows.medal_count


def _medal_bar(years, counts, label, fill):
    """Build one bar trace for a medal type, with each count used as the bar's text."""
    outline = dict(color='rgb(8,48,107)', width=1.5)
    return Bar(
        x=years,
        y=counts,
        name=label,
        text=counts,
        textposition='auto',
        marker=dict(color=fill, line=outline),
        opacity=0.6,
    )


# Per-medal x (year) and y (count) columns.
x0, y0 = _medal_columns('Gold')
x1, y1 = _medal_columns('Silver')
x2, y2 = _medal_columns('Bronze')
x = china_medal.Year

# One trace per medal type; they are drawn side by side (barmode='group').
trace0 = _medal_bar(x0, y0, 'Gold', 'gold')
trace1 = _medal_bar(x1, y1, 'Silver', 'silver')
trace2 = _medal_bar(x2, y2, 'Bronze', 'olive')

data = [trace0, trace1, trace2]
layout = Layout(
    barmode='group',
    width=800,
    hovermode='closest',
    title='中国历届奥运会运动员获得 金牌🏅 银牌🥈 铜牌🥉 的人数',
)
fig = Figure(data=data, layout=layout)
iplot(fig)
7 Top 10 中国🇨🇳的强项运动项目
# Funnel-style chart of the 10 sports with the largest gold totals for China.

# NOTE(review): `china_gold` was referenced but never defined in this file
# (NameError). It is reconstructed here from how it is used below — a numeric sum
# of `Medal` per sport — as China's gold rows with Medal recoded to 1.
# Confirm this matches the intended data.
china_gold = china[china.Medal == 'Gold'].assign(Medal=1)

# Group by sport and sum the values inside each group.
china_sports = china_gold.groupby(by='Sport', as_index=False).Medal.agg('sum')
# Sort from largest to smallest.
china_sports = china_sports.sort_values(['Medal'], ascending=False)
# Keep the first 10 sports.
top10_china_sports = china_sports.head(10)

# Colour palette: one fill colour per funnel section.
colors = ['#91BBF4', '#91F4F4', '#F79981', '#F7E781', '#C0F781', 'rgb(32,155,160)', 'rgb(253,93,124)', 'rgb(28,119,139)', 'rgb(182,231,235)', 'rgb(35,154,160)']

n_phase = top10_china_sports.Sport.shape[0]
plot_width = 200  # drawing width
section_h = 100  # height of one section
section_d = 15  # gap between sections

# Multiplier that converts a medal total into a section width; the largest
# total maps to plot_width (original author's note: 200 / 56 = 3.57).
unit_width = plot_width / max(top10_china_sports['Medal'])

# Width of each funnel section relative to the plot width.
phase_w = [int(v * unit_width) for v in top10_china_sports['Medal']]

# Total height of the drawing; decremented inside the loop as sections are laid
# out from top to bottom.
height = section_h * n_phase + section_d * (n_phase - 1)

shapes = []  # every shape to draw
label_y = []  # Y position of each section's name/value text

for i in range(n_phase):
    # The top edge uses this section's width. The bottom edge uses the next
    # section's width, except for the last section, which keeps its own width.
    if i == n_phase - 1:
        points = [phase_w[i] / 2, height, phase_w[i] / 2, height - section_h]
    else:
        points = [phase_w[i] / 2, height, phase_w[i + 1] / 2, height - section_h]

    # Closed path mirrored around x = 0.
    path = 'M {0} {1} L {2} {3} L -{2} {3} L -{0} {1} Z'.format(*points)

    shape = {
        'type': 'path',
        'path': path,
        'fillcolor': colors[i],
        'line': {'width': 1, 'color': colors[i]},
    }
    shapes.append(shape)

    # Y-axis location for this section's details (text)
    label_y.append(height - (section_h / 2))

    height = height - (section_h + section_d)

# Sport names, one per section.
label_trace = Scatter(
    x=[-200] * n_phase,
    y=label_y,
    mode='text',
    text=top10_china_sports['Sport'],
    textfont=dict(
        color='rgb(200,200,200)',
        size=15,
    ),
)

# For phase values
value_trace = Scatter(
    x=[-350] * n_phase,
    y=label_y,
    mode='text',
    text=top10_china_sports['Medal'],
    textfont=dict(
        color='rgb(200,200,200)',
        size=12,
    ),
)

data = [label_trace, value_trace]

# NOTE(review): `titlefont` is the legacy spelling; newer Plotly releases expect
# title=dict(text=..., font=...) — confirm against the installed version.
layout = Layout(
    title="<b>Top 10 中国🇨🇳的强项运动项目</b>",
    titlefont=dict(
        size=12,
        color='rgb(203,203,203)',
    ),
    shapes=shapes,
    height=600,
    width=800,
    showlegend=False,
    paper_bgcolor='rgba(44,58,71,1)',
    plot_bgcolor='rgba(44,58,71,1)',
    xaxis=dict(
        showticklabels=False,
        zeroline=False,
    ),
    yaxis=dict(
        showticklabels=False,
        zeroline=False,
    ),
)

fig = Figure(data=data, layout=layout)
iplot(fig)
8 最后
版权归原作者 caxiou 所有, 如有侵权,请联系我们删除。