Source: Psi Chi
R
Load packages and import data
#library(tidyverse)
library(dplyr)
#library(purrr)
library(ggplot2)
dataset=read.csv('https://osf.io/download/xq8us/')
skimr::skim(dataset)
Data summary
| Name |
dataset |
| Number of rows |
1344 |
| Number of columns |
8 |
| _______________________ |
|
| Column type frequency: |
|
| character |
4 |
| numeric |
4 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| Host_country |
0 |
1 |
5 |
16 |
0 |
21 |
0 |
| Host_city |
0 |
1 |
4 |
19 |
0 |
23 |
0 |
| Country_Name |
0 |
1 |
3 |
32 |
0 |
157 |
0 |
| Country_Code |
0 |
1 |
0 |
3 |
86 |
156 |
0 |
Variable type: numeric
| Year |
0 |
1 |
1978.96 |
33.48 |
1896 |
1956 |
1988 |
2008 |
2020 |
▂▂▃▃▇ |
| Gold |
0 |
1 |
4.07 |
8.45 |
0 |
0 |
1 |
4 |
83 |
▇▁▁▁▁ |
| Silver |
0 |
1 |
4.04 |
7.10 |
0 |
0 |
2 |
4 |
78 |
▇▁▁▁▁ |
| Bronze |
0 |
1 |
4.39 |
6.84 |
0 |
1 |
2 |
5 |
77 |
▇▁▁▁▁ |
Level 1
-Let’s get familiar with our data. Create a table that shows the
frequency of each Country in the dataset.
tab1 =dataset %>%
group_by(Country_Name) %>%
summarise(Frequency=n())
slice_sample(tab1,n=5)
| France |
29 |
| Switzerland |
28 |
| Belarus |
7 |
| Croatia |
8 |
| Argentina |
20 |
Level 2
-Next, let’s look at the representation of each country’s medal
status. Create atable that shows the number of times each country has
won a bronze, silver, and gold medal.
tab2=dataset %>%
group_by(Country_Name) %>%
summarise(Gold = sum(Gold), Silver = sum(Silver), Bronze = sum(Bronze))
slice_sample(tab2,n=5)
| Sri Lanka |
0 |
1 |
0 |
| Lithuania |
6 |
7 |
13 |
| Bahrain |
2 |
2 |
0 |
| Ivory Coast |
1 |
1 |
2 |
| Germany |
202 |
207 |
247 |
Level 3
-Is there a difference in the number of silver medals a country earns
if they are competing in their home country? *Note please pick any one
country you’d like to look at
home=dataset %>%
filter(Host_country=='United States') %>%
group_by(Country_Name) %>%
summarise(Silver = sum(Silver)) %>%
mutate(Percent_Win = round(Silver/sum(Silver) * 100,2))
away =dataset %>%
filter(!Host_country=='United States') %>%
group_by(Country_Name) %>%
summarise(Silver = sum(Silver)) %>%
mutate(Percent_Win = round(Silver/sum(Silver) * 100,2))
shapiro.test(home$Silver)
##
## Shapiro-Wilk normality test
##
## data: home$Silver
## W = 0.30633, p-value < 2.2e-16
shapiro.test(away$Silver)
##
## Shapiro-Wilk normality test
##
## data: away$Silver
## W = 0.45531, p-value < 2.2e-16
hist(home$Silver)

hist(away$Silver)

wilcox1=wilcox.test(home$Silver,away$Silver)
#yes, there's a difference in the number of silver medals a country earns if they are competing in their home country
print(wilcox1)
##
## Wilcoxon rank sum test with continuity correction
##
## data: home$Silver and away$Silver
## W = 5164.5, p-value = 0.000254
## alternative hypothesis: true location shift is not equal to 0
Level 4
-Create a visualization that shows the United State’s gold medal over
time.
USA_visual = dataset %>%
filter(Country_Name == 'United States')
USA_visual %>%
ggplot(aes(x=Year,y=Gold))+
geom_line(lwd=1,col='darkred')+
theme_bw()+
labs(y='Number of Gold Medals',
x= 'Years',
title = 'The Roller Coaster of the American Quest for Gold',
subtitle = "Examining the trajectory of the fight for the ultimate prize",fill=' ')+
theme(plot.title = element_text(hjust = .5),
plot.subtitle = element_text(hjust = .5))

LS0tDQp0aXRsZTogIlBzaSBDaGkgUiAtIEp1bHkgMjAyNCINCmF1dGhvcjogImJ5IEFsYW4gTGFtIg0KI2RhdGU6ICJgciBTeXMuRGF0ZSgpYCINCmRhdGU6ICJEYXRlOiBgciBmb3JtYXQoU3lzLkRhdGUoKSwgJyVkICVCICVZJylgIiANCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0aGVtZTogcmVhZGFibGUNCiAgICBhbHdheXNfYWxsb3dfaHRtbDogeWVzDQogICAgZGZfcHJpbnQ6IGthYmxlDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICAgIG51bWJlcl9zZWN0aW9uczogbm8NCiAgICBhbmNob3Jfc2VjdGlvbnM6IFRSVUUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NCg0KWyoqU291cmNlKio6IFBzaSBDaGkgUl0oaHR0cHM6Ly9vc2YuaW8vNHBidWMvKQ0KDQojIyBMb2FkIHBhY2thZ2VzIGFuZCBpbXBvcnQgZGF0YQ0KDQpgYGB7ciBzZXR1cCwgd2FybmluZz1GLG1lc3NhZ2U9Rn0NCiNsaWJyYXJ5KHRpZHl2ZXJzZSkNCg0KbGlicmFyeShkcGx5cikNCiNsaWJyYXJ5KHB1cnJyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KDQpkYXRhc2V0PXJlYWQuY3N2KCdodHRwczovL29zZi5pby9kb3dubG9hZC94cTh1cy8nKQ0KYGBgDQoNCmBgYHtyfQ0Kc2tpbXI6OnNraW0oZGF0YXNldCkNCmBgYA0KDQojIyBMZXZlbCAxIA0KDQotTGV04oCZcyBnZXQgZmFtaWxpYXIgd2l0aCBvdXIgZGF0YS4gQ3JlYXRlIGEgdGFibGUgdGhhdCBzaG93cyB0aGUgZnJlcXVlbmN5IG9mIGVhY2ggQ291bnRyeSBpbiB0aGUgZGF0YXNldC4gDQoNCmBgYHtyfQ0KdGFiMSA9ZGF0YXNldCAlPiUgDQogIGdyb3VwX2J5KENvdW50cnlfTmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoRnJlcXVlbmN5PW4oKSkNCg0Kc2xpY2Vfc2FtcGxlKHRhYjEsbj01KQ0KYGBgDQoNCiMjIExldmVsIDINCg0KLU5leHQsIGxldOKAmXMgbG9vayBhdCB0aGUgcmVwcmVzZW50YXRpb24gb2YgZWFjaCBjb3VudHJ54oCZcyBtZWRhbCBzdGF0dXMuIENyZWF0ZSBhdGFibGUgdGhhdCBzaG93cyB0aGUgbnVtYmVyIG9mIHRpbWVzIGVhY2ggY291bnRyeSBoYXMgd29uIGEgYnJvbnplLCBzaWx2ZXIsIGFuZCBnb2xkIG1lZGFsLg0KDQpgYGB7cn0NCnRhYjI9ZGF0YXNldCAlPiUgDQogIGdyb3VwX2J5KENvdW50cnlfTmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoR29sZCA9IHN1bShHb2xkKSwgU2lsdmVyID0gc3VtKFNpbHZlciksIEJyb256ZSA9IHN1bShCcm9uemUpKQ0KDQpzbGljZV9zYW1wbGUodGFiMixuPTUpDQpgYGANCg0KIyMgTGV2ZWwgMw0KDQotSXMgdGhlcmUgYSBkaWZmZXJlbmNlIGluIHRoZSBudW1iZXIgb2Ygc2lsdmVyIG1lZGFscyBhIGNvdW50cnkgZWFybnMgaWYgdGhleSBhcmUgY29tcGV0aW5nIGluIHRoZWlyIGhvbWUgY291bnRyeT8gKk5vdGUgcGxlYXNlIHBpY2sgYW55IG9uZSBjb3VudHJ5IHlvdeKAmWQgbGlrZSB0byBsb29rIGF0DQoNCmBgYHtyfQ0KaG9tZT1kYXRhc2V0ICU+JSANCiAgZmlsdGVyKEhvc3RfY291bnRyeT09J1VuaXRlZCBTdGF0ZXMnKSAlPiUgDQogIGdyb3VwX2J5KENvdW50cnlfTmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoU2lsdmVyID0gc3VtKFNpbHZlcikpICU+JSANCiAgbXV0YXRlKFBlcmNlbnRfV2luID0gcm91bmQoU2lsdmVyL3N1bShTaWx2ZXIpICogMTAwLDIpKQ0KDQphd2F5ID1kYXRhc2V0ICU+JQ0KICBmaWx0ZXIoIUhvc3RfY291bnRyeT09J1VuaXRlZCBTdGF0ZXMnKSAlPiUgDQogIGdyb3VwX2J5KENvdW50cnlfTmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoU2lsdmVyID0gc3VtKFNpbHZlcikpICU+JSANCiAgbXV0YXRlKFBlcmNlbnRfV2luID0gcm91bmQoU2lsdmVyL3N1bShTaWx2ZXIpICogMTAwLDIpKQ0KYGBgDQoNCmBgYHtyfQ0Kc2hhcGlyby50ZXN0KGhvbWUkU2lsdmVyKQ0Kc2hhcGlyby50ZXN0KGF3YXkkU2lsdmVyKQ0KDQpoaXN0KGhvbWUkU2lsdmVyKQ0KaGlzdChhd2F5JFNpbHZlcikNCmBgYA0KDQoNCmBgYHtyfQ0Kd2lsY294MT13aWxjb3gudGVzdChob21lJFNpbHZlcixhd2F5JFNpbHZlcikNCg0KI3llcywgdGhlcmUncyBhIGRpZmZlcmVuY2UgaW4gdGhlIG51bWJlciBvZiBzaWx2ZXIgbWVkYWxzIGEgY291bnRyeSBlYXJucyBpZiB0aGV5IGFyZSBjb21wZXRpbmcgaW4gdGhlaXIgaG9tZSBjb3VudHJ5DQpwcmludCh3aWxjb3gxKQ0KDQpgYGANCg0KIyMgTGV2ZWwgNA0KDQotQ3JlYXRlIGEgdmlzdWFsaXphdGlvbiB0aGF0IHNob3dzIHRoZSBVbml0ZWQgU3RhdGXigJlzIGdvbGQgbWVkYWwgb3ZlciB0aW1lLiANCmBgYHtyfQ0KVVNBX3Zpc3VhbCA9IGRhdGFzZXQgJT4lIA0KICBmaWx0ZXIoQ291bnRyeV9OYW1lID09ICdVbml0ZWQgU3RhdGVzJykNCg0KVVNBX3Zpc3VhbCAlPiUgDQogIGdncGxvdChhZXMoeD1ZZWFyLHk9R29sZCkpKw0KICBnZW9tX2xpbmUobHdkPTEsY29sPSdkYXJrcmVkJykrDQogIHRoZW1lX2J3KCkrDQogIGxhYnMoeT0nTnVtYmVyIG9mIEdvbGQgTWVkYWxzJywNCiAgICAgICB4PSAnWWVhcnMnLA0KICAgICAgIHRpdGxlID0gJ1RoZSBSb2xsZXIgQ29hc3RlciBvZiB0aGUgQW1lcmljYW4gUXVlc3QgZm9yIEdvbGQnLA0KICAgICAgIHN1YnRpdGxlID0gIkV4YW1pbmluZyB0aGUgdHJhamVjdG9yeSBvZiB0aGUgZmlnaHQgZm9yIHRoZSB1bHRpbWF0ZSBwcml6ZSIsZmlsbD0nICcpKw0KICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpLA0KICAgICAgICBwbG90LnN1YnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpKQ0KYGBgDQoNCmBgYHtyIGluY2x1ZGU9RkFMU0V9DQojYmVlcCB3aGVuIGRvbmUNCmlmIChyZXF1aXJlKCJiZWVwciIscXVpZXRseSA9IFQpKQ0KICBiZWVwcjo6YmVlcCgyKQ0KYGBg