Source: Psi Chi R

Load packages and import data

#library(tidyverse)

library(dplyr)
#library(purrr)
library(ggplot2)

dataset=read.csv('https://osf.io/download/xq8us/')
skimr::skim(dataset)
Data summary
Name dataset
Number of rows 1344
Number of columns 8
_______________________
Column type frequency:
character 4
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Host_country 0 1 5 16 0 21 0
Host_city 0 1 4 19 0 23 0
Country_Name 0 1 3 32 0 157 0
Country_Code 0 1 0 3 86 156 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Year 0 1 1978.96 33.48 1896 1956 1988 2008 2020 ▂▂▃▃▇
Gold 0 1 4.07 8.45 0 0 1 4 83 ▇▁▁▁▁
Silver 0 1 4.04 7.10 0 0 2 4 78 ▇▁▁▁▁
Bronze 0 1 4.39 6.84 0 1 2 5 77 ▇▁▁▁▁

Level 1

-Let’s get familiar with our data. Create a table that shows the frequency of each Country in the dataset.

tab1 =dataset %>% 
  group_by(Country_Name) %>% 
  summarise(Frequency=n())

slice_sample(tab1,n=5)
Country_Name Frequency
France 29
Switzerland 28
Belarus 7
Croatia 8
Argentina 20

Level 2

-Next, let’s look at the representation of each country’s medal status. Create atable that shows the number of times each country has won a bronze, silver, and gold medal.

tab2=dataset %>% 
  group_by(Country_Name) %>% 
  summarise(Gold = sum(Gold), Silver = sum(Silver), Bronze = sum(Bronze))

slice_sample(tab2,n=5)
Country_Name Gold Silver Bronze
Sri Lanka 0 1 0
Lithuania 6 7 13
Bahrain 2 2 0
Ivory Coast 1 1 2
Germany 202 207 247

Level 3

-Is there a difference in the number of silver medals a country earns if they are competing in their home country? *Note please pick any one country you’d like to look at

home=dataset %>% 
  filter(Host_country=='United States') %>% 
  group_by(Country_Name) %>% 
  summarise(Silver = sum(Silver)) %>% 
  mutate(Percent_Win = round(Silver/sum(Silver) * 100,2))

away =dataset %>%
  filter(!Host_country=='United States') %>% 
  group_by(Country_Name) %>% 
  summarise(Silver = sum(Silver)) %>% 
  mutate(Percent_Win = round(Silver/sum(Silver) * 100,2))
shapiro.test(home$Silver)
## 
##  Shapiro-Wilk normality test
## 
## data:  home$Silver
## W = 0.30633, p-value < 2.2e-16
shapiro.test(away$Silver)
## 
##  Shapiro-Wilk normality test
## 
## data:  away$Silver
## W = 0.45531, p-value < 2.2e-16
hist(home$Silver)

hist(away$Silver)

wilcox1=wilcox.test(home$Silver,away$Silver)

#yes, there's a difference in the number of silver medals a country earns if they are competing in their home country
print(wilcox1)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  home$Silver and away$Silver
## W = 5164.5, p-value = 0.000254
## alternative hypothesis: true location shift is not equal to 0

Level 4

-Create a visualization that shows the United State’s gold medal over time.

USA_visual = dataset %>% 
  filter(Country_Name == 'United States')

USA_visual %>% 
  ggplot(aes(x=Year,y=Gold))+
  geom_line(lwd=1,col='darkred')+
  theme_bw()+
  labs(y='Number of Gold Medals',
       x= 'Years',
       title = 'The Roller Coaster of the American Quest for Gold',
       subtitle = "Examining the trajectory of the fight for the ultimate prize",fill=' ')+
  theme(plot.title = element_text(hjust = .5),
        plot.subtitle = element_text(hjust = .5))

LS0tDQp0aXRsZTogIlBzaSBDaGkgUiAtIEp1bHkgMjAyNCINCmF1dGhvcjogImJ5IEFsYW4gTGFtIg0KI2RhdGU6ICJgciBTeXMuRGF0ZSgpYCINCmRhdGU6ICJEYXRlOiBgciBmb3JtYXQoU3lzLkRhdGUoKSwgJyVkICVCICVZJylgIiANCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0aGVtZTogcmVhZGFibGUNCiAgICBhbHdheXNfYWxsb3dfaHRtbDogeWVzDQogICAgZGZfcHJpbnQ6IGthYmxlDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICAgIG51bWJlcl9zZWN0aW9uczogbm8NCiAgICBhbmNob3Jfc2VjdGlvbnM6IFRSVUUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NCg0KWyoqU291cmNlKio6IFBzaSBDaGkgUl0oaHR0cHM6Ly9vc2YuaW8vNHBidWMvKQ0KDQojIyBMb2FkIHBhY2thZ2VzIGFuZCBpbXBvcnQgZGF0YQ0KDQpgYGB7ciBzZXR1cCwgd2FybmluZz1GLG1lc3NhZ2U9Rn0NCiNsaWJyYXJ5KHRpZHl2ZXJzZSkNCg0KbGlicmFyeShkcGx5cikNCiNsaWJyYXJ5KHB1cnJyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KDQpkYXRhc2V0PXJlYWQuY3N2KCdodHRwczovL29zZi5pby9kb3dubG9hZC94cTh1cy8nKQ0KYGBgDQoNCmBgYHtyfQ0Kc2tpbXI6OnNraW0oZGF0YXNldCkNCmBgYA0KDQojIyBMZXZlbCAxIA0KDQotTGV04oCZcyBnZXQgZmFtaWxpYXIgd2l0aCBvdXIgZGF0YS4gQ3JlYXRlIGEgdGFibGUgdGhhdCBzaG93cyB0aGUgZnJlcXVlbmN5IG9mIGVhY2ggQ291bnRyeSBpbiB0aGUgZGF0YXNldC4gDQoNCmBgYHtyfQ0KdGFiMSA9ZGF0YXNldCAlPiUgDQogIGdyb3VwX2J5KENvdW50cnlfTmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoRnJlcXVlbmN5PW4oKSkNCg0Kc2xpY2Vfc2FtcGxlKHRhYjEsbj01KQ0KYGBgDQoNCiMjIExldmVsIDINCg0KLU5leHQsIGxldOKAmXMgbG9vayBhdCB0aGUgcmVwcmVzZW50YXRpb24gb2YgZWFjaCBjb3VudHJ54oCZcyBtZWRhbCBzdGF0dXMuIENyZWF0ZSBhdGFibGUgdGhhdCBzaG93cyB0aGUgbnVtYmVyIG9mIHRpbWVzIGVhY2ggY291bnRyeSBoYXMgd29uIGEgYnJvbnplLCBzaWx2ZXIsIGFuZCBnb2xkIG1lZGFsLg0KDQpgYGB7cn0NCnRhYjI9ZGF0YXNldCAlPiUgDQogIGdyb3VwX2J5KENvdW50cnlfTmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoR29sZCA9IHN1bShHb2xkKSwgU2lsdmVyID0gc3VtKFNpbHZlciksIEJyb256ZSA9IHN1bShCcm9uemUpKQ0KDQpzbGljZV9zYW1wbGUodGFiMixuPTUpDQpgYGANCg0KIyMgTGV2ZWwgMw0KDQotSXMgdGhlcmUgYSBkaWZmZXJlbmNlIGluIHRoZSBudW1iZXIgb2Ygc2lsdmVyIG1lZGFscyBhIGNvdW50cnkgZWFybnMgaWYgdGhleSBhcmUgY29tcGV0aW5nIGluIHRoZWlyIGhvbWUgY291bnRyeT8gKk5vdGUgcGxlYXNlIHBpY2sgYW55IG9uZSBjb3VudHJ5IHlvdeKAmWQgbGlrZSB0byBsb29rIGF0DQoNCmBgYHtyfQ0KaG9tZT1kYXRhc2V0ICU+JSANCiAgZmlsdGVyKEhvc3RfY291bnRyeT09J1VuaXRlZCBTdGF0ZXMnKSAlPiUgDQogIGdyb3VwX2J5KENvdW50cnlfTmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoU2lsdmVyID0gc3VtKFNpbHZlcikpICU+JSANCiAgbXV0YXRlKFBlcmNlbnRfV2luID0gcm91bmQoU2lsdmVyL3N1bShTaWx2ZXIpICogMTAwLDIpKQ0KDQphd2F5ID1kYXRhc2V0ICU+JQ0KICBmaWx0ZXIoIUhvc3RfY291bnRyeT09J1VuaXRlZCBTdGF0ZXMnKSAlPiUgDQogIGdyb3VwX2J5KENvdW50cnlfTmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoU2lsdmVyID0gc3VtKFNpbHZlcikpICU+JSANCiAgbXV0YXRlKFBlcmNlbnRfV2luID0gcm91bmQoU2lsdmVyL3N1bShTaWx2ZXIpICogMTAwLDIpKQ0KYGBgDQoNCmBgYHtyfQ0Kc2hhcGlyby50ZXN0KGhvbWUkU2lsdmVyKQ0Kc2hhcGlyby50ZXN0KGF3YXkkU2lsdmVyKQ0KDQpoaXN0KGhvbWUkU2lsdmVyKQ0KaGlzdChhd2F5JFNpbHZlcikNCmBgYA0KDQoNCmBgYHtyfQ0Kd2lsY294MT13aWxjb3gudGVzdChob21lJFNpbHZlcixhd2F5JFNpbHZlcikNCg0KI3llcywgdGhlcmUncyBhIGRpZmZlcmVuY2UgaW4gdGhlIG51bWJlciBvZiBzaWx2ZXIgbWVkYWxzIGEgY291bnRyeSBlYXJucyBpZiB0aGV5IGFyZSBjb21wZXRpbmcgaW4gdGhlaXIgaG9tZSBjb3VudHJ5DQpwcmludCh3aWxjb3gxKQ0KDQpgYGANCg0KIyMgTGV2ZWwgNA0KDQotQ3JlYXRlIGEgdmlzdWFsaXphdGlvbiB0aGF0IHNob3dzIHRoZSBVbml0ZWQgU3RhdGXigJlzIGdvbGQgbWVkYWwgb3ZlciB0aW1lLiANCmBgYHtyfQ0KVVNBX3Zpc3VhbCA9IGRhdGFzZXQgJT4lIA0KICBmaWx0ZXIoQ291bnRyeV9OYW1lID09ICdVbml0ZWQgU3RhdGVzJykNCg0KVVNBX3Zpc3VhbCAlPiUgDQogIGdncGxvdChhZXMoeD1ZZWFyLHk9R29sZCkpKw0KICBnZW9tX2xpbmUobHdkPTEsY29sPSdkYXJrcmVkJykrDQogIHRoZW1lX2J3KCkrDQogIGxhYnMoeT0nTnVtYmVyIG9mIEdvbGQgTWVkYWxzJywNCiAgICAgICB4PSAnWWVhcnMnLA0KICAgICAgIHRpdGxlID0gJ1RoZSBSb2xsZXIgQ29hc3RlciBvZiB0aGUgQW1lcmljYW4gUXVlc3QgZm9yIEdvbGQnLA0KICAgICAgIHN1YnRpdGxlID0gIkV4YW1pbmluZyB0aGUgdHJhamVjdG9yeSBvZiB0aGUgZmlnaHQgZm9yIHRoZSB1bHRpbWF0ZSBwcml6ZSIsZmlsbD0nICcpKw0KICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpLA0KICAgICAgICBwbG90LnN1YnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gLjUpKQ0KYGBgDQoNCmBgYHtyIGluY2x1ZGU9RkFMU0V9DQojYmVlcCB3aGVuIGRvbmUNCmlmIChyZXF1aXJlKCJiZWVwciIscXVpZXRseSA9IFQpKQ0KICBiZWVwcjo6YmVlcCgyKQ0KYGBg