#Bring in different packages
library(tidyr)
library(readxl)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(scales)
#Read in the file
olympic2014 <- read_excel("OlympicMedalsbyEvent2014.xlsx")
#Add a column for total medal count and group by country
medal_count_2014 <- olympic2014 %>% group_by(Country) %>% mutate(medal_count = 1) %>% summarize(total_2014_medals = sum(medal_count))
#Plot the medal count
ggplot(medal_count_2014, aes(x = reorder(Country, total_2014_medals), y = total_2014_medals)) +
geom_bar(stat="identity") +
coord_flip() +
ggtitle("Winter Olympics 2014 Total Medals Won") +
xlab("Country") +
ylab("Total Medals Won") +
theme_few()
Latitude-the angular distance of a place north or south of the earth’s equator, or of a celestial object north or south of the celestial equator, usually expressed in degrees and minutes.
GDP per Capita-This is the “average” output of the economy per person measured in a base year prices. This ratio is often used as a measure of standard of living in comparisons over time of one country, or between different countries when measured in the same currency.
Average Temperature- mean monthly and annual temperatures by country for the period 1961-1999. Values are in degrees Celsius.
#Read in file
countries <- read.csv("countries.csv")
#Change column name for joining to original dataset
colnames(countries)[colnames(countries)=="name"] <- "Country"
#Create new dataset combining latitude and medals won
medal_loc_2014 <- inner_join(medal_count_2014, countries, by = "Country") %>% select(country, Country, total_2014_medals, latitude)
#Plot latitude and medals won
ggplot(medal_loc_2014, aes(x = latitude, y = total_2014_medals)) +
geom_point(size = 5, alpha = 0.5) +
ggtitle("Country Latitude vs Number of 2014 Medals") +
xlab("Latitude (degrees)") +
ylab("Total Medals Won") +
theme_few()
#Bring in data file
gdp <- read.csv("UNdata_Export_GDP_per_Capita.csv")
#Filter for only 2014 values
gdp_2014 <- gdp %>% filter(Year == 2014)
#Change column name to join to original data set.
colnames(gdp_2014)[colnames(gdp_2014)=="Country.or.Area"] <- "Country"
#Join the datasets
medal_gdp_2014 <- left_join(medal_count_2014, gdp_2014, by = "Country") %>% select(Country, total_2014_medals, Value)
#Plot the GDP/Capita compared to medals
ggplot(medal_gdp_2014, aes(x = Value, y = total_2014_medals)) +
geom_point(size = 5, alpha = 0.5) +
geom_smooth(method = "lm") +
ggtitle("2014 GDP/Capita vs Number of 2014 Medals") +
xlab("GDP/Capita ($/person)") +
ylab("Total Medals Won") +
theme_few()
#Bring in temperature dataset
avg_temp <- read_xls("cckp_historical_data_0.xls", sheet = "Country_temperatureCRU")
#The tempareture data uses ISO country codes, bring in dataset to map ISO code to country name
country_codes <- read.csv("country code list.csv")
#Change column name for joining tables
colnames(country_codes)[colnames(country_codes)=="ISO.ALPHA.3.Code"] <- "ISO_3DIGIT"
#Join to country name mapping data set to include country name in temperature dataset
avg_temp <- left_join(avg_temp, country_codes, by = "ISO_3DIGIT")
#Change column name to join to original dataset
colnames(avg_temp)[colnames(avg_temp)=="Country.or.Area.Name"] <- "Country"
#Join temperature data to medal data
medal_temp_2014 <- inner_join(medal_count_2014, avg_temp, by = "Country") %>% select(Country, total_2014_medals, Annual_temp)
#Plot medals copmpared to average tempareture
ggplot(medal_temp_2014, aes(x = Annual_temp, y = total_2014_medals)) +
geom_point(size = 5, alpha = 0.5) +
geom_smooth(method = "lm") +
ggtitle("Average Temperature vs Number of 2014 Medals") +
xlab("*Avg Temp (degrees C)") +
ylab("Total Medals Won") +
labs(caption = "*Average temperature of timeframe 1961 through 1999") +
theme_few()
medal_gt_2014 <-left_join(medal_gdp_2014, medal_temp_2014, by = "Country")
ggplot(medal_gt_2014, aes(x = Annual_temp, y = total_2014_medals.x, size = Value)) +
geom_point(alpha = 0.5) +
ggtitle("2014 Medalling Countries") +
xlab("*Avg Temp (degrees C)") +
ylab("Total Medals Won") +
labs(size = "GDP/Capita ($/person)", caption = "*Average temperature of timeframe 1961 through 1999") +
theme_few()
#Bring in data file
olympicmedals <- read_excel("HistoricWinterOlympicMedals.xlsx")
#Filter for 1994 olympics then summarize total coutn of medals
medal_count_1994 <- olympicmedals %>% filter(Year == 1994) %>% group_by(NOC) %>% mutate(medal_count = 1) %>% summarize(total_medals = sum(medal_count))
#Filter GDP data for 1994
gdp_1994 <- gdp %>% filter(Year == 1994)
#Change column name to join to medal data
colnames(gdp_1994)[colnames(gdp_1994)=="Country.or.Area"] <- "Country"
#Medal data has ISO codes, so need to change column name to join to mappying table
colnames(medal_count_1994)[colnames(medal_count_1994)=="NOC"] <- "ISO_3DIGIT"
#Bring in country name to medal data to join to other datasets
medal_count_1994_2 <- inner_join(medal_count_1994, country_codes, by = "ISO_3DIGIT")
#Change column name to join to temp and gdp datasets
colnames(medal_count_1994_2)[colnames(medal_count_1994_2)=="Country.or.Area.Name"] <- "Country"
#Join medal to temperature data
medal_temp_1994 <- inner_join(medal_count_1994_2, avg_temp, by = "Country") %>% select(Country, total_medals, Annual_temp)
#Add gdp data to previous table
medal_tg_1994 <- inner_join(medal_temp_1994, gdp_1994, by = "Country")
#Plot three variable to see how trend compares to 2014
ggplot(medal_tg_1994, aes(x = Annual_temp, y = total_medals, size = Value)) +
geom_point(alpha = 0.5) +
ggtitle("1994 Medalling Countries") +
xlab("*Avg Temp (degrees C)") +
ylab("Total Medals Won") +
labs(size = "GDP/Capita ($/person)", caption = "*Average temperature of timeframe 1961 through 1999") +
theme_few()
#Filter for 2002 and summarize total medals by country
medal_count_2002 <- olympicmedals %>% filter(Year == 2002) %>% group_by(NOC) %>% mutate(medal_count = 1) %>% summarize(total_medals = sum(medal_count))
#Filter for GDP for 2002 and change column name for doing join
gdp_2002 <- gdp %>% filter(Year == 2002)
colnames(gdp_2002)[colnames(gdp_2002)=="Country.or.Area"] <- "Country"
#Change column name to join ot country code mapping tables
colnames(medal_count_2002)[colnames(medal_count_2002)=="NOC"] <- "ISO_3DIGIT"
#Join medal data to country code mapping to get country name
medal_count_2002_2 <- inner_join(medal_count_2002, country_codes, by = "ISO_3DIGIT")
colnames(medal_count_2002_2)[colnames(medal_count_2002_2)=="Country.or.Area.Name"] <- "Country"
#Join medal data to temperature data
medal_temp_2002 <- left_join(medal_count_2002_2, avg_temp, by = "Country") %>% select(Country, total_medals, Annual_temp)
#Add GDP data to previous dataset
medal_tg_2002 <- inner_join(medal_temp_2002, gdp_2002, by = "Country")
#Plot variables to see if trends still apply
ggplot(medal_tg_2002, aes(x = Annual_temp, y = total_medals, size = Value)) +
geom_point(alpha = 0.5) +
ggtitle("2002 Medalling Countries") +
xlab("*Avg Temp (degrees C)") +
ylab("Total Medals Won") +
labs(size = "GDP/Capita ($/person)", caption = "*Average temperature of timeframe 1961 through 1999") +
theme_few()