# The four subsets contain information that potentially impacts the life expectancy of the world. The data cover relative information between 1800 and 2018.
library(RCurl)
# Load the raw data sets from the STA-553 GitHub repository.
# All files live under the same base URL, so the download-and-parse step is
# factored into a single helper instead of being repeated for every file.
data_base_url <- "https://raw.githubusercontent.com/JSchrager94/STA-553/main/Data/"

# Download one CSV file from `data_base_url` and parse it.
#
# file_name: name of the CSV file relative to `data_base_url`.
# Returns the data frame produced by read.csv() on the text that
# RCurl::getURL() downloads.
fetch_course_csv <- function(file_name) {
  csv_text <- getURL(paste0(data_base_url, file_name))
  read.csv(text = csv_text)
}

income.person   <- fetch_course_csv("income_per_person.csv")
life.expectancy <- fetch_course_csv("life_expectancy_years.csv")
country.totals  <- fetch_course_csv("countries_total.csv")
pop.size        <- fetch_course_csv("population_total.csv")
poc             <- fetch_course_csv("POC.csv")
# ---- Life expectancy: wide -> long ----
# Every column except `geo` is stacked: the former column name goes to `Year`,
# the cell value to `lifeExp`; rows with a missing value are dropped.
life.expectancy.long <- gather(
  life.expectancy,
  key = "Year",
  value = "lifeExp",
  -geo,
  na.rm = TRUE
)
# Keep characters 2-5 of `Year` (the four digits after the leading letter)
# as `year`, then drop the original `Year` column.
correct.life.exp.data <- life.expectancy.long %>%
  mutate(year = substr(Year, 2, 5), Year = NULL)

# ---- Income per person: wide -> long ----
income.person.long <- gather(
  income.person,
  key = "Year",
  value = "income",
  -geo,
  na.rm = TRUE
)
correct.income.person.data <- income.person.long %>%
  mutate(year = substr(Year, 2, 5), Year = NULL)

# ---- Population size: wide -> long ----
pop.size.long <- gather(
  pop.size,
  key = "Year",
  value = "pop_size",
  -geo,
  na.rm = TRUE
)
# Same clean-up of the year label as for the two tables above.
correct.pop.size.long <- pop.size.long %>%
  mutate(year = substr(Year, 2, 5), Year = NULL)
# ---- Combine the long tables into one data set ----
# Optional sorting by year (left disabled):
# correct.income.person.data[order(correct.income.person.data$year), ]
# correct.life.exp.data[order(correct.life.exp.data$year), ]

# Income and life expectancy, joined on the columns the two tables share.
LifeExpIncom <- merge(correct.income.person.data, correct.life.exp.data)

# Attach the country table (`geo` matched against its `name` column);
# all = FALSE keeps only rows with a match in both tables.
LifeExpIncom.1 <- merge(
  LifeExpIncom, country.totals,
  by.x = "geo", by.y = "name", all = FALSE
)

# Add population size, again joined on the shared columns.
LifeExpIncom.2 <- merge(LifeExpIncom.1, correct.pop.size.long)

# Life expectancy rounded to whole years.
life.exp <- round(LifeExpIncom.2$lifeExp)
# Subset the merged data to the year 2015.
year.15 <- subset(LifeExpIncom.2, year == 2015)

# Interactive bubble chart for 2015: life expectancy (x) against income (y),
# one marker per country, marker size scaled by population.
#
# Fixes relative to the previous version:
#   * the population column is `pop_size`; the old lookup of `pop size`
#     returned NULL and overwrote the `pop.size` data frame, so it is removed;
#   * `p` is not a plotly attribute, so the hover text now reads the population
#     from `customdata`;
#   * the hover template had an unclosed <b> tag;
#   * `with = 700` was a typo for `width`; width and height are now passed to
#     plot_ly() directly.
plot_ly(
  data = year.15,
  x = ~lifeExp,            # horizontal axis
  y = ~income,             # vertical axis
  color = ~factor(geo),    # one colour per country
  size = ~pop_size,        # marker size driven by population
  text = ~geo,             # country name, referenced as %{text}
  customdata = ~pop_size,  # raw population, referenced as %{customdata}
  hovertemplate = paste("<i><b>Population</b></i>: %{customdata:,}",
                        "<br><b>%{text}</b>"),
  alpha = 0.5,
  type = "scatter",
  mode = "markers",
  ## graphic size
  width = 700,
  height = 700
)
# Integer version of the year column of the merged data.
year.2 <- as.integer(LifeExpIncom.2$year)
# Numeric (double) version of the same column.
year <- as.numeric(LifeExpIncom.2$year)
# Convert every column of the merged data to numeric.
# NOTE(review): apply() coerces the data frame to a matrix before calling
# as.numeric() on each column, so text columns such as `geo` presumably end up
# as NA (with coercion warnings) - confirm this is intended.
# The follow-up steps below appear to have been collapsed into the original
# trailing comment; they are kept here as commented-out code:
# sapply(lifeExp_num, class)
# lifeExp_num$geo <- as.character(lifeExp_num$geo)
# sapply(lifeExp_num, class)
lifeExp_num <- as.data.frame(apply(LifeExpIncom.2, 2, as.numeric)) # all columns -> numeric
# Animated bubble chart for all years: income on the x axis (log10 scale)
# against life expectancy on the y axis, point size by population, colour by
# country, one animation state per year.
#
# Fixes relative to the previous version:
#   * the axis labels were swapped (aes() maps income to x and lifeExp to y);
#   * the animation time is taken from the plot data (`year` converted to
#     integer) instead of a separate vector that had to stay row-aligned with
#     LifeExpIncom.2.
w <- ggplot(LifeExpIncom.2, aes(income, lifeExp,
                                size = pop_size, colour = geo)) +
  geom_point(alpha = 0.3, show.legend = FALSE) +
  # NOTE(review): `country_colors` is not defined in this section; presumably
  # a named colour vector keyed by country - confirm its names match `geo`.
  scale_colour_manual(values = country_colors) +
  # Alternative palettes (disabled):
  # scale_color_manual(values = c("dodgerblue4", "darkolivegreen4", "darkorchid3")) +
  # scale_color_brewer(palette = "Set1") +
  scale_size(range = c(2, 50)) +
  scale_x_log10() +
  # Optional: one panel per continent (disabled)
  # facet_wrap(~continent) +
  # gganimate-specific layers: {frame_time} in the title is filled in with the
  # current value of the transition variable.
  labs(title = "Year: {frame_time}",
       x = "Income",
       y = "Life Expectancy") +
  transition_time(as.integer(year)) +
  ease_aes("linear")

# Render the animation as a GIF; rewind = TRUE plays it backwards after the
# forward pass.
animate(w, renderer = gifski_renderer(),
        rewind = TRUE)
# Random sample of 500 gas stations.
samp <- poc %>% ungroup() %>% sample_n(500)

# Gas stations
## preparing data
# poc <- read_csv("https://raw.githubusercontent.com/JSchrager94/STA-553/main/Data/POC.csv")
# Sampled stations whose POC flag equals 1.
# NOTE(review): `poc.site` is not used by the map below, which plots every
# sampled station - confirm which of the two is intended.
poc.site <- samp[samp$POC == 1, ]

# Geo styling: USA scope with the Albers USA projection and light grey
# land and border colours.
geostyle <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showland = TRUE,
  landcolor = toRGB("gray95"),
  subunitcolor = toRGB("gray85"),
  countrycolor = toRGB("gray85"),
  countrywidth = 0.5,
  subunitwidth = 0.5
)

## plotting map
# One marker per sampled station, positioned by ycoord (latitude) and xcoord
# (longitude); the hover text shows only the site description.
# The colour is wrapped in I() so plotly uses it as a literal colour instead
# of mapping the string "red" through the colour scale, consistent with the
# I() wrappers already used for `symbol` and `size`.
fig <- plot_geo(samp, lat = ~ycoord, lon = ~xcoord) %>%
  add_markers(text = ~SITE_DESCRIPTION,
              color = I("red"),
              symbol = I("circle"),
              size = I(8),
              hoverinfo = "text") %>%
  layout(title = "Gas stations", geo = geostyle)
fig