#The four subsets contain information that potentially impacts the life expectancy of the world. The data cover relative information between 1800 and 2018.

library(RCurl)
# Raw CSV locations on GitHub, keyed by the data set each file holds.
data.urls <- c(
  income          = "https://raw.githubusercontent.com/JSchrager94/STA-553/main/Data/income_per_person.csv",
  life.expectancy = "https://raw.githubusercontent.com/JSchrager94/STA-553/main/Data/life_expectancy_years.csv",
  countries       = "https://raw.githubusercontent.com/JSchrager94/STA-553/main/Data/countries_total.csv",
  population      = "https://raw.githubusercontent.com/JSchrager94/STA-553/main/Data/population_total.csv",
  poc             = "https://raw.githubusercontent.com/JSchrager94/STA-553/main/Data/POC.csv"
)

# Each file is downloaded as one text string (held in the scratch variable
# `x`, which is overwritten on every download) and then parsed by read.csv().
x <- getURL(data.urls[["income"]])
income.person <- read.csv(text = x)

x <- getURL(data.urls[["life.expectancy"]])
life.expectancy <- read.csv(text = x)

x <- getURL(data.urls[["countries"]])
country.totals <- read.csv(text = x)

x <- getURL(data.urls[["population"]])
pop.size <- read.csv(text = x)

x <- getURL(data.urls[["poc"]])
poc <- read.csv(text = x)

#creating longitudinal data for life expectancy

# Stack every column except `geo` into key/value pairs: the former column
# names go into `Year`, the cell values into `lifeExp`, and records whose
# value is missing are removed.
life.expectancy.long <- gather(life.expectancy,
                               key = "Year",
                               value = "lifeExp",
                               -geo,
                               na.rm = TRUE)

# Characters 2-5 of `Year` become the new `year` column; the original `Year`
# column is then removed (assigning NULL inside mutate() drops a column).
correct.life.exp.data <- mutate(life.expectancy.long,
                                year = substr(Year, 2, 5),
                                Year = NULL)

#creating longitudinal data for income

# Stack every column except `geo`: former column names go into `Year`, cell
# values into `income`; records with a missing value are removed.
income.person.long <- gather(income.person,
                             key = "Year",
                             value = "income",
                             -geo,
                             na.rm = TRUE)

# Characters 2-5 of `Year` become `year`; `Year` itself is then removed.
correct.income.person.data <- mutate(income.person.long,
                                     year = substr(Year, 2, 5),
                                     Year = NULL)

#creating longitudinal data for population size

# Stack every column except `geo`: former column names go into `Year`, cell
# values into `pop_size`; records with a missing value are removed.
pop.size.long <- gather(pop.size,
                        key = "Year",
                        value = "pop_size",
                        -geo,
                        na.rm = TRUE)

#getting rid of x in the year variable

# Only characters 2-5 of `Year` are kept (skipping the leading character) and
# stored as `year`; the original `Year` column is then removed.
correct.pop.size.long <- mutate(pop.size.long,
                                year = substr(Year, 2, 5),
                                Year = NULL)

#Sorting data by year

#correct.income.person.data[order(correct.income.person.data$year),]

#correct.life.exp.data[order(correct.life.exp.data$year),]

#Merge data sets together

# Income joined with life expectancy on every column name the two tables
# share (merge()'s default key choice, written out explicitly).
LifeExpIncom <- merge(
  x  = correct.income.person.data,
  y  = correct.life.exp.data,
  by = intersect(names(correct.income.person.data),
                 names(correct.life.exp.data))
)

# Country attributes are attached by matching `geo` against `name`;
# all = FALSE keeps only rows present in both tables.
LifeExpIncom.1 <- merge(
  x    = LifeExpIncom,
  y    = country.totals,
  by.x = "geo",
  by.y = "name",
  all  = FALSE
)

# Population size is added, again keyed on the shared column names.
LifeExpIncom.2 <- merge(
  x  = LifeExpIncom.1,
  y  = correct.pop.size.long,
  by = intersect(names(LifeExpIncom.1), names(correct.pop.size.long))
)

# Life expectancy rounded to whole numbers, kept as a stand-alone vector.
life.exp <- round(LifeExpIncom.2$lifeExp)

#subsetting data set for year 2015

# Rows whose `year` equals 2015. which() skips comparisons that evaluate to
# NA, mirroring how subset() treats them.
year.15 <- LifeExpIncom.2[which(LifeExpIncom.2$year == 2015), , drop = FALSE]

#interactive plot for 2015

# Bubble chart of the 2015 rows: life expectancy on the horizontal axis,
# income on the vertical axis, one marker per country with its size driven by
# population.
#
# Fixes relative to the earlier version of this chunk:
#  * The `pop.size = year.15$`pop size`` assignment was removed. The merged
#    column is called `pop_size`, so that lookup returned NULL and overwrote
#    the population data frame loaded at the top of the script.
#  * `p = ~pop.size` is not a plotly attribute and `%{p}` is not a
#    hovertemplate variable; population is now passed through `customdata`.
#  * The hover label's <b> tag is now closed.
#  * `with = 700` was a typo for `width`; width/height are set in plot_ly()
#    because setting them in layout() is deprecated in plotly for R.
plot_ly(
    data = year.15,
    x = ~lifeExp,            # horizontal axis
    y = ~income,             # vertical axis
    customdata = ~pop_size,  # extra per-point value exposed to hovertemplate
    color = ~factor(geo),    # one colour per country
    text = ~geo,             # country name shown in the hover text
    ## hovertemplate shows the population (thousands separator via d3-format)
    ## followed by the country name on a new line.
    hovertemplate = paste('<i><b>Population</b></i>: %{customdata:,}',
                          '<br><b>%{text}</b>'),
    alpha = 0.5,
    size = ~pop_size,        # marker size scales with population
    type = "scatter",
    mode = "markers",
    ## graphic size
    width = 700,
    height = 700
)
# `year` in the merged table is character (it was cut out of column names with
# substr()), so numeric copies are kept as stand-alone vectors.
year.2 <- as.integer(LifeExpIncom.2$year)

year <- as.numeric(LifeExpIncom.2$year)

# Convert all variable types to numeric.
# Each column is converted on its own with lapply(). The previous
# apply(LifeExpIncom.2, 2, as.numeric) first coerced the whole data frame to a
# character matrix (numeric columns pass through format()), which can round
# values before they are parsed back. Columns holding non-numeric text (such
# as `geo`) become NA with a coercion warning, exactly as before.
lifeExp_num <- LifeExpIncom.2
lifeExp_num[] <- lapply(LifeExpIncom.2, as.numeric)
# The original trailing note also listed the following steps; they were part
# of a comment and never executed, so they are kept here as comments only:
#   sapply(lifeExp_num, class)
#   lifeExp_num$geo <- as.character(lifeExp_num$geo)
#   sapply(lifeExp_num, class)

#interactive plot for all the years

# Animated bubble chart: income on a log10 x axis against life expectancy on
# the y axis, one bubble per country sized by population, one frame per year.
#
# Fixes relative to the earlier version of this chunk:
#  * The axis titles were swapped (x is income, y is life expectancy).
#  * The frame time is computed from the data's own `year` column instead of
#    a separate global vector that merely had to line up row-for-row.
w <- ggplot(LifeExpIncom.2, aes(income, lifeExp,
                  size = pop_size, colour = geo)) +
           geom_point(alpha = 0.3, show.legend = FALSE) +
           # NOTE(review): country_colors is not defined in the visible part of
           # this script; presumably a named palette loaded elsewhere -- confirm.
           scale_colour_manual(values = country_colors) +
           #scale_color_manual(values=c("dodgerblue4", "darkolivegreen4","darkorchid3")) +
           #scale_color_brewer(palette="Set1") +
           scale_size(range = c(2,50)) +
           scale_x_log10() +
           # break down the previous single plot by continent
           # facet_wrap(~continent) +     # create multiple panels according to the continents
           # Here comes the gganimate specific bits
           labs(title = 'Year: {frame_time}',
                    x = 'Income',
                    y = 'Life Expectancy') +
           # `year` is stored as character, so it is converted for the timeline
           transition_time(as.integer(year)) +
           ease_aes('linear')
###
# Render as a GIF; rewind = TRUE plays the frames backwards after the
# forward pass.
animate(w, renderer = gifski_renderer(),
          rewind = TRUE)

#Random sample of 500 gas stations

# Any grouping on `poc` is dropped first, then 500 rows are drawn at random.
samp <- sample_n(ungroup(poc), 500)

#gas stations

## preparing data
#poc <- read_csv("https://raw.githubusercontent.com/JSchrager94/STA-553/main/Data/POC.csv")
# Sampled rows whose POC value equals 1.
is.poc.site <- samp$POC == 1
poc.site <- samp[is.poc.site, ]
# geo styling
# Base-map settings handed to layout(geo = ...): USA scope with the
# 'albers usa' projection, light grey land, and thin grey borders.
geostyle <- list(scope = 'usa',
                 projection = list(type = 'albers usa'),
                 showland = TRUE,
                 landcolor = toRGB("gray95"),
                 subunitcolor = toRGB("gray85"),
                 countrycolor = toRGB("gray85"),
                 countrywidth = 0.5,
                 subunitwidth = 0.5
               )
## plotting map
# One marker per sampled row, positioned by ycoord (latitude) and xcoord
# (longitude); hovering shows only the site description.
# NOTE(review): the map is drawn from `samp`, not from the POC == 1 subset
# `poc.site` prepared just before this chunk -- confirm which was intended.
fig <- plot_geo(samp, lat = ~ycoord, lon = ~xcoord) %>%
       add_markers(text = ~SITE_DESCRIPTION,
                   # I() makes plotly use the value literally. A bare "red" is
                   # treated as a data value and mapped onto the default
                   # colour scale, so the markers were not actually red.
                   color = I("red"),
                   symbol = I("circle"),
                   size = I(8),
                   hoverinfo = "text" )   %>%
        layout( title = 'Gas stations', geo = geostyle)
fig