# final-1.R

## LOADING REQUIRED PACKAGES


library(tidyverse)

## -- Attaching packages ------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.0     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0

## -- Conflicts ---------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggplot2)
library(readr)
library(shinythemes)
library(qpcR)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: minpack.lm

## Loading required package: rgl

## Loading required package: robustbase

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(rsconnect)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:MASS':
## 
##     select

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Open a null graphics device (no output file is written).
pdf(NULL)


## READING LIVE DATA FROM HUMDATA

# Each CSV is downloaded exactly once. The first object of every pair
# (`conff`, `deathh`, `recc`) keeps the headers produced by read.csv(); the
# second (`conf`, `death`, `rec`) starts as a copy and gets standardised
# column names below. Copying instead of downloading twice halves the network
# traffic and guarantees both objects describe the same snapshot.

## CONFIRMED CASES
url1 <- "https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_confirmed_global.csv&filename=time_series_covid19_confirmed_global.csv"
conff <- read.csv(url1, stringsAsFactors = FALSE)
conf <- conff

## CONFIRMED DEATHS
url2 <- "https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_deaths_global.csv&filename=time_series_covid19_deaths_global.csv"
deathh <- read.csv(url2, stringsAsFactors = FALSE)
death <- deathh

## CONFIRMED RECOVERED
url3 <- "https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_recovered_global.csv&filename=time_series_covid19_recovered_global.csv"
recc <- read.csv(url3, stringsAsFactors = FALSE)
rec <- recc

## RENAMING COLUMNS, ADDING AND UPDATING DATE FORMAT

# Give one downloaded table its working column names:
#   * every header is stripped of "X" and parsed as month.day.year, then
#     stored as the character form of the resulting Date;
#   * the first four headers are replaced by Region / country / lat / lon.
# Returns the renamed data frame; the values themselves are not touched.
.standardise_covid_columns <- function(data) {
  parsed <- as.Date(gsub("X", "", colnames(data)), "%m.%d.%y")
  colnames(data) <- as.character(parsed)
  colnames(data)[1:4] <- c("Region", "country", "lat", "lon")
  data
}

conf <- .standardise_covid_columns(conf)
death <- .standardise_covid_columns(death)
rec <- .standardise_covid_columns(rec)


## PLEASE DONT USE THIS FOR ANALYSIS - THIS DATA SET IS LARGE - WE PLAN TO USE THIS FOR FURTHER ANALYSIS IF TIME ALLOWS
# Side-by-side bind of the three tables; cbind.na pads with NA instead of
# failing when the tables have different numbers of rows.
# NOTE(review): the bind is positional, so a given row only describes one
# location if the three tables list locations in the same order - confirm.
full_data <- qpcR:::cbind.na(conf, death, rec)


## A GRAPH FOR TOTAL CASES

# Collapse all rows of `data` that belong to `country_name` into a single
# series: one value per date column (columns 5 onwards), named by that column.
# Rows are summed so that countries split into several regions produce one
# national series. A date whose values are all missing - including the case
# where no row matches the country at all - yields NA rather than 0.
.covid_country_series <- function(data, country_name) {
  rows <- data[data$country %in% country_name, -(1:4), drop = FALSE]
  vapply(
    rows,
    function(column) {
      values <- as.numeric(as.character(column))
      if (all(is.na(values))) NA_real_ else sum(values, na.rm = TRUE)
    },
    numeric(1)
  )
}

# Build the confirmed / deaths / recovered time-series plot for one country
# from the global `conf`, `death` and `rec` tables.
#
# Countryname: country to plot; coerced with as.character().
# Returns a plotly object with three lines+markers traces.
# Stops with an error when `conf` has no row for the requested country.
.covid_totals_plot <- function(Countryname) {
  Country <- as.character(Countryname)

  if (!any(conf$country %in% Country)) {
    stop(
      "No confirmed-case data for country: ",
      paste(Country, collapse = ", "),
      call. = FALSE
    )
  }

  confirmed <- .covid_country_series(conf, Country)
  dates <- names(confirmed)

  # Deaths and recoveries are looked up by date name, so a table that lacks a
  # date (or the whole country) contributes NA instead of breaking the frame.
  g <- data.frame(
    "date" = as.Date(dates),
    "number_confirmed" = unname(confirmed),
    "number_death" = unname(.covid_country_series(death, Country)[dates]),
    "number_recovered" = unname(.covid_country_series(rec, Country)[dates])
  )

  # plot_ly() only supplies the shared data and x/y defaults here; the three
  # add_trace() calls create the visible traces. The trace type is given
  # explicitly so plotly does not have to guess it.
  plot_ly(g, x = ~date, y = ~number_confirmed) %>%
    add_trace(y = ~number_death, name = "deaths",
              type = "scatter", mode = "lines+markers") %>%
    add_trace(y = ~number_recovered, name = "recovered",
              type = "scatter", mode = "lines+markers") %>%
    add_trace(y = ~number_confirmed, name = "confirmed",
              type = "scatter", mode = "lines+markers") %>%
    layout(title = "Total Number of Cases",
           xaxis = list(title = "Date"),
           yaxis = list(title = "Number of Cases"))
}

# Total-cases graph for a single country (see .covid_totals_plot()).
graph1 <- function(Countryname) {
  .covid_totals_plot(Countryname)
}

## A GRAPH FOR TOTAL CASES COMPARISON

# Same plot as graph1(); kept as a separate entry point for the comparison
# view.
graph_1 <- function(Countryname) {
  .covid_totals_plot(Countryname)
}


## GETTING MAP DATA

# Latest recovered count for every (Region, country) pair in `rec`.
# Recovered counts are attached by key rather than by row position: the
# previous positional bind needed NA padding, i.e. `rec` does not have the
# same rows as `conf`, so binding by position could attach a count to the
# wrong location. Locations without a matching `rec` row get NA.
rec_latest <- data.frame(
  Region = rec$Region,
  country = rec$country,
  recovered = rec[, ncol(rec)],
  stringsAsFactors = FALSE
)
# Keep one row per key so the join below can never duplicate map rows.
rec_latest <- rec_latest[!duplicated(rec_latest[c("Region", "country")]), ]

# One row per location: identifiers, coordinates and the most recent
# (last column) confirmed / death / recovered counts.
map <- conf[, 1:4] %>%
  mutate("confirmed" = conf[, ncol(conf)]) %>%
  mutate("death" = death[, ncol(death)]) %>%
  left_join(rec_latest, by = c("Region", "country"))

library(leaflet)

# Circle marker per location; hover label shows the three counts, the popup
# shows the country name.
map1 <- leaflet() %>%
  addProviderTiles("CartoDB") %>%
  addCircleMarkers(
    data = map, radius = 2,
    label = ~ paste0("Confirmed Cases= ", confirmed, " Death=", death,
                     " Recovered=", recovered),
    popup = ~ country
  )

## Assuming "lon" and "lat" are longitude and latitude, respectively

## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored

## GETTING DATA FOR CLUSTERING AND ADDING RECOVERY RATE

# Row-level table: Region, country and the latest confirmed / death counts.
# `conf[-3:-(ncol(conf) - 1)]` drops every column from the 3rd to the
# second-to-last, leaving columns 1, 2 and the last one.
n <- conf[-3:-(ncol(conf) - 1)]
colnames(n)[3] <- "confirmed"
n$death <- death[, ncol(death)]

# Latest recovered count per (Region, country), attached by key. A positional
# bind is not safe here because `rec` does not have the same rows as `conf`
# (the previous code needed NA padding to bind them). Rows without a matching
# key get NA.
rec_by_region <- data.frame(
  Region = rec$Region,
  country = rec$country,
  recovered = rec[, ncol(rec)],
  stringsAsFactors = FALSE
)
rec_by_region <-
  rec_by_region[!duplicated(rec_by_region[c("Region", "country")]), ]
n <- left_join(n, rec_by_region, by = c("Region", "country"))
n <- n %>% mutate(recoveryrate = recovered / confirmed * 100)


# Country-level totals. Recovered counts are summed straight from `rec` so
# that a country is totalled correctly even when `rec` splits it into regions
# differently from `conf`.
cases_by_country <- n %>%
  group_by(country) %>%
  summarise(conf = sum(confirmed), death = sum(death))

rec_by_country <- data.frame(
  country = rec$country,
  recovered = rec[, ncol(rec)],
  stringsAsFactors = FALSE
) %>%
  group_by(country) %>%
  summarise(rec = sum(recovered, na.rm = TRUE))

# recrate is total recovered over total confirmed (a sum of per-region
# percentages is not a rate). It is computed before the NA -> 0 replacement,
# so a country with no recovered data has rec = 0 but recrate = NA.
# Resulting columns, in order: country, conf, death, rec, recrate.
n_data <- cases_by_country %>%
  left_join(rec_by_country, by = "country") %>%
  mutate(recrate = rec / conf * 100) %>%
  mutate(rec = ifelse(is.na(rec), 0, rec))


## FILTERING RECOVERY RATES THAT ARE ABOVE 100%
# The comparison also drops rows whose rate is NA, NaN or infinite.

n_data_recovery <- n_data %>% filter(recrate < 100)

### DATA FOR CLUSTER ANALYSIS

# Population table: keep the 2019 rows, take columns 7 to 9 as the numeric
# features and carry `Location` along as the join key `country`.
# NOTE(review): columns 7:9 are selected by position; the later plots use
# `PopTotal`, so that column is expected to be among them - confirm against
# pop.csv.
pop1 <- read.csv("pop.csv", stringsAsFactors = FALSE)
pop1 <- pop1 %>%
  filter(Time == 2019)
pop <- pop1[, 7:9]
pop$country <- pop1$Location

# Rename the locations whose spelling differs from the case data. Done as a
# vectorised lookup so it also works for zero rows and for NA locations
# (a scalar `if` per row fails on both).
# NOTE(review): confirm the case data really labels the United Kingdom as
# "UK"; if it uses "United Kingdom", this alias removes it from the join below.
pop_country_aliases <- c(
  "Iran (Islamic Republic of)" = "Iran",
  "United Kingdom" = "UK",
  "United States of America" = "US"
)
alias_rows <- pop$country %in% names(pop_country_aliases)
pop$country[alias_rows] <-
  unname(pop_country_aliases[pop$country[alias_rows]])

## FINAL CLUSTERING DATA
# Country totals joined to population; any row with a missing value is dropped.
cl_data <- left_join(n_data, pop, by = "country") %>% na.omit()

### K MEANS FOR CLUSTER SIMILARITY - WHICH COUNTRIES ARE SIMILAR ?

# graph2, graph3 and graph4 map `cluster` to the z axis and to the colour, so
# the column must exist before the top-20 tables are derived. Every column of
# cl_data except the first (country) is used as a feature.
#   * rows containing a non-finite value are left out of the fit and keep
#     cluster = NA (k-means cannot handle them);
#   * features that cannot be scaled (e.g. constant columns) are dropped;
#   * the fit is skipped when there are too few distinct rows for 5 clusters.
# No random seed is set, so cluster numbers may differ between runs.
cluster_features <- as.matrix(cl_data[-1])
cluster_usable <- rowSums(!is.finite(cluster_features)) == 0
cluster_count <- 5
cl_data$cluster <- NA_real_

cluster_scaled <- scale(cluster_features[cluster_usable, , drop = FALSE])
cluster_scaled <-
  cluster_scaled[, colSums(!is.finite(cluster_scaled)) == 0, drop = FALSE]

if (ncol(cluster_scaled) > 0 &&
    nrow(unique(cluster_scaled)) > cluster_count) {
  cl <- kmeans(cluster_scaled, cluster_count)
  cl_data$cluster[cluster_usable] <- as.numeric(cl$cluster)
}

# Top-20 countries by confirmed cases, deaths, recoveries and recovery rate.
g2 <- cl_data %>%
  arrange(desc(conf)) %>%
  top_n(20, conf)

g3 <- cl_data %>%
  arrange(desc(death)) %>%
  top_n(20, death)

g4 <- cl_data %>%
  arrange(desc(rec)) %>%
  top_n(20, rec)

g6 <- cl_data %>%
  arrange(desc(recrate)) %>%
  top_n(20, recrate)

# Labelled 3-D scatter plots: population (x) against a count (y), with the
# cluster number on the z axis and as the colour.
graph2 <- plot_ly(data = g2, x = ~PopTotal, y = ~conf, z = ~cluster,
                  text = ~country, color = ~cluster) %>%
  add_text(textposition = "top right") %>%
  layout(autosize = FALSE,
         scene = list(xaxis = list(title = 'Total Population'),
                      yaxis = list(title = 'Confirmed Cases')))


graph3 <- plot_ly(data = g3, x = ~PopTotal, y = ~death, z = ~cluster,
                  text = ~country, color = ~cluster) %>%
  add_text(textposition = "top right") %>%
  layout(autosize = FALSE,
         scene = list(xaxis = list(title = 'Total Population'),
                      yaxis = list(title = 'Confirmed Deaths')))


graph4 <- plot_ly(data = g4, x = ~PopTotal, y = ~rec, z = ~cluster,
                  text = ~country, color = ~cluster) %>%
  add_text(textposition = "top right") %>%
  layout(autosize = FALSE,
         scene = list(xaxis = list(title = 'Total Population'),
                      yaxis = list(title = 'Recovered Cases')))

## ADDING DATA FOR CHOROPLETH MAPS, AND GETTING COUNTRY CODES

# Strings are read as character on every R version so the country names can
# be edited and joined without factor handling.
Country_Codes <- read.csv(
  "https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv",
  stringsAsFactors = FALSE
)
## The first column becomes the join key shared with n_data
colnames(Country_Codes)[1] <- "country"

## FIXING SOME COUNTRY NAMES
# Spellings in the code table that differ from the case data. The Democratic
# Republic of the Congo is the one whose capital is Kinshasa and the Republic
# of the Congo the one whose capital is Brazzaville, so the aliases are
# assigned accordingly (they were previously swapped, which put each country's
# counts under the other one's code).
code_country_aliases <- c(
  "United States" = "US",
  "Congo, Democratic Republic of the" = "Congo (Kinshasa)",
  "Congo, Republic of the" = "Congo (Brazzaville)"
)
Country_Codes$country <- as.character(Country_Codes$country)
code_alias_rows <- Country_Codes$country %in% names(code_country_aliases)
Country_Codes$country[code_alias_rows] <-
  unname(code_country_aliases[Country_Codes$country[code_alias_rows]])

# Only countries present in both tables are kept.
n_cc <- inner_join(n_data, Country_Codes, by = "country")

## LOG TRANSFORMATION - MIGHT USE LATER AND MUTATING TO ADD NEW COLUMNS
# Natural logs of the three counts; a count of 0 becomes -Inf.
n_cc <- n_cc %>%
  mutate(lconf = log(conf)) %>%
  mutate(ldeath = log(death)) %>%
  mutate(lrec = log(rec))

# Same table restricted to recovery rates below 100 (rows with a missing rate
# are dropped as well).
n_cc_recovery <- n_cc %>% filter(recrate < 100)

## ADDING ACTUAL CHOROPLETH MAPS

# Build one world choropleth.
#   data     - table providing the CODE (location) and country (hover text)
#              columns
#   values   - vector mapped to the fill colour
#   palette  - plotly colour scale name
#   reversed - whether the colour scale is reversed
#   heading  - plot title
.choropleth_for <- function(data, values, palette, reversed, heading) {
  world_map <- plot_ly(
    data,
    type = 'choropleth',
    locations = data$CODE,
    z = values,
    text = data$country,
    colorscale = palette,
    reversescale = reversed
  )
  layout(world_map, title = heading)
}

confirmed_ch <- .choropleth_for(
  n_cc, n_cc$conf, "Viridis", TRUE,
  "Confirmed Case Counts For all Countries"
)
death_ch <- .choropleth_for(
  n_cc, n_cc$death, "Reds", FALSE,
  "Death Counts For all Countries"
)
recovered_ch <- .choropleth_for(
  n_cc, n_cc$rec, "Greens", TRUE,
  "Recovery Counts For all Countries"
)
# The recovery-rate map uses the table already filtered to rates below 100.
recoveryrate_ch <- .choropleth_for(
  n_cc_recovery, n_cc_recovery$recrate, "Greens", TRUE,
  "Recovery Rate For all Countries"
)


library(ggpubr)
library(magrittr)

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

## CORRELATION MATRIX FOR CONFIRMED, DEATHS, RECOVERED AND GDP - DOES GDP
## RELATE TO ANY OF THOSE THINGS?
# Columns 2 to 6 of n_cc are selected by position.
n_cc_corr <- n_cc[, 2:6] %>% cor()

## TOTALS FOR A PIE CHART - MIGHT USE LATER
# One-row table with the worldwide confirmed, death and recovered totals.
n_cc_pie <- summarise(
  n_data,
  confirmed_ = sum(conf),
  deaths_ = sum(death),
  recovered_ = sum(rec)
)

# final-1.R
#
# Haris
#
# 2021-01-11