## LOADING REQUIRED PACKAGES
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ---------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(readr)
library(shinythemes)
library(qpcR)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: minpack.lm
## Loading required package: rgl
## Loading required package: robustbase
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(rsconnect)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Open a PDF graphics device with no output file.
pdf(NULL)
## READING LIVE DATA FROM HUMDATA
## Each CSV is downloaded once. The first object of each pair (conff, deathh,
## recc) keeps the column names exactly as read; the second (conf, death, rec)
## starts as a copy of it and gets its columns renamed further down.
## CONFIRMED CASES
url1<-"https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_confirmed_global.csv&filename=time_series_covid19_confirmed_global.csv"
conff <- read.csv(url1, stringsAsFactors = FALSE)
conf <- conff
##CONFIRMED DEATHS
url2<-"https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_deaths_global.csv&filename=time_series_covid19_deaths_global.csv"
deathh <- read.csv(url2, stringsAsFactors = FALSE)
death <- deathh
##CONFIRMED RECOVERED
url3<-"https://data.humdata.org/hxlproxy/api/data-preview.csv?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series%2Ftime_series_covid19_recovered_global.csv&filename=time_series_covid19_recovered_global.csv"
recc <- read.csv(url3, stringsAsFactors = FALSE)
rec <- recc
## RENAMING COLUMNS, ADDING AND UPDATING DATE FORMAT
## Strips the "X" from every column name, parses the remainder as
## month.day.year and stores the resulting dates as text (yyyy-mm-dd). The
## first four names are not dates (they become NA) and are replaced by fixed
## identifier names.
tidy_series_columns <- function(series) {
  colnames(series) <- as.character(
    as.Date(gsub("X", "", colnames(series)), "%m.%d.%y")
  )
  colnames(series)[1:4] <- c("Region", "country", "lat", "lon")
  series
}
conf <- tidy_series_columns(conf)
death <- tidy_series_columns(death)
rec <- tidy_series_columns(rec)
## PLEASE DON'T USE THIS FOR ANALYSIS - THIS DATA SET IS LARGE - WE PLAN TO USE THIS FOR FURTHER ANALYSIS IF TIME ALLOWS
## The three tables are bound side by side; cbind.na pads the shorter ones
## with NA, so rows are matched by position only.
full_data <- qpcR:::cbind.na(conf, death, rec)
## A GRAPH FOR TOTAL CASES
## Plot the cumulative confirmed, death and recovered counts of one country
## over time.
##
## Countryname: country name as spelled in the `country` column of the global
##   `conf`, `death` and `rec` tables; coerced with as.character().
## Returns: a plotly object with one lines+markers trace per series.
## Errors: stops when no row of `conf` belongs to the country.
graph1 <- function(Countryname) {
  country_name <- as.character(Countryname)
  if (!any(conf$country %in% country_name)) {
    stop("No confirmed-case data for country: ", country_name, call. = FALSE)
  }

  ## Per-date totals for the country. The date columns (everything after the
  ## four identifier columns) are summed over all matching rows, so a country
  ## listed as several Region rows produces one series. A table without any
  ## matching row yields an all-NA series instead of an error.
  country_totals <- function(series) {
    rows <- series[series$country %in% country_name, -(1:4), drop = FALSE]
    if (nrow(rows) == 0) {
      return(stats::setNames(rep(NA_real_, ncol(rows)), names(rows)))
    }
    vapply(rows, function(day) sum(as.numeric(as.character(day))), numeric(1))
  }

  confirmed <- country_totals(conf)
  deaths <- country_totals(death)
  recovered <- country_totals(rec)

  ## Deaths and recoveries are looked up by date name, so a table with a
  ## missing date column gives NA for that day rather than a length mismatch.
  days <- names(confirmed)
  g <- data.frame(
    "date" = as.Date(days),
    "number_confirmed" = unname(confirmed),
    "number_death" = unname(deaths[days]),
    "number_recovered" = unname(recovered[days])
  )

  plot_ly(g, x = ~date, y = ~number_confirmed) %>%
    add_trace(y = ~number_death, name = "deaths", mode = 'lines+markers') %>%
    add_trace(y = ~number_recovered, name = "recovered", mode = 'lines+markers') %>%
    add_trace(y = ~number_confirmed, name = "confirmed", mode = 'lines+markers') %>%
    layout(title = "Total Number of Cases",
           xaxis = list(title = "Date"),
           yaxis = list(title = "Number of Cases"))
}
## A GRAPH FOR TOTAL CASES COMPARISON
## Second entry point for the total-cases chart. Its body used to be a
## line-for-line copy of graph1(); it now delegates so the two cannot drift
## apart.
##
## Countryname: passed through unchanged to graph1().
## Returns: whatever graph1() returns for that country.
graph_1 <- function(Countryname) {
  graph1(Countryname)
}
## GETTING MAP DATA
## One row per row of `conf`: the four identifier columns plus the value of
## the last column of each case table.
map <- conf[, 1:4]
map$confirmed <- conf[[ncol(conf)]]
map$death <- death[[ncol(death)]]
## Recovered counts are matched on Region + country instead of row position:
## binding by position attaches values to the wrong location as soon as `rec`
## has a different set of rows than `conf`. Locations without a matching row
## in `rec` get NA.
map_recovered <- data.frame(
  Region = rec$Region,
  country = rec$country,
  recovered = rec[[ncol(rec)]],
  stringsAsFactors = FALSE
)
## Keep one row per key so the join cannot multiply rows of `map`.
map_recovered <- map_recovered[
  !duplicated(map_recovered[c("Region", "country")]), ,
  drop = FALSE
]
map <- left_join(map, map_recovered, by = c("Region", "country"))
library(leaflet)
## Circle markers on CartoDB tiles; the hover label shows the three counts and
## the popup shows the country name.
map1 <- leaflet() %>%
  addProviderTiles("CartoDB") %>%
  addCircleMarkers(data = map, radius = 2,
                   label = ~ paste0("Confirmed Cases= ", confirmed,
                                    " Death=", death,
                                    " Recovered=", recovered),
                   popup = ~ country)
## Assuming "lon" and "lat" are longitude and latitude, respectively
## Warning in validateCoords(lng, lat, funcName): Data contains 1 rows with either
## missing or invalid lat/lon values and will be ignored
## GETTING DATA FOR CLUSTERING AND ADDING RECOVERY RATE
## Per-region snapshot taken from the last column of each case table.
## Columns: Region, country, confirmed, death, recovered, recoveryrate.
n <- data.frame(
  Region = conf$Region,
  country = conf$country,
  confirmed = conf[[ncol(conf)]],
  death = death[[ncol(death)]],
  stringsAsFactors = FALSE
)
## Recovered counts are joined on Region + country; binding them by row
## position misplaces them whenever `rec` has different rows than `conf`.
n_recovered <- data.frame(
  Region = rec$Region,
  country = rec$country,
  recovered = rec[[ncol(rec)]],
  stringsAsFactors = FALSE
)
n_recovered <- n_recovered[
  !duplicated(n_recovered[c("Region", "country")]), ,
  drop = FALSE
]
n <- n %>%
  left_join(n_recovered, by = c("Region", "country")) %>%
  mutate(recoveryrate = recovered / confirmed * 100)

## Country totals are built from each table separately and then joined by
## country, so they do not depend on the three tables sharing a row layout.
latest_by_country <- function(series, value_name, na.rm = FALSE) {
  latest <- data.frame(
    country = series$country,
    value = as.numeric(series[[ncol(series)]]),
    stringsAsFactors = FALSE
  )
  totals <- latest %>%
    group_by(country) %>%
    summarise(value = sum(value, na.rm = na.rm))
  names(totals)[2] <- value_name
  totals
}
## recrate is total recovered / total confirmed in percent. Summing the
## per-region percentages instead would exceed 100 for any country with
## several regions. A country absent from `rec` keeps recrate = NA while its
## rec total is set to 0.
n_data <- latest_by_country(conf, "conf") %>%
  left_join(latest_by_country(death, "death"), by = "country") %>%
  left_join(latest_by_country(rec, "rec", na.rm = TRUE), by = "country") %>%
  mutate(recrate = rec / conf * 100,
         rec = ifelse(is.na(rec), 0, rec))
## KEEPING ONLY COUNTRIES WITH A RECOVERY RATE BELOW 100%
## (rows whose recrate is NA or NaN are dropped by filter() as well)
n_data_recovery <- n_data %>% filter(recrate < 100)
### DATA FOR CLUSTER ANALYSIS
## Population table: rows for the year 2019 only, columns 7 to 9 of the file
## plus the location name stored as `country`.
pop1 <- read.csv("pop.csv", stringsAsFactors = FALSE)
pop1 <- pop1 %>%
  filter(Time == 2019)
pop <- pop1[, 7:9]
pop$country <- pop1$Location
## Rename locations to the spelling used as join key below. The lookup is
## vectorised, so an empty table or an NA location no longer raises an error.
## NOTE(review): confirm that the case tables really spell these countries
## "Iran", "UK" and "US"; a name that does not match is silently dropped by
## the join + na.omit() below.
pop_renames <- c(
  "Iran (Islamic Republic of)" = "Iran",
  "United Kingdom" = "UK",
  "United States of America" = "US"
)
pop_hits <- pop$country %in% names(pop_renames)
pop$country[pop_hits] <- unname(pop_renames[pop$country[pop_hits]])
## FINAL CLUSTERING DATA
## Country totals with population attached; rows containing any NA are removed.
cl_data <- left_join(n_data, pop, by = 'country') %>% na.omit()
### K MEANS FOR CLUSTER SIMILARITY - WHICH COUNTRIES ARE SIMILAR ?
## graph2 to graph4 map `cluster` to the z axis and to colour, so the column
## has to exist: with the clustering commented out the plots fail when they
## are rendered. The clustering runs on a copy, leaving cl_data unchanged.
## Rows with a non-finite feature and columns that cannot be scaled (zero
## variance) are excluded because kmeans() rejects NA/NaN/Inf.
cl_clustered <- cl_data[is.finite(rowSums(cl_data[-1])), ]
scaled <- scale(cl_clustered[-1])
scaled <- scaled[, colSums(is.na(scaled)) == 0, drop = FALSE]
if (nrow(scaled) > 0) {
  ## At most 5 clusters, fewer when there are fewer distinct countries.
  cl <- kmeans(scaled, centers = min(5, nrow(unique(scaled))))
  cl_clustered$cluster <- as.numeric(cl$cluster)
} else {
  cl_clustered$cluster <- numeric(0)
}
## Top 20 countries by each measure, largest first.
g2 <- cl_clustered %>%
  arrange(desc(conf)) %>%
  top_n(20, conf)
g3 <- cl_clustered %>%
  arrange(desc(death)) %>%
  top_n(20, death)
g4 <- cl_clustered %>%
  arrange(desc(rec)) %>%
  top_n(20, rec)
g6 <- cl_clustered %>%
  arrange(desc(recrate)) %>%
  top_n(20, recrate)
## Text plots: population on x, the measure on y, cluster on z and as colour,
## each point labelled with the country name.
graph2 <- plot_ly(data = g2, x = ~PopTotal, y = ~conf, z = ~cluster, text = ~country, color = ~cluster) %>%
  add_text(textposition = "top right") %>%
  layout(autosize = FALSE, scene = list(xaxis = list(title = 'Total Population'),
                                        yaxis = list(title = 'Confirmed Cases')))
graph3 <- plot_ly(data = g3, x = ~PopTotal, y = ~death, z = ~cluster, text = ~country, color = ~cluster) %>%
  add_text(textposition = "top right") %>%
  layout(autosize = FALSE, scene = list(xaxis = list(title = 'Total Population'),
                                        yaxis = list(title = 'Confirmed Deaths')))
graph4 <- plot_ly(data = g4, x = ~PopTotal, y = ~rec, z = ~cluster, text = ~country, color = ~cluster) %>%
  add_text(textposition = "top right") %>%
  layout(autosize = FALSE, scene = list(xaxis = list(title = 'Total Population'),
                                        yaxis = list(title = 'Recovered Cases')))
## ADDING DATA FOR CHOROPLETH MAPS, AND GETTING COUNTRY CODES
Country_Codes <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv")
## The first column becomes the join key shared with n_data.
colnames(Country_Codes)[1] <- "country"
## FIXING SOME COUNTRY NAMES
## Kinshasa is the capital of the Democratic Republic of the Congo and
## Brazzaville that of the Republic of the Congo; the two targets were swapped
## before, which exchanged the figures of the two countries on the maps.
code_renames <- c(
  "United States" = "US",
  "Congo, Democratic Republic of the" = "Congo (Kinshasa)",
  "Congo, Republic of the" = "Congo (Brazzaville)"
)
Country_Codes$country <- as.character(Country_Codes$country)
code_hits <- Country_Codes$country %in% names(code_renames)
Country_Codes$country[code_hits] <-
  unname(code_renames[Country_Codes$country[code_hits]])
## Only countries present in both tables are kept.
n_cc <- inner_join(n_data, Country_Codes, by = "country")
## LOG TRANSFORMATION - MIGHT USE LATER AND MUTATING TO ADD NEW COLUMNS
## A count of 0 gives -Inf here.
n_cc <- n_cc %>%
  mutate(lconf = log(conf),
         ldeath = log(death),
         lrec = log(rec))
n_cc_recovery <- n_cc %>% filter(recrate < 100)
## ADDING ACTUAL CHOROPLETH MAPS
confirmed_ch <- plot_ly(n_cc, type = 'choropleth', locations = n_cc$CODE, z = (n_cc$conf), text = n_cc$country, colorscale = "Viridis", reversescale = TRUE) %>%
  layout(title = "Confirmed Case Counts For all Countries")
death_ch <- plot_ly(n_cc, type = 'choropleth', locations = n_cc$CODE, z = (n_cc$death), text = n_cc$country, colorscale = "Reds", reversescale = FALSE) %>%
  layout(title = "Death Counts For all Countries")
recovered_ch <- plot_ly(n_cc, type = 'choropleth', locations = n_cc$CODE, z = (n_cc$rec), text = n_cc$country, colorscale = "Greens", reversescale = TRUE) %>%
  layout(title = "Recovery Counts For all Countries")
recoveryrate_ch <- plot_ly(n_cc_recovery, type = 'choropleth', locations = n_cc_recovery$CODE, z = (n_cc_recovery$recrate), text = n_cc_recovery$country, colorscale = "Greens", reversescale = TRUE) %>%
  layout(title = "Recovery Rate For all Countries")
library(ggpubr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## ADDING CORR MATRIX FOR CONFIRMED, DEATHS, RECOVERED, AND GDP - DOES GDP IMPACT ANY OF THOSE THINGS?
## Columns 2 to 6 of n_cc are correlated pairwise. Each pair uses the rows
## where both values are present, so one missing value (recrate can be NA or
## NaN) no longer turns a whole row and column of the matrix into NA.
n_cc_corr <- cor(n_cc[, 2:6], use = "pairwise.complete.obs")
## ADDING PIE CHART - MIGHT USE LATER
## Totals of confirmed, death and recovered counts over all countries.
n_cc_pie <- n_data %>%
  summarise(confirmed_ = sum(conf),
            deaths_ = sum(death),
            recovered_ = sum(rec))