Assignment 10 - plotly & Uncertainty

Group 1, John Hope (jah9kqn)

Due Date: 11:59pm, Mar 20

Group Homework

Part 1

Part 1: Results

library(tidyverse) # load tidyr package
library(plotly) # load plotly package

# Load the built-in EuStockMarkets time series and convert it to a data frame
# with one column per stock index.
data(EuStockMarkets)
dat <- as.data.frame(EuStockMarkets)

# Store the observation times as a plain numeric column. time() returns a
# `ts` object, and keeping that class inside a data frame column can trip up
# reshaping code that expects an ordinary vector.
dat$time <- as.numeric(time(EuStockMarkets))

# Convert to long format: one row per (time, Stock) pair.
# pivot_longer() is the current replacement for the superseded gather().
new_data <- pivot_longer(
  dat,
  cols = -time,
  names_to = "Stock",
  values_to = "Price"
)

# One line colour per stock index
colors <- c("firebrick2", "dodgerblue3", "mediumseagreen", "magenta4")

# Interactive line chart of price over time, one trace per stock.
# Columns are referenced through formulas against `data` instead of mixing
# `~` with `new_data$...`.
plot_ly(
  data = new_data,
  x = ~time,
  y = ~Price,
  color = ~Stock,
  colors = colors,
  type = "scatter",
  mode = "lines"
) %>%
  layout(
    title = "",
    xaxis = list(title = list(text = "time")),
    yaxis = list(title = list(text = "price"))
  )

Part 2

library(foreign)
library(boot)

# Import the SPSS file as a data frame
scs <- read.spss(
  file = "/Users/johnhope/Desktop/DS3003/Data/SCS_QE.sav",
  to.data.frame = TRUE
)

# Bootstrap statistic for boot(): refit the regression of mathpre on mars on
# one resample and return its predictions at every mars value in `data`.
#
# data: data frame with columns `mathpre` and `mars`. boot() passes the
#       original (un-resampled) data set here on every call.
# i:    row indices that select the bootstrap resample from `data`.
#
# Returns a numeric vector with one predicted mathpre value per row of `data`.
b.stat <- function(data, i) {
  b.dat <- data[i, ]
  out.lm <- lm(mathpre ~ mars, data = b.dat)
  # Predict at data$mars rather than a global object so the function only
  # depends on its own arguments.
  predict(out.lm, newdata = data.frame(mars = data$mars))
}

# Draw 2000 bootstrap replicates of the predictions returned by b.stat
b.out <- boot(scs, b.stat, R = 2000)

# Percentile bootstrap interval for the prediction at each observation.
# The `percent` component of boot.ci() holds five numbers per index, the last
# two being the lower and upper bounds. vapply() pins that length so a
# malformed result fails loudly, seq_len() avoids the 1:0 trap, and
# drop = FALSE keeps a two-column matrix even for a single row.
b.ci <- t(vapply(
  seq_len(nrow(scs)),
  function(x) as.numeric(boot.ci(b.out, index = x, type = "perc")$percent),
  numeric(5)
))[, 4:5, drop = FALSE]

# Label the bounds and align them with the rows of scs
dimnames(b.ci) <- list(rownames(scs), c("lower", "upper"))

# Attach the bounds to the data
scs2 <- cbind(scs, b.ci)

# Scatter plot with the fitted regression line and the bootstrap band
ggplot(scs2, aes(x = mars, y = mathpre)) +
  geom_point(alpha = 0.2) +
  labs(x = "math anxiety ranking", y = "math pretest score") +
  theme_bw() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3, fill = "#69b3a2")

Part 3

Part 3: Instruction

Part 3: Instruction (Cont’d)

# Read data
covid <- read.csv(
  "/Users/johnhope/Desktop/DS3003/Data/WHO-COVID-19-global-table-data.csv",
  row.names = NULL
)

# With row.names = NULL, read.csv() labels the first column "row.names" when
# the header has one field fewer than the data rows, which leaves every header
# name one column to the right of its data. Shift the names back so that the
# cumulative case and death counts used below come from the intended columns.
# NOTE(review): presumably caused by a trailing comma on each data row of the
# WHO export - confirm against the raw file.
if (identical(names(covid)[1], "row.names")) {
  names(covid) <- c(names(covid)[-1], "unnamed_trailing")
}

# Rename columns; the first two are set by position
names(covid)[1:2] <- c("Country", "WHO_region")
covid <- covid %>%
  rename(
    deaths_cum_total = Deaths...cumulative.total,
    cases_cum_total = Cases...cumulative.total
  )

# Filter to 3 countries per region and compute the mortality rate
# (cumulative deaths / cumulative cases) with its binomial standard error
covid_countries <- covid %>%
  filter(
    Country %in% c(
      "Algeria", "Congo", "Liberia", "Mexico", "Canada",
      "United States of America", "France", "Germany",
      "Italy", "Nepal", "India", "Thailand", "Australia",
      "Republic of Korea", "Singapore", "Bahrain", "Iraq",
      "Libya"
    )
  ) %>%
  mutate(
    rate = deaths_cum_total / cases_cum_total,
    SE = sqrt(rate * (1 - rate) / cases_cum_total)
  )

# is.nan() method for data frames: applies is.nan() to every column and
# column-binds the results into one logical matrix.
is.nan.data.frame <- function(x) {
  nan_flags <- lapply(x, is.nan)
  do.call(cbind, nan_flags)
}

# Bar chart of the mortality rate per country, one facet per WHO region,
# with error bars at rate +/- 1.96 * SE.
ggplot(covid_countries, aes(x = Country, y = rate)) +
  geom_bar(stat = "identity") +
  # `scales` is spelled out in full rather than relying on partial argument
  # matching of `scale`.
  facet_grid(~WHO_region, scales = "free") +
  geom_errorbar(
    aes(ymin = rate - 1.96 * SE, ymax = rate + 1.96 * SE),
    width = .2,
    position = position_dodge(.9)
  ) +
  labs(y = "Mortality rate", x = "Country") +
  # Both x-axis text settings live in a single element_text() call
  theme(axis.text.x = element_text(angle = 45, hjust = 1))