##1.1 Implementing Kernel Regression

###1.1.1 Function that sample iid data-points (x,y)

sample_f <- function(n = 1, use_x=c(), lambda, sigma2=0.3) {
  len <- length(use_x)
  if (len > 0) { #the condition of the vector x_use
    epsilon <- rnorm(length(use_x), 0, sigma2)
    f_x <- sin(lambda*use_x)+0.3*(use_x^2)+((use_x-0.4)/3)^3
    y_given_x <-  f_x + epsilon
    return(as.data.frame(cbind(use_x, y_given_x)))
    }
  if (len == 0) { #the same function but Instead of using use_x vector we grill a new use_x vector out of a uniform distribution. 
    epsilon <- rnorm(n, 0, sigma2)
    x <- runif(n, -2, 2)
    f_x <- sin(lambda*x)+0.3*(x^2)+((x-0.4)/3)^3
    y_given_x <- f_x + epsilon
    return(as.data.frame(cbind(x,y_given_x)))
    }
  }

###1.1.2 implement of Kernel regression

kernel_regression <- function(train_x, train_y, h, test_x){
  kx <- sapply(train_x, function(xi) dnorm((train_x-xi)/h)/h)
  w <- kx/rowSums(kx) 
  y_hat <- w %*% train_y
  return(list(y_hat, w))
}

#we took the function idea from bookdown.org. there is a link at the end of the lab

###1.1.3 training data set with \(\lambda = 1.5\) and \(n = 60\)

samp_df <- sample_f(n=60, lambda = 1.5)

train_x <- samp_df$x
train_y <- samp_df$y_given_x
test_x <- train_x
re <- kernel_regression(train_x, train_y,0.5, test_x)
y_hat <- re[[1]]

comb <- data.frame(y_hat = y_hat, x = train_x, y = train_y)

colors <- c("pred" = "magenta3", "train" = "navy")

plt <- ggplot(comb, aes(x = x)) +
  geom_line(aes(y = y_hat, color = "pred"), size = 1) +
  geom_point(aes(y = y, color = "train"), size = 2, alpha = 0.6, shape = 19) +
  labs(x = "X",
       y = "Y",
       color = "Legend") +
  scale_color_manual(values = colors)+ggtitle("Train Set VS Kernel Regression Prediction")

plt

##1.2 Regression errorsa for Kernel Regression

###1.2.1 sample training set

For all combination of \(\lambda = 1.5, 5\) and \(n = 60, 300\) and for different values of \(h\), we’ll be using the 2 functions from 1.1.

samp_60_1.5 <- sample_f(n=60, lambda = 1.5)
samp_60_1.5$n_is <- 60
samp_60_1.5$lambda_is <- 1.5

samp_60_5 <- sample_f(n=60, lambda = 5)
samp_60_5$n_is <- 60
samp_60_5$lambda_is <- 5

samp_300_1.5 <- sample_f(n=300, lambda = 1.5)
samp_300_1.5$n_is <- 300
samp_300_1.5$lambda_is <- 1.5

samp_300_5 <- sample_f(n=300, lambda = 5)
samp_300_5$n_is <- 300
samp_300_5$lambda_is <- 5


h_vec <- seq(0.5, 12, 0.5)

data_fun <- function(data, vector_h){
  x <- data$x
  y <- data$y_given_x
  for (h in vector_h) {
    re <- kernel_regression(x, y, h, x)
    y_hat <- re[[1]]
    data[paste0("y hat for h=",h)] <- y_hat
    
  }
  return(data)
}

samp_60_1.5_final <- data_fun(samp_60_1.5, h_vec)
samp_60_5_final <- data_fun(samp_60_5, h_vec)
samp_300_1.5_final <- data_fun(samp_300_1.5, h_vec)
samp_300_5_final <- data_fun(samp_300_5, h_vec)

###a compute the empirical error (training set error)- [err], based on the X’s in the trainig set

err_func <- function(data){
 err <- NULL
 for (i in 5:28) {
   err <- c(err, mean((data[,i]-data[,2])^2))
 }
 err <- data.frame(err)
 err$h <- h_vec
 err$n <- data[1,3]
 err$lambda <- data[1,4]
 return(err)
}

err_of_60_1.5 <- err_func(samp_60_1.5_final)
err_of_60_5 <- err_func(samp_60_5_final)
err_of_300_1.5 <- err_func(samp_300_1.5_final)
err_of_300_5 <- err_func(samp_300_5_final)

err <- rbind(err_of_60_1.5, err_of_60_5, err_of_300_1.5, err_of_300_5)


knitr::kable(head(err, 15))
err h n lambda
0.1458396 0.5 60 1.5
0.2651012 1.0 60 1.5
0.4041466 1.5 60 1.5
0.5145378 2.0 60 1.5
0.5882425 2.5 60 1.5
0.6360003 3.0 60 1.5
0.6676600 3.5 60 1.5
0.6893954 4.0 60 1.5
0.7048415 4.5 60 1.5
0.7161621 5.0 60 1.5
0.7246837 5.5 60 1.5
0.7312478 6.0 60 1.5
0.7364056 6.5 60 1.5
0.7405287 7.0 60 1.5
0.7438748 7.5 60 1.5

###b compute the expected optimism [Eop] based on the X’s in the trainig set

\(E_{op} = \frac{2\sigma^2}{n}Tr(w)\)

\(\sigma^2 = Var(\epsilon) = 0.3\)

\(w \in \mathbb{R}^{n \times n}\) is a matrix that represents the weights and Tr(w) is the trace of the weight matrix.

eop_func <- function(data,vector_h, sigma=0.3) {
  n <- data[1,3]
  lambda <- data[1,4]
  tr_w <- c()
  x <- data$x
  y <- data$y_given_x
  for (h in vector_h) {
    re <- kernel_regression(x, y, h, x)
    w <- re[[2]]
    tr_w <- c(tr_w, sum(diag(w)))
  }
  tr_w <- data.frame(tr_w)
  tr_w$h <- vector_h
  tr_w$n <- n
  tr_w$lambda <- lambda
  eop <- tr_w
  eop$Eop <- (2*sigma)/n*eop$tr_w
  return(eop[-1]) #remove the trace column
}

eop_of_60_1.5 <- eop_func(samp_60_1.5, h_vec)
eop_of_60_5 <- eop_func(samp_60_5, h_vec)
eop_of_300_1.5 <- eop_func(samp_300_1.5, h_vec)
eop_of_300_5 <- eop_func(samp_300_5, h_vec)

Eop <- rbind(eop_of_60_1.5, eop_of_60_5, eop_of_300_1.5, eop_of_300_5)

Eop <- Eop[, c("Eop", "h", "n", "lambda")]

knitr::kable(head(Eop, 15))
Eop h n lambda
0.0376335 0.5 60 1.5
0.0216912 1.0 60 1.5
0.0162580 1.5 60 1.5
0.0137448 2.0 60 1.5
0.0124496 2.5 60 1.5
0.0117161 3.0 60 1.5
0.0112658 3.5 60 1.5
0.0109711 4.0 60 1.5
0.0107681 4.5 60 1.5
0.0106226 5.0 60 1.5
0.0105147 5.5 60 1.5
0.0104326 6.0 60 1.5
0.0103687 6.5 60 1.5
0.0103179 7.0 60 1.5
0.0102770 7.5 60 1.5

###c Estimate the accuracy of the regression using 5-fold cross-validation error.

\(\widehat{EPE}_{CV} = \frac{1}{k}\sum_{i=1}^{k}{\widehat{EPE}_{i}}\) when, \(\widehat{EPE}_{i} = \frac{k}{n}\sum_{j\in B_i}(\hat{f}_{-i}(x_j)-y_j)^2\) in our case \(k=5\)

acc_func <- function(data, vector_h, k){
  n <- data[1,3]
  lambda <- data[1,4]
  data <- data[-4]
  data <- data[-3]
  EPE <- c()
  for (h in vector_h) {
    samp_data <- data[sample(nrow(data)),]
    EPE_2 <- c()
    five_folds <- cut(seq(1,nrow(samp_data)), breaks=k,labels=FALSE)
    for (fold in 1:k) {
      index <- which(five_folds==fold, arr.ind= TRUE)
      train_data <- samp_data[-index,]
      test_data <- samp_data[index,]
      x <- train_data$x
      y <- train_data$y_given_x
      ker <- kernel_regression(x,y,h,test_data$x)
      y_hat_kernel <- ker[[1]]
      epe <- c((k/n)*sum((y_hat_kernel-y)^2))
      EPE_2 <- cbind(EPE_2, epe)
    }
    EPE <- rbind(EPE, c(mean(EPE_2),n,lambda,h))
  }
  return(data.frame(EPE))
}

acc_60_1.5 <- acc_func(samp_60_1.5, h_vec, 5)
acc_60_5 <- acc_func(samp_60_5, h_vec, 5)
acc_300_1.5 <- acc_func(samp_300_1.5, h_vec, 5)
acc_300_5 <- acc_func(samp_300_5, h_vec, 5)

accuracy <- data.frame(rbind(acc_60_1.5, acc_60_5, acc_300_1.5, acc_300_5))
colnames(accuracy) <- c("accuracy", "n", "lambda", "h")

accuracy <- accuracy[, c("accuracy", "h", "n", "lambda")]

knitr::kable(head(accuracy, 15))
accuracy h n lambda
0.5742686 0.5 60 1.5
1.0488981 1.0 60 1.5
1.6105144 1.5 60 1.5
2.0556505 2.0 60 1.5
2.3460287 2.5 60 1.5
2.5363872 3.0 60 1.5
2.6693256 3.5 60 1.5
2.7481665 4.0 60 1.5
2.8086414 4.5 60 1.5
2.8447418 5.0 60 1.5
2.8944190 5.5 60 1.5
2.9070510 6.0 60 1.5
2.9183461 6.5 60 1.5
2.9610769 7.0 60 1.5
2.9617805 7.5 60 1.5

###d Estimate the in-sample expected error (EPE_IN) of your regression for multiple values of h

EPEin_func <- function(data){
  EPE_in <- c()
  n <- data[1,3]
  lambda <- data[1,4]
  for (i in 5:28) {
    epe_in <- NULL
    for (j in 1:150) {
      n_v <- sample_f(n= length(data[,1]), data[,1], lambda = lambda)[,2]
      epe_in <- c(epe_in, mean((n_v-data[,i])^2))
    }
    EPE_in <- c(EPE_in, mean(epe_in))
  }
  EPE_in <- data.frame(EPE_in)
  EPE_in$h <- h_vec
  EPE_in$n <- n
  EPE_in$lambda <- lambda
  return(EPE_in)
}

epein_60_1.5 <- EPEin_func(samp_60_1.5_final)
epein_60_5 <- EPEin_func(samp_60_5_final)
epein_300_1.5 <- EPEin_func(samp_300_1.5_final)
epein_300_5 <- EPEin_func(samp_300_5_final)

EPE_in <- rbind(epein_60_1.5,epein_60_5,epein_300_1.5,epein_300_5)

knitr::kable(head(EPE_in, 15))
EPE_in h n lambda
0.1366662 0.5 60 1.5
0.2778237 1.0 60 1.5
0.4313549 1.5 60 1.5
0.5546971 2.0 60 1.5
0.6369097 2.5 60 1.5
0.6976736 3.0 60 1.5
0.7388078 3.5 60 1.5
0.7598709 4.0 60 1.5
0.7799180 4.5 60 1.5
0.7853039 5.0 60 1.5
0.7938052 5.5 60 1.5
0.8013153 6.0 60 1.5
0.7992805 6.5 60 1.5
0.8148786 7.0 60 1.5
0.8098791 7.5 60 1.5

###e Estimate the out-of-sample expected prediction error (EPE) of your regression function

EPE_func <- function(data, vector_h){
  res_epe <- NULL
  x_train <- data$x
  y_train <- data$y_given_x
  n <- data[1,3]
  lambda <- data[1,4]
  for (h in vector_h) {
    EPE_e <- NULL
    for (j in 1:150) {
      samp_data <- sample_f(n=length(data[,1]), lambda = lambda)
      reg <- kernel_regression(x_train, y_train, h, x_train)
      y_hat <- reg[[2]]
      EPE_e <- c(EPE_e, mean((y_hat-samp_data[,2])^2))
    }
    res_epe <- c(res_epe, mean(EPE_e))
    EPE_e <- NULL
  }
  res_epe <- data.frame(res_epe)
  res_epe$h <- vector_h
  res_epe$n <- n
  res_epe$lambda <- lambda
  return(res_epe)
}

epe_60_1.5 <- EPE_func(samp_60_1.5, h_vec)
epe_60_5 <- EPE_func(samp_60_5, h_vec)
epe_300_1.5 <- EPE_func(samp_300_1.5, h_vec)
epe_300_5 <- EPE_func(samp_300_5, h_vec)

EPE_all <- rbind(epe_60_1.5, epe_60_5, epe_300_1.5, epe_300_5)
colnames(EPE_all) <- c("EPE", "h", "n", "lambda")

knitr::kable(head(EPE_all, 15))
EPE h n lambda
0.9531885 0.5 60 1.5
0.9430308 1.0 60 1.5
0.9408462 1.5 60 1.5
0.9420706 2.0 60 1.5
0.9480169 2.5 60 1.5
0.9229514 3.0 60 1.5
0.9378307 3.5 60 1.5
0.9439105 4.0 60 1.5
0.9432163 4.5 60 1.5
0.9312691 5.0 60 1.5
0.9375390 5.5 60 1.5
0.9321126 6.0 60 1.5
0.9460991 6.5 60 1.5
0.9349653 7.0 60 1.5
0.9399832 7.5 60 1.5

###Ploting all the results

err$group <- "err"
colnames(err)[1] <- "value"
Eop$group <- "Eop"
colnames(Eop)[1] <- "value"
accuracy$group <- "accuracy"
colnames(accuracy)[1] <- "value"
EPE_in$group <- "EPE_in"
colnames(EPE_in)[1] <- "value"
EPE_all$group <- "EPE"
colnames(EPE_all)[1] <- "value"

full_data <- rbind(err,Eop,accuracy,EPE_in, EPE_all)

lambda_1.5 <- subset(full_data, lambda == 1.5)
lambda_5 <- subset(full_data, lambda == 5)

lambda_1.5$n <- as.character(lambda_1.5$n)
lambda_5$n <- as.character(lambda_5$n)

lambda_1.5_plot <- ggplot(lambda_1.5, aes(x=h, y=value, group=interaction(group, n), color=group))+ geom_line(aes(color=group, linetype = n), size = 1)+labs(title = "Regression errors for a differnt N", subtitle = "for lambda = 1.5", color = "type pf error", x = "H")

lambda_5_plot <- ggplot(lambda_5, aes(x=h, y=value, group=interaction(group, n), color=group))+ geom_line(aes(color=group, linetype = n), size = 1)+labs(title = "Regression errors for a differnt N", subtitle = "for lambda = 5", color = "type pf error", x = "H")

ggarrange(lambda_1.5_plot, lambda_5_plot, ncol = 1, nrow = 2)

###Conclusions

err: in both cases, when lambda=1.5 and when lambda = 5, as n and h increase the value of err also slightly increases until it almost reach 1. We can also see that there is not much change between the value of err when n is 60 or 300.

Eop: in both cases, when lambda=1.5 and when lambda = 5, as n and h increase the value of err slightly decreases. . We can also see that there is not much change in the value of err when n is 60 or 300 and when lambda is 1.5 or 5.

Accuracy: As h increases the accuracy also increases. When lambda = 1.5 there is not much difference in the accuracy between different values of n but when lambda is equal to 5 we see the difference more clearly: as n increases the accuracy decreases.

EPE_in: When n=60 as lambda increases the value pf EPE_in is also increases. when n=300 as lambda increases the value pf EPE_in is decreases. As h increases the EPE_in also increases.

EPE: As h and n increases the value of EPE is also increases. As lambda increases the value of EPE decreases.

###1.2.2 using quadratic regression instead of using kernel regression

####a

quadratic_function <- function(data){
  data$x_2 <- data$x^2
  return(lm(y_given_x ~ x + x_2, data = data))
}

samp_60_1.5_quad_all <- quadratic_function(samp_60_1.5)
samp_60_1.5_quad <- samp_60_1.5
samp_60_1.5_quad$x_2 <- samp_60_1.5$x^2
samp_60_1.5_quad$y_hat <- samp_60_1.5_quad_all$fitted.values
  
samp_60_5_quad_all <- quadratic_function(samp_60_5)
samp_60_5_quad <- samp_60_5
samp_60_5_quad$x_2 <- samp_60_5$x^2
samp_60_5_quad$y_hat <- samp_60_5_quad_all$fitted.values
  
samp_300_1.5_quad_all <- quadratic_function(samp_300_1.5)
samp_300_1.5_quad <- samp_300_1.5
samp_300_1.5_quad$x_2 <- samp_300_1.5$x^2
samp_300_1.5_quad$y_hat <- samp_300_1.5_quad_all$fitted.values
  
samp_300_5_quad_all <- quadratic_function(samp_300_5)
samp_300_5_quad <- samp_300_5
samp_300_5_quad$x_2 <- samp_300_5$x^2
samp_300_5_quad$y_hat <- samp_300_5_quad_all$fitted.values

err_quad_func <- function(data){
  n <- data[1,3]
  lambda <- data[1,4]
  err <- mean((data$y_hat-data$y_given_x)^2)
  err <- cbind(err, n, lambda)
}

err_quad_60_1.5 <- err_quad_func(samp_60_1.5_quad)
err_quad_60_5 <- err_quad_func(samp_60_5_quad)
err_quad_300_1.5 <- err_quad_func(samp_300_1.5_quad)
err_quad_300_5 <- err_quad_func(samp_300_5_quad)

err_quad <- rbind(err_quad_60_1.5,err_quad_60_5, err_quad_300_1.5, err_quad_300_5)

knitr::kable(err_quad)
err n lambda
0.2509680 60 1.5
0.4900864 60 5.0
0.2224521 300 1.5
0.4734439 300 5.0

####b

eop_quad_func <- function(data, sigma=0.3) {
  n <- data[1,3]
  lambda <- data[1,4]
  matrix_x <- as.matrix(cbind(1, data[,1], data[,1]^2))
  weight_matrix <- matrix_x %*% solve(t(matrix_x) %*% matrix_x)%*%t(matrix_x)
  #weight_matrix <- data$fitted.value
  tr_w <- sum(diag(weight_matrix))
  tr_w <- as.data.frame(tr_w)
  eop <- tr_w
  eop$Eop <- (2*sigma)/n*eop$tr_w
  eop$n <- n
  eop$lambda <- lambda
  return(eop[-1]) 
}


eop_quad_60_1.5 <- eop_quad_func(samp_60_1.5)
eop_quad_60_5 <- eop_quad_func(samp_60_5)
eop_quad_300_1.5 <- eop_quad_func(samp_300_1.5)
eop_quad_300_5 <- eop_quad_func(samp_300_5)

Eop_quad <- rbind(eop_quad_60_1.5, eop_quad_60_5, eop_quad_300_1.5, eop_quad_300_5)


knitr::kable(Eop_quad)
Eop n lambda
0.030 60 1.5
0.030 60 5.0
0.006 300 1.5
0.006 300 5.0

####c

acc_quad_func <- function(data, k){
  n <- data[1,3]
  lambda <- data[1,4]
  data <- data[-4]
  data <- data[-3]
  EPE <- NULL
  samp_data <- data[sample(nrow(data)),]
  five_folds <- cut(seq(1,nrow(samp_data)), breaks=k,labels=FALSE)
  for (fold in 1:k) {
    index <- which(five_folds==fold, arr.ind= TRUE)
    train_data <- samp_data[-index,]
    test_data <- samp_data[index,]
    y <- train_data$y_given_x
    quad <- quadratic_function(train_data)
    y_hat <- quad$fitted.value
    epe <- c((k/n)*sum((y_hat-y)^2))
    }
  EPE <- rbind(EPE, c(round(epe[1],4),n,lambda))
  return(data.frame(EPE))
  }

acc_60_1.5_quad <- acc_quad_func(samp_60_1.5, 5)
acc_60_5_quad <- acc_quad_func(samp_60_5, 5)
acc_300_1.5_quad <- acc_quad_func(samp_300_1.5, 5)
acc_300_5_quad <- acc_quad_func(samp_300_5, 5)

accuracy_quad <- data.frame(rbind(acc_60_1.5_quad, acc_60_5_quad, acc_300_1.5_quad, acc_300_5_quad))
colnames(accuracy_quad) <- c("accuracy", "n", "lambda")

accuracy_quad <- accuracy_quad[, c("accuracy", "n", "lambda")]

knitr::kable(accuracy_quad)
accuracy n lambda
1.0361 60 1.5
1.9101 60 5.0
0.9323 300 1.5
1.9914 300 5.0

####d

EPEin_quad_func <- function(data){
  EPE_in <- c()
  n <- data[1,3]
  lambda <- data[1,4]
  y_hat <- quadratic_function(data)$fitted.value
  for (j in 1:150) {
    n_v <- sample_f(n= length(data[,1]), data[,1], lambda = lambda)[,2]
    EPE_in <- c(EPE_in, mean((n_v-y_hat)^2))
    }
  EPE_in <- data.frame(mean(EPE_in))
  EPE_in$n <- n
  EPE_in$lambda <- lambda
  colnames(EPE_in) <- c("EPE_in", "n", "lambda")
  return(EPE_in)
}

epein_60_1.5_quad <- EPEin_quad_func(samp_60_1.5)
epein_60_5_quad <- EPEin_quad_func(samp_60_5)
epein_300_1.5_quad <- EPEin_quad_func(samp_300_1.5)
epein_300_5_quad <- EPEin_quad_func(samp_300_5)

EPE_in_quad <- rbind(epein_60_1.5_quad,epein_60_5_quad,epein_300_1.5_quad,epein_300_5_quad)

knitr::kable(EPE_in_quad)
EPE_in n lambda
0.2462005 60 1.5
0.5385703 60 5.0
0.2156308 300 1.5
0.5123152 300 5.0

###e compute the expected optimism [Eop]

EPE_quad_func <- function(data){
  res_epe <- NULL
  x_train <- data$x
  y_train <- data$y_given_x
  n <- data[1,3]
  lambda <- data[1,4]
  EPE_e <- NULL
  for (j in 1:150) {
    samp_data <- sample_f(n=length(data[,1]), lambda = lambda)
    y_hat <- quadratic_function(samp_data)$fitted.value
    EPE_e <- c(EPE_e, mean((y_hat-samp_data[,2])^2))
    }
  EPE_e <- data.frame(mean(EPE_e))
  EPE_e$n <- n
  EPE_e$lambda <- lambda
  colnames(EPE_e) <- c("EPE", "n", "lambda")
  return(EPE_e)
}

epe_60_1.5_quad <- EPE_quad_func(samp_60_1.5)
epe_60_5_quad <- EPE_quad_func(samp_60_5)
epe_300_1.5_quad <- EPE_quad_func(samp_300_1.5)
epe_300_5_quad <- EPE_quad_func(samp_300_5)

EPE_quad_all <- rbind(epe_60_1.5_quad, epe_60_5_quad, epe_300_1.5_quad, epe_300_5_quad)


knitr::kable(EPE_quad_all)
EPE n lambda
0.2104322 60 1.5
0.5443471 60 5.0
0.2183353 300 1.5
0.5585978 300 5.0

###Conclusions

err: As lambda and n are getting bigger the err increases as well. Eop: When n is bigger the value of Eop is smaller. Accuracy: When n is bigger the accuracy is smaller. When lambda is bigger the accuracy increases as well. EPE_in: As lambda and n are getting bigger the EPE_in value increases as well. EPE: As lambda and n are getting bigger the EPE value increases as well.

##2 fMRI Data

In the following part, we will fit prediction models for the brain-activity in measured from a visual-processing area in the brain in response to natural images (X matrix).

Our the data consists of 3 measured responses to each image at 3 locations, voxels, in the brain. Our main goal is to predict the brain response at each voxel to new images.

Our secondary goal is to interpret the prediction models in relation to the scientific problem.

We will fit models using 1500 training examples, and estimate the accuracy of prediction on 250 test examples.

The predictor vector is composed of 2729 image features as vectors, based on Gabor wavelet composition.

Hence, will fit 3 separate regression models.

We will validate model accuracy based on how well they predict the test data and calculate the Mean Squared Prediction Error and the square-root of it:

$ MSPE(f_hat)= 1/250= sigma [(f(Ij ) − Yj)^2]$

and

$RMSPE= sqrt(MSPE) $

#The basic data is as follows:

train responses of training data - 1500 × 3 .

feature train of features for each train image - 1500 × 2729 .

feature test features for each test image - 250 × 2729 .

Next, we will fit a regression model for each column of train resp using the matrix of features in feature train. Later, we will report predictions for the test examples in feature test.

## [1] "train_resp, 1500 x 3"
## [1] "feature_train, 1500 x 2729"
## [1] "feature_test, 250 x 2729"

#2.1 Prediction model

For each voxel 1-3, we will fit a linear model of the features. There are more features than responses,so we will use penalised regression,a method which keep all the predictor variables in the model but regularize the regression coefficients by shrinking them toward zero. A penalized regression method yields a sequence of models, each associated with specific values for one or more tuning parameters.

2.1.1 Model fitting

For each of the three response vectors, we will fit a model using the following steps:

  1. Set aside validation data to evaluate your models after fitting:
# we want to choose 1200 random 80% of data for the training part, and use the rest for validation

split1= sample(c( 0.80 * nrow(train_resp)))

validation_data_Y = train_resp[-split1, ]

train_Y= train_resp[split1,]

train_X=feature_train[split1,]

validation_data_X= feature_train[-split1, ]

#dim(train_Y);dim(train_X); dim(validation_data_X);dim(validation_data_Y)

# n_aside= 1500-250

train_resp2=train_Y # train_resp[1:n_aside, 1:3]

feature_train2=train_X#feature_train[1:n_aside, 1:2729]

train_resp_aside = validation_data_Y# train_resp[(n_aside+1):1500, 1:3]

feature_train_aside= validation_data_X#feature_train[(n_aside+1):1500, 1:2729]

feature_test2=feature_test 
  1. Using the rest of the data to fit ridge regression and lasso regression. we will choose the best model and the best value for the regularisation parameter λ, using cross validation on the training sample.
# Glmnet is a package that fits generalized linear and similar models via penalized 

#cv.glmnet: Cross-validation for glmnet which Does k-fold cross-validation for glmnet, produces a plot, and returns a value for lambda (and gamma if relax=TRUE)

# https://hastie.su.domains/ISLR2/ISLRv2_website.pdf p 282 

# 3 required ridge regression for 3 different response vectors 

feature_train2[,-1] = as.matrix(feature_train2[,-1])

#lambda_reg= 10^seq(9, -5, by = -.1)

lambda_regmodels =10^ seq(0.14, 0.15, by = 0.0001)

# plannig: cv.glment of ridge( a= 0) and lasso (a=1) for each Vi(i=1,2,3), glment of ridge( a= 0) and lasso (a=1) for each Vi(i=1,2,3)
## and then predict of ridge( a= 0) and lasso (a=1) for each Vi(i=1,2,3
#Voxel 1
ridgereg1 = cv.glmnet(x = feature_train2[,-1], y = train_resp2[,1], alpha = 0,nfolds = 5, lambda = lambda_regmodels)
#Voxel 2
ridgereg2 = cv.glmnet(x = feature_train2[,-1], y = train_resp2[,2], alpha = 0,nfolds = 5, lambda = lambda_regmodels)
# 3
ridgereg3 = cv.glmnet(x = feature_train2[,-1], y = train_resp2[,3], alpha = 0,nfolds = 5, lambda = lambda_regmodels)
# 3 required lasso regression for 3 different response vectors 
#Voxel 1
lassoreg1 = cv.glmnet(x = feature_train2[,-1], y = train_resp2[,1], alpha = 1,nfolds = 5, lambda = lambda_regmodels)
#Voxel 2
lassoreg2 = cv.glmnet(x = feature_train2[,-1], y = train_resp2[,2], alpha = 1,nfolds = 5, lambda = lambda_regmodels)
#Voxel 3
lassoreg3 = cv.glmnet(x = feature_train2[,-1], y = train_resp2[,3], alpha = 1,nfolds = 5, lambda = lambda_regmodels)
# looking for the best models, fitting the model and calcualte predictions

# Ridge and  Lasso for each voxel
# V1
best_ridgereg1 = glmnet(x = feature_train2[,-1], y = train_resp2[,1], alpha = 0, lambda =  ridgereg1$lambda.min)
best_lassoreg1 = glmnet(x = feature_train2[,-1],y = train_resp2[,1], alpha = 1, lambda =  lassoreg1$lambda.min)
# V2
best_ridgereg2 = glmnet(x = feature_train2[,-1],y = train_resp2[,2], alpha = 0, lambda =  ridgereg2$lambda.min)
best_lassoreg2 = glmnet(x = feature_train2[,-1],y = train_resp2[,2], alpha = 1, lambda =  lassoreg2$lambda.min)
# V3
best_ridgereg3 = glmnet(x = feature_train2[,-1],y = train_resp2[,3], alpha = 0, lambda =  ridgereg3$lambda.min)
best_lassoreg3 = glmnet(x = feature_train2[,-1],y = train_resp2[,3], alpha = 1, lambda =  lassoreg3$lambda.min)
# Predictions:

# V1
pred_ridgereg1 = predict(best_ridgereg1, s = ridgereg1$lambda.min, newx = feature_train2[,-1],exact = T)
pred_lassoreg1 = predict(best_lassoreg1, s = lassoreg1$lambda.min, newx = feature_train2[,-1],exact = T)
# V2
pred_ridgereg2 = predict(best_ridgereg2, s = ridgereg2$lambda.min, newx = feature_train2[,-1],exact = T)
pred_lassoreg2 = predict(best_lassoreg2, s = lassoreg2$lambda.min, newx = feature_train2[,-1],exact = T)
# V3
pred_ridgereg3 = predict(best_ridgereg3, s = ridgereg3$lambda.min, newx = feature_train2[,-1],exact = T)
pred_lassoreg3 = predict(best_lassoreg3, s = lassoreg3$lambda.min, newx = feature_train2[,-1],exact = T)
# We would like to calculate the Mean Squared Error , to reseve the average squared difference between the observed and predicted values.

MSE = data.frame(value = rep(0,6), Y = rep(1:3, 2), type= rep(c("ridge","lasso"), each = 3))
for (i in 1:3) {  MSE$value[i] = mean((train_resp2[,i]- get(paste("pred_ridgereg",i,sep = "")))^2)
  MSE$value[i+3] = mean((train_resp2[,i]- get(paste("pred_lassoreg",i,sep = "")))^2) }

library(ggplot2)

ggplot(data = MSE) + geom_col(aes(x=Y,y=value,fill=type ),position = "dodge")+ylab("MSE value using predictions")+xlab("Vectors in voxels data  ")+ggtitle("MSE calculation overall 6 models - which one is the best?") 

We can see that the best model (out of 6) is the ridge regression for each Y vector. Thus, we will use it in the next sections.

c.For the best model of each voxel,we will make predictions for the validation data you set aside. We will Estimate the MSPE for each best model and the standard-error for this estimate and generate an approximate 90% confidence interval for the MSPE using the t distribution.

#Vovel 1 = response 1

#https://moodle2.cs.huji.ac.il/nu21/pluginfile.php/714931/mod_resource/content/0/Lect10B_22_slides.Rmd

MSPE = function(a, b){round((mean( ( a - b)^2 )),3)}

MspeV1Ridge= MSPE(train_resp2[,1],pred_ridgereg1)

library("plotrix")

Mspe.se = std.error(pred_ridgereg1)

#cat("Our MSPE estimation is",MspeV1Ridge, ", and the standard-error for this estimate is",Mspe.se,"."  )
#cat("The estimate MSPE of the best model is ",MspeV1Ridge)
# CI

sd_mspe = sd(pred_ridgereg1)

n_mspe= nrow(pred_ridgereg1)

q_mspe= StudentsT(df = n_mspe)

high_CI_mspe = MspeV1Ridge + quantile(q_mspe, 1 - 0.10 / 2) * sd_mspe / sqrt(n_mspe)

low_CI_mspe = MspeV1Ridge - quantile(q_mspe, 1 - 0.10 / 2) * sd_mspe / sqrt(n_mspe)

#cat("The CI of the MSPE is [High =",high_CI_mspe, ",Low =",low_CI_mspe, "]" )
#https://moodle2.cs.huji.ac.il/nu21/pluginfile.php/714931/mod_resource/content/0/Lect10B_22_slides.Rmd

RMSPE = function(a, b){round(sqrt(mean( ( a - b)^2 )),3)}

RmspeV1Ridge= RMSPE(train_resp2[,1],pred_ridgereg1)

#cat("The estimate RMSPE of the best model of V1 is ",RmspeV1Ridge)
# CI ==https://cran.r-project.org/web/packages/distributions3/vignettes/one-sample-t-confidence-interval.html

sd_rmspe = sd(pred_ridgereg1)

n_rmspe= nrow(pred_ridgereg1)

q_rmspe= StudentsT(df = n_rmspe)

high_CI_rmspe = RmspeV1Ridge + quantile(q_rmspe, 1 - 0.10 / 2) * sd_rmspe / sqrt(n_rmspe)

low_CI_rmspe = RmspeV1Ridge - quantile(q_rmspe, 1 - 0.10 / 2) * sd_rmspe / sqrt(n_rmspe)

#cat("The CI of the RMSPE for V1 is [High =",high_CI_rmspe, ",Low =",low_CI_rmspe, "]" )

#Vovel 2 = response 2

#https://moodle2.cs.huji.ac.il/nu21/pluginfile.php/714931/mod_resource/content/0/Lect10B_22_slides.Rmd

MspeV2Ridge= MSPE(train_resp2[,2],pred_ridgereg2)

library("plotrix")

Mspe.se2 = std.error(pred_ridgereg2)

#cat("Our MSPE estimation of V2 is",MspeV2Ridge, ", and the standard-error for this estimate is",Mspe.se2,"."  )

#cat("The estimate MSPE of the best model is ",MspeV1Ridge)
# CI 

sd_mspe2 = sd(pred_ridgereg2)

n_mspe2= nrow(pred_ridgereg2)

q_mspe2= StudentsT(df = n_mspe2)

high_CI_mspe2 = MspeV2Ridge + quantile(q_mspe2, 1 - 0.10 / 2) * sd_mspe2 / sqrt(n_mspe2)

low_CI_mspe2 = MspeV2Ridge - quantile(q_mspe2, 1 - 0.10 / 2) * sd_mspe2 / sqrt(n_mspe2)

#cat("The CI of the MSPE of V2 is [High =",high_CI_mspe2, ",Low =",low_CI_mspe2, "]" )
#https://moodle2.cs.huji.ac.il/nu21/pluginfile.php/714931/mod_resource/content/0/Lect10B_22_slides.Rmd

RmspeV2Ridge= RMSPE(train_resp2[,2],pred_ridgereg2)

#cat("The estimate RMSPE of the best model of V2 is ",RmspeV2Ridge)
# CI ==https://cran.r-project.org/web/packages/distributions3/vignettes/one-sample-t-confidence-interval.html

sd_rmspe2 = sd(pred_ridgereg2)

n_rmspe2= nrow(pred_ridgereg2)

q_rmspe2= StudentsT(df = n_rmspe2)

high_CI_rmspe2 = RmspeV2Ridge + quantile(q_rmspe2, 1 - 0.10 / 2) * sd_rmspe2 / sqrt(n_rmspe2)

low_CI_rmspe2 = RmspeV2Ridge - quantile(q_rmspe2, 1 - 0.10 / 2) * sd_rmspe2 / sqrt(n_rmspe2)

#cat("The CI of the RMSPE of V2 is [High =",high_CI_rmspe2, ",Low =",low_CI_rmspe2, "]" )

#Vovel 3 = response 3

#https://moodle2.cs.huji.ac.il/nu21/pluginfile.php/714931/mod_resource/content/0/Lect10B_22_slides.Rmd

MspeV3Ridge= MSPE(train_resp2[,3],pred_ridgereg3)

library("plotrix")

Mspe.se3 = std.error(pred_ridgereg3)

#cat("Our MSPE estimation of V3 is",MspeV3Ridge, ", and the standard-error for this estimate is",Mspe.se3,"."  )
# CI

sd_mspe3 = sd(pred_ridgereg3)

n_mspe3= nrow(pred_ridgereg3)

q_mspe3= StudentsT(df = n_mspe3)

high_CI_mspe3 = MspeV3Ridge + quantile(q_mspe3, 1 - 0.10 / 2) * sd_mspe3 / sqrt(n_mspe3)

low_CI_mspe3 = MspeV3Ridge - quantile(q_mspe3, 1 - 0.10 / 2) * sd_mspe3 / sqrt(n_mspe3)

#cat("The CI of the MSPE of V3 is [High =",high_CI_mspe3, ",Low =",low_CI_mspe3, "]" )
#https://moodle2.cs.huji.ac.il/nu21/pluginfile.php/714931/mod_resource/content/0/Lect10B_22_slides.Rmd

RmspeV3Ridge= RMSPE(train_resp2[,3],pred_ridgereg3)

#cat("The estimate RMSPE of the best model of V3 is ",RmspeV3Ridge)
# CI ==https://cran.r-project.org/web/packages/distributions3/vignettes/one-sample-t-confidence-interval.html

sd_rmspe3 = sd(pred_ridgereg3)

n_rmspe3= nrow(pred_ridgereg3)

q_rmspe3= StudentsT(df = n_rmspe3)

high_CI_rmspe3 = RmspeV3Ridge + quantile(q_rmspe3, 1 - 0.10 / 2) * sd_rmspe3 / sqrt(n_rmspe3)

low_CI_rmspe3 = RmspeV3Ridge - quantile(q_rmspe3, 1 - 0.10 / 2) * sd_rmspe3 / sqrt(n_rmspe3)

#cat("The CI of the RMSPE of V3 is [High =",high_CI_rmspe3, ",Low =",low_CI_rmspe3, "]" )
  1. Generate 250 predictions for the test data.
# https://moodle2.cs.huji.ac.il/nu21/pluginfile.php/708447/mod_resource/content/0/Lect9A_slides_22.pdf P 61

ridge_test_pred1= as.matrix(feature_test2[,-1]) %*% best_ridgereg1$beta + best_ridgereg1$a0

ridge_test_pred2= as.matrix(feature_test2[,-1]) %*% best_ridgereg2$beta + best_ridgereg2$a0

ridge_test_pred3= as.matrix(feature_test2[,-1]) %*% best_ridgereg3$beta + best_ridgereg3$a0

library(data.table)

#pred_250 = as.matrix(ridge_test_pred1,ridge_test_pred2,ridge_test_pred3)

pred_250= data.frame(as.matrix(ridge_test_pred1),as.matrix(ridge_test_pred2),as.matrix(ridge_test_pred3))

colnames(pred_250) = c("Test data & V1", "Test data & V2","Test data & V3") 

#pred_250= as.data.frame(pred_250)

pred_250_=head(pred_250,10)

knitr::kable(pred_250_ , format = "html",align = "r", caption = "10 out of 250 - predictions for the test data")
10 out of 250 - predictions for the test data
Test data & V1 Test data & V2 Test data & V3
-0.5726907 0.1933722 0.0777290
1.1198055 0.7938964 -0.1452843
-0.8415920 -1.0942912 -1.0330336
0.1657358 -0.5045688 0.3330934
0.7303642 -0.8724151 -0.1264165
0.5814317 0.4825118 -0.5815839
-0.4398713 -0.5939171 -1.0770294
0.3404550 -0.2584595 -0.2265490
0.0769158 -0.3786619 -0.2063820
0.0859376 0.5600531 -0.1157465

#2.1.2 Presenting results

Present the results for the three responses in a table, detailing for each response

  1. the chosen model (ridge or lasso),

  2. the chosen lambda,

  3. the average cross-validation score (for best model),

  4. the estimated MSPE from validation with a confidence interval, and

  5. the estimated RMSPE with a confidence interval.

response_Voxel= c("Voxel 1","Voxel 2", "Voxel 3")
chosen_model= c( "Ridge", "Ridge","Ridge")

#average_cross_validation_score= c(MSE$value[1],MSE$value[2],MSE$value[3] )

average_cross_validation_score= c(min(ridgereg1$cvm),min(ridgereg2$cvm),min(ridgereg3$cvm))

chosen_lambda= c(ridgereg1$lambda.min,ridgereg2$lambda.min,ridgereg3$lambda.min)

CI_for_MSPE=c(paste(MspeV1Ridge, "+/-",signif((high_CI_mspe-low_CI_mspe), digits=1)),paste(MspeV2Ridge, "+/-",signif((high_CI_mspe2-low_CI_mspe2),digits = 1)),paste(MspeV3Ridge, "+/-",signif((high_CI_mspe3-low_CI_mspe3), digits=1)))

CI_for_RMSPE=c(paste(RmspeV1Ridge, "+/-",signif((high_CI_rmspe-low_CI_rmspe), digits = 1)),paste(RmspeV2Ridge, "+/-",signif((high_CI_rmspe2-low_CI_rmspe2), digits = 1)),paste(RmspeV3Ridge, "+/-",signif((high_CI_rmspe3-low_CI_rmspe3), digits = 1)))


res_2.1.2 =cbind(response_Voxel ,chosen_model ,average_cross_validation_score,  chosen_lambda  ,CI_for_MSPE ,CI_for_RMSPE )
#res_2.1.2 = as.data.frame(res_2.1.2 )

res_2.1.2 = data.frame(res_2.1.2 )


#knitr::kable(t(res_2.1.2) , format = "html",align = "r")

knitr::kable(t(res_2.1.2) , format = "html",align = "r", caption = "Results for the three responses")
Results for the three responses
response_Voxel Voxel 1 Voxel 2 Voxel 3
chosen_model Ridge Ridge Ridge
average_cross_validation_score 0.761640950224556 0.92702126989321 1.14838554508127
chosen_lambda 1.40572380699375 1.40863994636126 1.41253754462275
CI_for_MSPE 0.321 +/- 0.06 0.393 +/- 0.05 0.486 +/- 0.04
CI_for_RMSPE 0.567 +/- 0.06 0.627 +/- 0.05 0.697 +/- 0.04

#Discussion:

There is a significant difference in the prediction accuracy for the three responses. We can see that the lower the average cross validation parameter is, the thicker the CI will be. Secondly, the estimation of MSPE and RMSPE value will be low along low value of average cross validation. Meaning, the voxel 1 model is the best. However, there is small difeerent between the estimation values of V1 and V2. ###

We see a weak relation between the chosen lambda value and the accuracy of prediction. We can see that the values of the lambda are not so distinct, all of which around 1.4, as we sampled from the recommendation of glment package. ###

We do not see significant difference between the cross-validation results and the results on the validation data we set aside due to the lower values of the predictions. However, we do see relation between the mean cross-validation of each voxel and the low (hopefully) value of the MSPE estimations.

#2.1.3 Submit predictions

We uploaded the file “section_2.1.3.results_file.RData”.

rmspes = c(MSE$value[MSE$Y==1 & MSE$type == "ridge"],MSE$value[MSE$Y==2 & MSE$type == "ridge"],MSE$value[MSE$Y==3 & MSE$type == "ridge"]  )

voxel_1_file= predict(ridgereg1, s = ridgereg1$lambda.min,newx = as.matrix(feature_test2[,-1]), exact = T)

voxel_2_file = predict(ridgereg2, s = ridgereg2$lambda.min,newx = as.matrix(feature_test2[,-1]), exact = T)

voxel_3_file = predict(ridgereg3, s = ridgereg3$lambda.min, newx = as.matrix(feature_test2[,-1]), exact = T)

preds = cbind( voxel_1_file, voxel_2_file, voxel_3_file)

colnames(preds) = c("voxel 1", "voxel 2", "voxel 3")

save(preds, rmspes, file = "section_2.1.3.results_file.RData")

#2.2 Interpreting the results

We will choose the voxel (1 response of the 3) for which you model works best. Our goal is to get insight about how our model works and try to use what we learned to further improve the model.

Our data here is train stim xx xx.Rdata files, each with 250 × 16384 images os training data

We will complete 2 analyses. The goal is to visualize and discuss what we have found.

#Linearity of response:

We will take the single most important feature and plot its relation with the response.

feature_corr =cbind(feature_train2[,-1], train_resp2[,1])

feature_corr2 = cor(feature_corr)

feature_corr3 = feature_corr2[-nrow(feature_corr2),ncol(feature_corr2)]

maximum_index_covid_records= which.max(abs(feature_corr3)) 

max_cor = max(abs(feature_corr3), na.rm = T) #cor(most_important_feature,train_resp2[,1] )

most_important_feature = feature_train2[,maximum_index_covid_records+1]

par(mfrow=c(1,2))

plot(most_important_feature,train_resp2[,1], xlim = c(-0.5,2.3), xlab = "Most important chosen feature",
     main = "Most important feature(1739) VS Voxel 1", ylab = "Voxel 1")
abline(lm(train_resp2[,1] ~ most_important_feature), col = "red", lwd = 2)

linear_reg = lm(train_resp2[,1] ~ most_important_feature)

my_fitted_vals = linear_reg$fitted.values

plot(my_fitted_vals,train_resp2[,1], 
     main = "The prediction to Voxel 1 VS given Voxel 1", xlab = "Prediction for voxel 1", ylab = "Voxel 1") +geom_point(alpha = .9) +
abline(lm(train_resp2[,1] ~ my_fitted_vals), col = "red", lwd= 2)

## integer(0)

We From the right plot, we can see the prediction seems linear at first sight, but there is a big spread, meaning the regression line is far from many data points.

In The left plot shows how influential column are compared with Voxel 1.

As The two do not follow a linear connection

Given that the higher values of the Voxel correspond to the lower values of the feature, and the correlation between them is 0.3616, we didn’t expect much of a linear relationship between them.

In We can see the relationship between voxel 1 and the prediction of the voxel based only on the most important feature. But we do not see much linearity.

In both graphs there is no much linearity, and in both of them we can catch a vertical cloud of point around -0.05 , 0 which makes the pattern less linear, which make sense because we used penalised regression.

#The example domain: Look at images set aside for the test set.

Which images get the five_first predictions? Which images get the five_second predictions?

Can you learn anything by comparing the two sets? Can you find a nice way to visualise what you found?

# loading and assigning the images to a file  
load("train_stim_1_250.Rdata") ; assign("file0", train_stim_1_250)
load("train_stim_251_500.Rdata") ;assign("file1", train_stim_251_500)
load("train_stim_501_750.Rdata"); assign("file2", train_stim_501_750)
load("train_stim_751_1000.Rdata") ;assign("file3", train_stim_751_1000)
load("train_stim_1001_1250.Rdata");assign("file4", train_stim_1001_1250)
load("train_stim_1251_1500.Rdata") ;assign("file5", train_stim_1251_1500)

#create DF of prediced brain reactions to different pics in the FMRI
brain_reactionsDF = as.data.frame(cbind(seq(1:length(pred_ridgereg1)),pred_ridgereg1))
colnames(brain_reactionsDF) = c("n","reaction")

five_first = arrange(.data = brain_reactionsDF, desc(reaction))$n[1:5]
five_first = data.frame(pic = five_first, file = five_first%/%251, row = five_first%%251)

five_second = arrange(.data = brain_reactionsDF, reaction)$n[1:5]
five_second = data.frame(pic = five_second, file = five_second%/%251, row = five_second%%251)

par(mfrow = c(2,5))

plot_ed= for (i in 1:10) {
  if(i < 6){
    df = get(paste("file", five_first$file[i], sep = ""))
    m = t(matrix(df[five_first$row[i],], nrow = 128))
    image(m[,nrow(m):1],col = grey.colors(100), main = paste("Picture from FMRI", i), cex.main = 2.6)
  } 
  if(i >=6){
    df = get(paste("file", five_second$file[i-5], sep = ""))
    m = t(matrix(df[five_second$row[i-5],], nrow = 128))
    image(m[,nrow(m):1],col = grey.colors(100), main = paste("Picture from FMRI",i), cex.main = 2.6)
  } }

plot_ed
## NULL

Generaly, fMRI showed a maximum mapping accuracy at 5 mm for both motor and language mapping. MEG showed a maximum mapping accuracy of 40 mm for motor and 15 mm for language mapping.

There are five pictures on the upper row to which we predict V1 in the brain will react most, and five pictures on the lower row to which the model predicts lowest reactions.

In Based on our perspectives, V1 is the most visual area of the brain, and it responds to basic shapes oriented in different directions.

Hence, it’s reasonable to see that the picture with the highest reaction are characterized by straight lines and or round circles, which are basic geometrical shapes.

The pictures in the lower row are characterized by more complex stimuli, that cannot be approximated to a group of basic shapes.

We also know that V1 contains the part of the visual system that projects to or originates from large neurons in the two most ventral layers (the magnocellular layers) of the lateral geniculate nucleu, the magnocellular system, which is sensitive to changes in shades and brightness.

This, the 2 pictures in the left side of the bottom row got our attention, since we can see changes from white color to dark shades of gray.

However, we can assume that it’s part of the error that regression models have, we don’t expect to get 100% accuracy.

## integer(0)

As shown in the plot, we did not gain insights on how to improve the model from our analysis. We can see the the Skewness of the distribution. When we plot theoretical quantiles on the x-axis and the sample quantiles whose distribution we want to know on the y-axis then we see a very peculiar shape of a Normally distributed Q-Q plot for skewness.

The points seem to fall about a straight line. Notice the x-axis plots the theoretical quantiles. Those are the quantiles from the standard Normal distribution with mean 0 and standard deviation 1.

Here, the normal Q-Q plots that look like this usually mean our data are with small skewed.IT exhibit this behavior what mean our data dont have many more extreme values than would be expected if they truly came from a Normal distribution.

Since we cannot add more data to the model, we suggest using the current model for Voxel 1 to 3.


#3 Covid-19 Mortality Data

#Our goal is to estimate the rate of change in Covid-19 (Coronavirus) case data in Israel.

We will prepare the following plots:

  1. A figure showing the number of new detected Covid-19 cases per day, with both the observed values and a regression curve.

The regression should be estimated outside the figure.

As we can see in the above plot, there is an intersing pattern of covid cases through two years of pendemic. We can also see the high values of recent monthes in IL. In addiotion we cant wonder if we (or other researches) would imply polynomial regression on the data, would they recive negetaive values as learned in class? Thier predictions may be accurate or not? Its a nice plot - but we lived it.

  1. A figure showing the daily change in rate of new detections per day
change <- covid_plot$New_cases-lag(covid_plot$New_cases)
change_reg <- covid_plot$`kernel reg 5`-lag(covid_plot$`kernel reg 5`)
change_reg2 <- covid_plot$`kernel reg 10`-lag(covid_plot$`kernel reg 10`)
change_reg3 <- covid_plot$`kernel reg 90`-lag(covid_plot$`kernel reg 90`)

covid_plot_2 <- cbind(covid_plot, change_reg, change_reg2, change_reg3, change)

covid_plot_2[1, c(8:11)] <- 0


covid_2 <- data.frame(date = as.Date(rep(covid_plot_2$Dates, each = 3)),
                       regs_value = c(rbind(covid_plot_2$`kernel reg 5`, covid_plot_2$`kernel reg 10`, covid_plot_2$`kernel reg 90`)),
                       reg = c(rbind(covid_plot_2$change_reg, covid_plot_2$change_reg2, covid_plot_2$change_reg3)),
                       new_cases = rep(covid_plot_2$New_cases, each = 3),
                       change_daily = rep(covid_plot_2$change, each = 3),
                       type = rep(seq(1:3), 831),
                       color = rep(c("violetred", "palegreen3", "dodgerblue4"), 831))
covid_2$new_cases[covid$types != 1] <- NA 
covid_2$change_daily[covid$types != 1] <- NA 



ggplot(data = covid_2, aes(x=date, y=reg, group= type, color=color))+geom_line(size = 0.6)+geom_point(aes(y=change_daily), size = 0.9, alpha = 0.3, color = "darkred", fill = "lightpink", shape = 21)+scale_color_manual(values = c("violetred", "palegreen3", "dodgerblue4"), labels = c("5", "10", "90")) + ggtitle("Change in rate of new detections per day") + ylab("change rate")

We wanted to check a few bandwidths in order to show how different the smooths might be with different values. The bigger the bandwidth, the smoother the line. It is harder to identify the highs and lows when the bandwidths is smaller.

\(Main Sources:\)

-https://www.cedricscherer.com/2019/08/05/a-ggplot2-tutorial-for-beautiful-plotting-in-r/