require(ISLR)
## Loading required package: ISLR
data(Weekly)
summary(Weekly)
## Year Lag1 Lag2 Lag3
## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume Today
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747 Min. :-18.1950
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202 1st Qu.: -1.1540
## Median : 0.2380 Median : 0.2340 Median :1.00268 Median : 0.2410
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462 Mean : 0.1499
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373 3rd Qu.: 1.4050
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821 Max. : 12.0260
## Direction
## Down:484
## Up :605
##
##
##
##
pairs(Weekly)
Answer 10A:
There is no visible strong relationships between the Lag variables. There appears to be positive correlation between Year and Volume variables.
fit.glm <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data=Weekly, family=binomial)
summary(fit.glm)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
Answer 10B:Lag2 variable is the only statistically significant.
prob <- predict(fit.glm, Weekly, type="response")
preds <- rep("Down", 1089)
preds[prob > 0.5] = "Up"
table(preds, Weekly$Direction)
##
## preds Down Up
## Down 54 48
## Up 430 557
Answer 10C: The confusion matrix is telling us that when prediction is “Down”, model is correct 54/(54+48)=52.9% and when prediction is “Up”, model is correct 557/(430+557)=56.4%. The model is more accurate when the prediction direction is “Up”.
train <- (Weekly$Year < 2009)
W_train <- Weekly[train,]
W_test <- Weekly[!train,]
Direction_train <- W_train$Direction
Direction_test <- W_test$Direction
logistic_wk <- glm(Direction ~ Lag2, data = W_train,family = binomial)
summary(logistic_wk)
##
## Call:
## glm(formula = Direction ~ Lag2, family = binomial, data = W_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.536 -1.264 1.021 1.091 1.368
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.20326 0.06428 3.162 0.00157 **
## Lag2 0.05810 0.02870 2.024 0.04298 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1354.7 on 984 degrees of freedom
## Residual deviance: 1350.5 on 983 degrees of freedom
## AIC: 1354.5
##
## Number of Fisher Scoring iterations: 4
logistic_prob <- predict(logistic_wk, W_test, type = "response")
logistic_pred = rep("Down", length(Direction_test))
logistic_pred[logistic_prob > 0.5] <- "Up"
table(logistic_pred, Direction_test)
## Direction_test
## logistic_pred Down Up
## Down 9 5
## Up 34 56
mean(logistic_pred == Direction_test)
## [1] 0.625
Answer 10D: We can see that most of the samples go Up. This single variable model is not good.
require(MASS)
## Loading required package: MASS
lda_wk <- lda(Direction ~ Lag2, data = Weekly, subset = train)
lda_wk
## Call:
## lda(Direction ~ Lag2, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2
## Down -0.03568254
## Up 0.26036581
##
## Coefficients of linear discriminants:
## LD1
## Lag2 0.4414162
plot(lda_wk)
lda_prob <- predict(lda_wk, W_test)
table(lda_prob$class, Direction_test)
## Direction_test
## Down Up
## Down 9 5
## Up 34 56
mean(lda_prob$class == Direction_test)
## [1] 0.625
require(MASS)
qda_wk <- qda(Direction ~ Lag2, data = Weekly, subset = train)
qda_wk
## Call:
## qda(Direction ~ Lag2, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2
## Down -0.03568254
## Up 0.26036581
qda_pred <- predict(qda_wk, W_test)
table(qda_pred$class, Direction_test)
## Direction_test
## Down Up
## Down 0 0
## Up 43 61
mean(qda_pred$class == Direction_test)
## [1] 0.5865385
require(class)
## Loading required package: class
train_X <- as.matrix(Weekly$Lag2[train])
test_X <- as.matrix(Weekly$Lag2[!train])
set.seed(1)
knn_pred <- knn(train_X, test_X, Direction_train, k = 1)
table(knn_pred, Direction_test)
## Direction_test
## knn_pred Down Up
## Down 21 30
## Up 22 31
mean(knn_pred == Direction_test)
## [1] 0.5
Answer 10h: The logistic regression model and the Linear Discriminant Analysis (LDA) model have the best performance in regards to accuracy.
require(MASS)
require(class)
# Logistic Model
logistic_wk3 <- glm(Direction ~ Lag2:Lag1, data = W_train, family = binomial)
summary(logistic_wk3)
##
## Call:
## glm(formula = Direction ~ Lag2:Lag1, family = binomial, data = W_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.368 -1.269 1.077 1.089 1.353
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.21333 0.06421 3.322 0.000893 ***
## Lag2:Lag1 0.00717 0.00697 1.029 0.303649
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1354.7 on 984 degrees of freedom
## Residual deviance: 1353.6 on 983 degrees of freedom
## AIC: 1357.6
##
## Number of Fisher Scoring iterations: 4
logistic_probs3 <- predict(logistic_wk3, W_test, type = "response")
logistic_pred3 = rep("Down", length(Direction_test))
logistic_pred3[logistic_probs3 > 0.5] <- "Up"
table(logistic_pred3, Direction_test)
## Direction_test
## logistic_pred3 Down Up
## Down 1 1
## Up 42 60
mean(logistic_pred3 == Direction_test)
## [1] 0.5865385
# LDA Model
lda_wk2 <- lda(Direction ~ Lag2:Lag1,data = Weekly, subset = train)
lda_wk2
## Call:
## lda(Direction ~ Lag2:Lag1, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2:Lag1
## Down -0.8014495
## Up -0.1393632
##
## Coefficients of linear discriminants:
## LD1
## Lag2:Lag1 0.1013404
plot(lda_wk2)
lda_probs2 <- predict(lda_wk2, W_test)
table(lda_probs2$class, Direction_test)
## Direction_test
## Down Up
## Down 0 1
## Up 43 60
mean(lda_probs2$class == Direction_test)
## [1] 0.5769231
#QDA Model
qda_wk2 <- qda(Direction ~ Lag2 + sqrt(abs(Lag2)),data = Weekly,subset = train)
qda_wk2
## Call:
## qda(Direction ~ Lag2 + sqrt(abs(Lag2)), data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2 sqrt(abs(Lag2))
## Down -0.03568254 1.140078
## Up 0.26036581 1.169635
qda_pred2 <- predict(qda_wk2, W_test)
table(qda_pred2$class, Direction_test)
## Direction_test
## Down Up
## Down 12 13
## Up 31 48
mean(qda_pred2$class == Direction_test)
## [1] 0.5769231
#KNN Model
train_X <- as.matrix(Weekly$Lag2[train])
test_X <- as.matrix(Weekly$Lag2[!train])
set.seed(1)
knn_pred3 <- knn(train_X, test_X, Direction_train, k = 3)
table(knn_pred3, Direction_test)
## Direction_test
## knn_pred3 Down Up
## Down 16 20
## Up 27 41
mean(knn_pred3 == Direction_test)
## [1] 0.5480769
train_X <- as.matrix(Weekly$Lag2[train])
test_X <- as.matrix(Weekly$Lag2[!train])
set.seed(1)
knn_pred100 <- knn(train_X, test_X, Direction_train, k = 100)
table(knn_pred100, Direction_test)
## Direction_test
## knn_pred100 Down Up
## Down 10 11
## Up 33 50
mean(knn_pred100 == Direction_test)
## [1] 0.5769231
Answer 10i: We can see that higher k values for KNN generate the best results when using the Lag2 as predictor.
#require(dplyr)
#glimpse(Auto)
Auto$mpg01 <- factor(as.numeric(Auto$mpg > median(Auto$mpg)))
table(Auto$mpg01)
##
## 0 1
## 196 196
library("dplyr")
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
#library("gridExtra")
library("egg")
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
Auto$name <- NULL
Auto$mpg <- NULL
Auto$origin <- factor(Auto$origin, labels = c("American", "European", "Japanese"))
glimpse(Auto)
## Rows: 392
## Columns: 8
## $ cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, ...
## $ displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 383,...
## $ horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 170,...
## $ weight <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425, ...
## $ acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, 8....
## $ year <dbl> 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70...
## $ origin <fct> American, American, American, American, American, Amer...
## $ mpg01 <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...
g1 <- ggplot(Auto, aes(x = mpg01, y = cylinders, col = mpg01)) +
geom_jitter() +
theme(legend.position = "none") +
ggtitle("Cylinders vs mpg01 - Jitter Plot")
g2 <- ggplot(Auto, aes(x = mpg01, y = displacement, fill = mpg01)) +
geom_boxplot() +
theme(legend.position = "none") +
ggtitle("Displacement vs mpg01 - Boxplot")
g3 <- ggplot(Auto, aes(x = mpg01, y = horsepower, fill = mpg01)) +
geom_boxplot() +
theme(legend.position = "none") +
ggtitle("Horsepower vs mpg01 - Boxplot")
g4 <- ggplot(Auto, aes(x = mpg01, y = weight, fill = mpg01)) +
geom_boxplot() +
theme(legend.position = "none") +
ggtitle("Weight vs mpg01 - Boxplot")
g5 <- ggplot(Auto, aes(x = mpg01, y = acceleration, fill = mpg01)) +
geom_boxplot() +
theme(legend.position = "none") +
ggtitle("Acceleration vs mpg01 - Boxplot")
g6 <- ggplot(Auto, aes(x = mpg01, y = year, fill = mpg01)) +
geom_boxplot() +
theme(legend.position = "none") +
ggtitle("Year vs mpg01 - Boxplot")
g7 <- ggplot(Auto, aes(x = origin, fill = mpg01)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent_format()) +
theme(axis.title.y = element_blank()) +
ggtitle("Origin vs mpg01 - Bar Plot")
grid.arrange(g1, g2, g3, g4, g5, g6, g7, ncol = 2, nrow = 4)
Answer 11B: As we can see in the plots, the following are the best predictors for mpg01: cylinders, displacement and weight.
library("caret")
## Loading required package: lattice
set.seed(444)
index <- createDataPartition(y = Auto$mpg01, p = 0.5, list = F)
train <- Auto[index, ]
test <- Auto[-index, ]
nrow(train) / nrow(Auto)
## [1] 0.5
Answer 11C: The train and test sets have been split in half from the Auto dataset.
library("caret")
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
set.seed(1)
lda_mpg <- train(mpg01 ~ cylinders + displacement + weight + horsepower, data = train, method = "lda", trControl = ctrl)
lda_mpg$results$Accuracy #train cv error
## [1] 0.9087427
predicted_lda <- predict(lda_mpg, newdata = test, type = "raw")
mean(predicted_lda != test$mpg01) #test error
## [1] 0.122449
Answer 11D: The test error of the LDA model = 0.122449, around 12%.
set.seed(2)
qda_mpg <- train(mpg01 ~ cylinders + displacement + weight + horsepower, data = train, method = "qda", trControl = ctrl)
#train cv error
qda_mpg$results$Accuracy
## [1] 0.9100877
#test error
predicted_qda <- predict(qda_mpg, newdata = test, type = "raw")
mean(predicted_qda != test$mpg01)
## [1] 0.1173469
Answer 11E: The test error of the QDA model = 0.1173469, around 11.7%. This test error result is a bit better than the LDA model.
set.seed(3)
logr_mpg <- train(mpg01 ~ cylinders + displacement + weight + horsepower, data = train, method = "glm", family = "binomial", trControl = ctrl)
#train cv error:
logr_mpg$results$Accuracy
## [1] 0.8915595
#test error:
predict_log <- predict(logr_mpg, newdata = test, type = "raw")
mean(predict_log != test$mpg01)
## [1] 0.1173469
Answer 11F: The test error of the Logistic Regression model = .1173469, around 11.7%. This test error result is the same as the QDA model.
set.seed(4)
knn_mpg <- train(mpg01 ~ cylinders + displacement + weight + horsepower, data = train, method = "knn", preProcess = c("center", "scale"), trControl = ctrl, tuneGrid = expand.grid(k = seq(1, 100, 3)))
knn_mpg
## k-Nearest Neighbors
##
## 196 samples
## 4 predictor
## 2 classes: '0', '1'
##
## Pre-processing: centered (4), scaled (4)
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 176, 177, 176, 176, 176, 176, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 1 0.8779532 0.7554422
## 4 0.8981287 0.7958529
## 7 0.9033138 0.8062631
## 10 0.9033138 0.8062631
## 13 0.9033138 0.8062631
## 16 0.9033138 0.8062631
## 19 0.9033138 0.8062631
## 22 0.9033138 0.8062631
## 25 0.9033138 0.8062631
## 28 0.9033138 0.8062631
## 31 0.9033138 0.8062631
## 34 0.9033138 0.8062631
## 37 0.9033138 0.8062631
## 40 0.9033138 0.8062631
## 43 0.9033138 0.8062631
## 46 0.9033138 0.8062631
## 49 0.9033138 0.8062631
## 52 0.9033138 0.8062631
## 55 0.9033138 0.8062631
## 58 0.9033138 0.8062631
## 61 0.9033138 0.8062631
## 64 0.9033138 0.8062631
## 67 0.9049805 0.8095964
## 70 0.9049805 0.8095964
## 73 0.9100682 0.8197621
## 76 0.9066472 0.8129297
## 79 0.9084016 0.8164288
## 82 0.9049805 0.8095964
## 85 0.9034016 0.8065072
## 88 0.8966472 0.7929693
## 91 0.8931287 0.7860096
## 94 0.8845224 0.7689215
## 97 0.8827680 0.7653833
## 100 0.8777680 0.7553833
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 73.
#train cv error
max(knn_mpg$results$Accuracy)
## [1] 0.9100682
#test error
predict_knn <- predict(knn_mpg, newdata = test, type = "raw")
mean(predict_knn != test$mpg01)
## [1] 0.1122449
Answer 11G: The test error obtained for the KNN model = 0.1122449, around 11.2%. The K=7 value seems to be performing best with an accuracy of 0.9033138
library("egg")
library("caret")
Boston$crim <- factor(ifelse(Boston$crim > median(Boston$crim), 1, 0))
glimpse(Boston)
## Rows: 506
## Columns: 14
## $ crim <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,...
## $ zn <dbl> 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.5, 12.5, 12.5, 12.5, 12.5...
## $ indus <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18, 7.87, 7.87, 7.87, 7.87,...
## $ chas <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ nox <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.5...
## $ rm <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430, 6.012, 6.172, 5.6...
## $ age <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7, 66.6, 96.1, 100.0, 85.9...
## $ dis <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622, 5.5605, 5.9...
## $ rad <int> 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4,...
## $ tax <dbl> 296, 242, 242, 222, 222, 222, 311, 311, 311, 311, 311, 311,...
## $ ptratio <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7, 15.2, 15.2, 15.2, 15.2,...
## $ black <dbl> 396.90, 396.90, 392.83, 394.63, 396.90, 394.12, 395.60, 396...
## $ lstat <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21, 12.43, 19.15, 29.93, 17...
## $ medv <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9,...
ctrl <- trainControl(method = "repeatedcv",number = 10, repeats = 5)
#Logistic Regression Model
set.seed(1)
logr_crim <- train(crim ~ ., data = Boston, method = "glm", family = "binomial", trControl = ctrl)
logr_crim
## Generalized Linear Model
##
## 506 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 456, 455, 456, 454, 456, 456, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9028549 0.8057423
#LDA Model
set.seed(2)
lda_crim <- train(crim ~ ., data = Boston, method = "lda", trControl = ctrl)
lda_crim
## Linear Discriminant Analysis
##
## 506 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 456, 455, 456, 456, 455, 454, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8498998 0.6996644
#QDA Model
set.seed(3)
qda_crim <- train(crim ~ ., data = Boston, method = "qda", trControl = ctrl)
qda_crim
## Quadratic Discriminant Analysis
##
## 506 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 456, 456, 455, 455, 455, 456, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8894561 0.7788975
#KNN Model
set.seed(4)
knn_crim <- train(crim ~ ., data = Boston, method = "knn", preProcess = c("center", "scale"), trControl = ctrl, tuneGrid = expand.grid(k = seq(1, 30, 1)))
knn_crim
## k-Nearest Neighbors
##
## 506 samples
## 13 predictor
## 2 classes: '0', '1'
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 455, 456, 456, 455, 455, 456, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 1 0.9145970 0.8292052
## 2 0.9098202 0.8196255
## 3 0.9181104 0.8362439
## 4 0.9158202 0.8316519
## 5 0.9153345 0.8306748
## 6 0.9019822 0.8039259
## 7 0.9047128 0.8094000
## 8 0.8921228 0.7842111
## 9 0.8779164 0.7557756
## 10 0.8731554 0.7462775
## 11 0.8664404 0.7327928
## 12 0.8616404 0.7231720
## 13 0.8664323 0.7327720
## 14 0.8687475 0.7374457
## 15 0.8703940 0.7407136
## 16 0.8656335 0.7312272
## 17 0.8684169 0.7367859
## 18 0.8668323 0.7336384
## 19 0.8648404 0.7296460
## 20 0.8601336 0.7201990
## 21 0.8558115 0.7115722
## 22 0.8494974 0.6989068
## 23 0.8494588 0.6988438
## 24 0.8435285 0.6869392
## 25 0.8427753 0.6854568
## 26 0.8427674 0.6854505
## 27 0.8443207 0.6885400
## 28 0.8427442 0.6854461
## 29 0.8439285 0.6877884
## 30 0.8391445 0.6781827
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 3.
max(knn_crim$results$Accuracy) #k = 3 at 0.9181104
## [1] 0.9181104
Answer 13: The KNN model performed the best compared to the rest of the models. The KNN results show the accuracy for k=3 at 0.9181104