Load necessary packages and data.
library(dplyr)
##
## 載入套件:'dplyr'
## 下列物件被遮斷自 'package:stats':
##
## filter, lag
## 下列物件被遮斷自 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(caret)
## 載入需要的套件:lattice
library(skimr)
library(psych)
##
## 載入套件:'psych'
## 下列物件被遮斷自 'package:ggplot2':
##
## %+%, alpha
library(e1071)
pokemon <- read.csv(
"https://www.dropbox.com/s/znbta9u9tub2ox9/pokemon.csv?dl=1"
)
Subsetting and renaming data.
pokemon_sub <- pokemon[ ,
c("is_legendary",
"hp",
"weight_kg",
"height_m",
"sp_attack",
"sp_defense",
"type1",
"capture_rate")
]
pokemon_sub <- pokemon_sub %>%
rename(type = type1)
table(pokemon_sub$type) ## 18 types.
##
## bug dark dragon electric fairy fighting fire flying
## 72 29 27 39 18 28 52 3
## ghost grass ground ice normal poison psychic rock
## 27 78 32 23 105 32 53 45
## steel water
## 24 114
Converting NAs and variable classes.
pokemon_sub$weight_kg[is.na(pokemon_sub$weight_kg)] <- 0
pokemon_sub$height_m[is.na(pokemon_sub$height_m)] <- 0
class(pokemon_sub$capture_rate) ## Character class, means something's off.
## [1] "character"
pokemon_sub$is_legendary <- as.factor(pokemon_sub$is_legendary)
If we convert “capture_rate” to factor class, there would be a new NA value. We first deal with this problem.
poke <- pokemon_sub ## Temporarily assign a new df to solve the problem.
poke$capture_rate <- as.numeric(poke$capture_rate) ## 強制變更過程中產生了 NA
## Warning: 強制變更過程中產生了 NA
sum(is.na(poke$capture_rate)) ## 1 NA in this column.
## [1] 1
which(is.na(poke$capture_rate), arr.ind = T) ## The 774th row is the NA.
## [1] 774
rm(poke) ## Remove the temporarily created data frame.
## Now we go back to the original data and see what's in the 774th row.
pokemon[774, c("japanese_name", "capture_rate")] ## Metenoメテノ, 30 (Meteorite)255 (Core)
## japanese_name capture_rate
## 774 Metenoメテノ 30 (Meteorite)255 (Core)
pokemon[774, c("capture_rate")] <- 30 ## Keep its capture rate to only 30.
pokemon[774, c("capture_rate")] ## Now it has only one capture rate: 30.
## [1] "30"
## Then we can convert this column to factor class.
pokemon_sub$capture_rate <- as.numeric(pokemon_sub$capture_rate)
## Warning: 強制變更過程中產生了 NA
One-hot-encoding.
## Assigning is.legendary to y.
y <- pokemon_sub$is_legendary
## One-hot-encoding. First convert is.legendary to numeric class.
pokemon_sub$is_legendary <- as.numeric(pokemon_sub$is_legendary)
pokemon_ohe_model <- dummyVars(type ~ ., data = pokemon_sub) ## Creating dummies.
data_mat <- predict(pokemon_ohe_model, newdata = pokemon_sub) ## OHE.
pokemon_ohe <- data.frame(data_mat)
## Add y back in.
pokemon_ohe$is_legendary <- y
Only 2 more features were created: is_legendary.0 and is_legendary.1.
dim(pokemon_ohe)
## [1] 801 7
colnames(pokemon_ohe)
## [1] "is_legendary" "hp" "weight_kg" "height_m" "sp_attack"
## [6] "sp_defense" "capture_rate"
75-25 train-test split.
set.seed(123)
sample <- createDataPartition(pokemon_ohe$is_legendary, p = .75, list = F)
train_pokemon_ohe <- pokemon_ohe[sample, ]
test_pokemon_ohe <- pokemon_ohe[-sample, ]
Train an SVM using train data, then predict the outcome on test data and create a confusion matrix.
## SVM on original data.
svm.fit.ohe <- svm(
is_legendary ~ .,
data = train_pokemon_ohe
)
predict_train_svm <- predict(svm.fit.ohe, test_pokemon_ohe)
confmat_train_svm <- table(
Predicted = predict_train_svm,
Actual = test_pokemon_ohe$is_legendary
)
## Take a look at the confusion matrix.
confmat_train_svm
## Actual
## Predicted 0 1
## 0 181 10
## 1 1 7
## Calculate prediction accuracy. We get 94.47236% accuracy.
(confmat_train_svm[1, 1] + confmat_train_svm[2, 2]) / sum(confmat_train_svm) *100
## [1] 94.47236
Reload libraries and data for next part.
pokemon <- read.csv(
"https://www.dropbox.com/s/znbta9u9tub2ox9/pokemon.csv?dl=1"
)
Subset data, convert capture_rate to numeric class and is_legendary to factor class.
pokemon_sub_2 <- pokemon[ ,
c("is_legendary",
"hp",
"weight_kg",
"height_m",
"sp_attack",
"sp_defense",
"type1",
"capture_rate")
]
pokemon_sub_2 <-pokemon_sub_2 %>%
rename(type = type1)
pokemon_sub_2$capture_rate <- as.numeric(pokemon_sub_2$capture_rate)
## Warning: 強制變更過程中產生了 NA
pokemon_sub_2$is_legendary <- as.factor(pokemon_sub_2$is_legendary)
Check for NAs using colSums(is.na()).
# 1 NA in capture_rate, 20 in both height_m and weight_kg.
colSums(is.na(pokemon_sub_2))
## is_legendary hp weight_kg height_m sp_attack sp_defense
## 0 0 20 20 0 0
## type capture_rate
## 0 1
Using KNN to impute NAs.
library(RANN)
knn_na <- preProcess(as.data.frame(pokemon_sub_2), method = "knnImpute")
pokemon_knn <- predict(knn_na, newdata = pokemon_sub_2)
## Check for NAs again.
colSums(is.na(pokemon_knn)) ## Not anymore.
## is_legendary hp weight_kg height_m sp_attack sp_defense
## 0 0 0 0 0 0
## type capture_rate
## 0 0
Repeat OHE, then fit another SVM model.
y <- pokemon_knn$is_legendary
pokemon_knn$is_legendary <- as.numeric(pokemon_knn$is_legendary)
pokemon_ohe_model_2 <- dummyVars(type ~ ., data = pokemon_knn) ## Creating dummies.
data_mat <- predict(pokemon_ohe_model, newdata = pokemon_knn) ## OHE.
pokemon_knn <- data.frame(data_mat)
pokemon_knn$is_legendary <- y
sample <- createDataPartition(pokemon_knn$is_legendary, p = 0.75, list = F)
train_pokemon_knn <- pokemon_knn[sample, ]
test_pokemon_knn <- pokemon_knn[-sample, ]
svm.fit.knn <- svm(
is_legendary ~ .,
data = train_pokemon_knn
)
predict_train_knn <- predict(svm.fit.knn, newdata = test_pokemon_knn)
confmat_train_knn <- table(
Predicted = predict_train_knn,
Actual = test_pokemon_knn$is_legendary
)
## Take a look at the KNN confusion matrix.
confmat_train_knn
## Actual
## Predicted 0 1
## 0 181 7
## 1 1 10
## Calculate prediction accuracy. We got 93.96985% accuracy.
(confmat_train_knn[1, 1] + confmat_train_knn[2, 2]) / sum(confmat_train_knn) *100
## [1] 95.9799
The KNN imputed method DOES NOT have better prediction accuracy. OHE method has 95.9799% accuacy, while KNN method has 93.96985%.
Load necessary packages and data.
library(gbm)
## Loaded gbm 2.2.2
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## 載入套件:'pROC'
## 下列物件被遮斷自 'package:stats':
##
## cov, smooth, var
library(plotROC)
##
## 載入套件:'plotROC'
## 下列物件被遮斷自 'package:pROC':
##
## ggroc
library(ISLR)
default <- as.data.frame(Default)
Train-test split.
set.seed(123)
sample <- createDataPartition(default$default, p = 0.75, list = F)
train_default <- default[sample, ]
test_default <- default[-sample, ]
Models fitting: logit, probit, SVM, GBM.
## Logit fitting.
logit.fit.default <- glm(
default ~ .,
family = binomial(link = "logit"),
data = train_default
)
## Probit fitting.
probit.fit.default <- glm(
default ~ .,
family = binomial(link = "probit"),
data = train_default
)
## SVM fitting.
svm.fit.default <- svm(
default ~ .,
data = train_default
)
## Hyperparameters tuning first.
hyper_grid <- expand.grid(
shrinkage = c(0.01, 0.1, 0.2),
interaction.depth = c(1, 3, 5),
n.minobsinnode = c(5, 10, 15),
bag.fraction = c(0.5, 0.75, 1),
optimal_trees = 0,
min_RMSE = 0
)
nrow(hyper_grid) ## 81 unique value combinations.
## [1] 81
## Converting values.
default$default <- ifelse(default$default == "Yes", 1, 0)
## GBM fitting.
random_index <- sample(1:nrow(default), nrow(train_default))
random_train <- default[random_index, ]
for(i in 1:nrow(hyper_grid)) {
gbm.fit.default <- gbm(
default ~ .,
data = random_train,
distribution = "bernoulli",
cv.folds = 5,
interaction.depth = hyper_grid$interaction.depth[i],
shrinkage = hyper_grid$shrinkage[i],
n.trees = 500,
n.minobsinnode = hyper_grid$n.minobsinnode[i],
bag.fraction = hyper_grid$bag.fraction[i],
train.fraction = 0.75,
n.cores = NULL,
verbose = F
)
hyper_grid$optimal_trees[i] <- which.min(gbm.fit.default$valid.error)
hyper_grid$min_RMSE[i] <- sqrt(min(gbm.fit.default$valid.error))
}
## Optimal hyperparameter from GBM.
optimal_para <- hyper_grid %>%
dplyr::arrange(min_RMSE) %>%
head(1)
## Now fit the optimal GBM using the optimal hyperparameters.
gbm.tuned.default <- gbm(
default ~ .,
data = random_train,
distribution = "bernoulli",
cv.folds = 5,
interaction.depth = optimal_para$interaction.depth,
shrinkage = optimal_para$shrinkage,
n.trees = 500,
n.minobsinnode = optimal_para$n.minobsinnode,
bag.fraction = optimal_para$bag.fraction,
train.fraction = 0.75,
n.cores = NULL,
verbose = F
)
Making predictions on all 4 models.
## Logit prediction, calculate prediction accuracy.
logit.pred <- predict(logit.fit.default, test_default, type = "response")
logit.pred.class <- as.numeric(ifelse(logit.pred > 0.5, 1, 0))
confmat.logit <- table(
Predicted = logit.pred.class,
Actual = test_default$default
)
(confmat.logit[1, 1] + confmat.logit[2, 2]) / sum(confmat.logit)
## [1] 0.9711885
## Probit prediction, calculate prediction accuracy.
probit.pred <- predict(probit.fit.default, test_default, type = "response")
probit.pred.class <- as.numeric(ifelse(probit.pred > 0.5, 1, 0))
confmat.probit <- table(
Predicted = probit.pred.class,
Actual = test_default$default
)
(confmat.probit[1, 1] + confmat.probit[2, 2]) / sum(confmat.probit)
## [1] 0.9707883
## SVM prediction, calculate prediction accuracy.
svm.pred <- predict(svm.fit.default, test_default, type = "response")
svm.pred.class <- as.numeric(ifelse(svm.pred == "Yes", 1, 0))
confmat.svm <- table(
Predicted = svm.pred,
Actual = test_default$default
)
(confmat.svm[1, 1] + confmat.svm[2, 2]) / sum(confmat.svm)
## [1] 0.9707883
## GBM prediction, calculate prediction accuracy.
gbm.pred <- predict(gbm.tuned.default, test_default, type = "response")
## Using 193 trees...
gbm.pred.class <- as.numeric(ifelse(gbm.pred > 0.5, 1, 0))
confmat.gbm <- table(
Predicted = gbm.pred.class,
Actual = test_default$default
)
(confmat.gbm[1, 1] + confmat.gbm[2, 2]) / sum(confmat.gbm)
## [1] 0.9743898
Generate ROC.
train_default$default <- as.factor(train_default$default)
test_default$default <- as.factor(test_default$default)
## Assign values to roc() to compute classification accuracy.
logit.roc <- roc(test_default$default, logit.pred.class)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
probit.roc <- roc(test_default$default, probit.pred.class)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
svm.roc <- roc(test_default$default, svm.pred.class)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
gbm.roc <- roc(test_default$default, gbm.pred.class)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
Calculate AUC for all four models.
auc(logit.roc) ## 0.6477.
## Area under the curve: 0.6477
auc(probit.roc) ## 0.6184.
## Area under the curve: 0.6184
auc(svm.roc) ## 0.601.
## Area under the curve: 0.601
auc(gbm.roc) ## 0.7075.
## Area under the curve: 0.7075
## The GBM has the largest AUC.
Plot all ROC. The closer the line to the upper left corner, the better the performance. From visual inspection, GBM (the last one) seems to have the best performance.
plot.roc(logit.roc)
plot.roc(probit.roc)
plot.roc(svm.roc)
plot.roc(gbm.roc)
##
Starting Problem 3. Load and initiate h2o, convert data to h2o
format.
library(h2o)
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit https://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## 載入套件:'h2o'
## 下列物件被遮斷自 'package:pROC':
##
## var
## 下列物件被遮斷自 'package:stats':
##
## cor, sd, var
## 下列物件被遮斷自 'package:base':
##
## %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
h2o.init() ## Initiate h2o.
##
## H2O is not running yet, starting it now...
## Warning in .h2o.startJar(ip = ip, port = port, name = name, nthreads = nthreads, : You have a 32-bit version of Java. H2O works best with 64-bit Java.
## Please download the latest Java SE JDK from the following URL:
## http://docs.h2o.ai/h2o/latest-stable/h2o-docs/welcome.html#java-requirements
##
## Note: In case of errors look at the following log files:
## C:\Users\JACKCH~1\AppData\Local\Temp\RtmpQ5wk06\file69c06e743366/h2o_Jack_Chen_started_from_r.out
## C:\Users\JACKCH~1\AppData\Local\Temp\RtmpQ5wk06\file69c027cb6e1f/h2o_Jack_Chen_started_from_r.err
##
##
## Starting H2O JVM and connecting: Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 3 seconds 957 milliseconds
## H2O cluster timezone: Asia/Taipei
## H2O data parsing timezone: UTC
## H2O cluster version: 3.44.0.3
## H2O cluster version age: 1 year, 5 months and 7 days
## H2O cluster name: H2O_started_from_R_Jack_Chen_qlm670
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.96 GB
## H2O cluster total cores: 12
## H2O cluster allowed cores: 12
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 4.3.3 (2024-02-29 ucrt)
## Warning in h2o.clusterInfo():
## Your H2O cluster version is (1 year, 5 months and 7 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
train_default_h2o <- as.h2o(train_default)
## | | | 0% | |======================================================================| 100%
test_default_h2o <- as.h2o(test_default)
## | | | 0% | |======================================================================| 100%
Hard-voting fitting.
## Fitting the base models first. Define X and Y.
Y_default <- "default"
X_default <- names(default)[-1]
## Base logit.
base_logit <- h2o.glm(
x = X_default, y = Y_default, train = train_default_h2o,
family = "binomial",
nfold = 5,
keep_cross_validation_predictions = T,
auc_type = "AUTO",
seed = 123
)
## | | | 0% | |======================================================================| 100%
## Base Naive Bayes.
base_nb <- h2o.naiveBayes(
x = X_default, y = Y_default, train = train_default_h2o,
nfold = 5,
keep_cross_validation_predictions = T,
auc_type = "AUTO",
seed = 123
)
## | | | 0% | |======================================================================| 100%
## Base GBM.
base_gbm <- h2o.gbm(
x = X_default, y = Y_default, train = train_default_h2o,
ntrees = 500,
max_depth = optimal_para$interaction.depth,
learn_rate = optimal_para$shrinkage,
min_rows = optimal_para$n.minobsinnode,
sample_rate = optimal_para$bag.fraction,
nfold = 5,
keep_cross_validation_predictions = T,
auc_type = "AUTO",
seed = 123
)
## | | | 0% | |== | 2% | |================= | 24% | |========================================================== | 83% | |================================================================== | 94% | |======================================================================| 100%
Hard-voting accuracy: 96.7587%; AUC: 0.7098.
## Hard voting.
## Predict using logit.
base_logit.pred <- as.vector(
h2o.predict(base_logit, test_default_h2o)$predict
)
## | | | 0% | |======================================================================| 100%
## Predict using Naive Bayes.
base_nb.pred <- as.vector(
h2o.predict(base_nb, test_default_h2o)$predict
)
## | | | 0% | |======================================================================| 100%
## Predict using GBM.
base_gbm.pred <- as.vector(
h2o.predict(base_gbm, test_default_h2o)$predict
)
## | | | 0% | |======================================================================| 100%
predictions <- data.frame(base_logit.pred, base_gbm.pred, base_nb.pred)
## Define function for hard voting.
hard_vote <- function(preds) {
apply(preds, 1, function(row) {
names(sort(table(row), decreasing = TRUE))[1]
})
}
## Final voting.
hv_prediction <- hard_vote(predictions)
## Convert to factor class with the right ordering.
hv_prediction <- as.factor(hv_prediction)
confmat.hv <- table(Predicted = hv_prediction, Actual = test_default$default)
(confmat.hv[1, 1] + confmat.hv[2, 2]) / sum(confmat.hv) * 100 ## 96.7587%.
## [1] 96.7587
hv_prediction_class <- as.numeric(ifelse(hv_prediction == "Yes", 1, 0))
hv_roc <- roc(test_default$default, hv_prediction_class)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
auc(hv_roc) ## AUC is 0.7098.
## Area under the curve: 0.7098
Soft-voting accuracy.
## Create a model list.
base_models <- c(
base_logit@model_id,
base_nb@model_id,
base_gbm@model_id
)
## Create a stacked ensemble.
sv_ensemble <- h2o.stackedEnsemble(
x = X_default, y = Y_default, training_frame = train_default_h2o,
base_models = base_models
)
## | | | 0% | |======================================================================| 100%
## Predict using soft voting model.
sv_pred <- h2o.predict(sv_ensemble, newdata = test_default_h2o)
## | | | 0% | |======================================================================| 100%
## Define AUC function and get AUCs.
get_auc <- function(model) {
results <- h2o.performance(model, newdata = test_default_h2o)
h2o.auc(results)
}
voting_perf <- list(base_logit, base_nb, base_gbm, sv_ensemble) %>%
purrr::map_dbl(get_auc)
tab_voting_auc <- data.frame(AUC = voting_perf)
rownames(tab_voting_auc) <- c("Logit", "Naive Bayes", "GBM", "Soft Voting")
tab_voting_auc
## AUC
## Logit 0.9538942
## Naive Bayes 0.9483538
## GBM 0.9499746
## Soft Voting 0.9539441
## I somehow cannot use grid.arrange(), so I look at tab_voting_auc directly instead.
## theme_1 <- ttheme_default()
## grid.arrange(tableGrob(tab_voting_auc, theme = theme_1))
h2o.shutdown(prompt = F)
Soft Voting has the best AUC: 0.9539441, showing that using a voting model does have some benefit. ## Starting Problem 4. Load necessary packages and data.
library(dplyr)
library(rsample)
##
## 載入套件:'rsample'
## 下列物件被遮斷自 'package:e1071':
##
## permutations
library(boot)
##
## 載入套件:'boot'
## 下列物件被遮斷自 'package:psych':
##
## logit
## 下列物件被遮斷自 'package:lattice':
##
## melanoma
library(purrr)
##
## 載入套件:'purrr'
## 下列物件被遮斷自 'package:caret':
##
## lift
default <- as.data.frame(Default)
Estimate a logit model and perform a 10-fold CV.
## Define a function to calculate the k-fold CV MSE.
kf_cv_error <- function(x) {
set.seed(123)
logit.fit.kf <- glm(
default ~ student + balance + poly(income, x),
family = binomial(link = "logit"),
data = default
)
cv.glm(default, logit.fit.kf, K = 10)$delta[1]
}
1:10 %>% map_dbl(kf_cv_error) ## The first degree has the smallest MSE.
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## Warning: glm.fit:擬合機率算出來是數值零或一
## [1] 0.02140615 0.02140915 0.02144320 0.02142835 0.02144654 0.02143820
## [7] 0.02144703 0.02147494 0.02147267 0.02148085
Bootstrapping.
logit.boot <- glm(
default ~ student + balance + income,
family = binomial(link = "logit"),
data = default
)
boot_result <- function(data, index) {
logit.boot <- glm(
default ~ student + balance + income,
family = binomial(link = "logit"),
data = data,
subset = index
)
coef(logit.boot)
}
## Use boot() to bootstrap.
set.seed(123)
boot(default, boot_result, R = 1000)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = default, statistic = boot_result, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* -1.086905e+01 -3.291558e-02 4.797385e-01
## t2* -6.467758e-01 -3.118653e-03 2.427747e-01
## t3* 5.736505e-03 1.681596e-05 2.294104e-04
## t4* 3.033450e-06 1.160860e-07 8.249317e-06
The t3* term (balance) has the largest bias. ## End of Homework 4.