library(ISLR)
library(naivebayes)
## naivebayes 0.9.7 loaded
library(e1071)
library(class)
library(MASS)
# First look at the ISLR `Weekly` data: leading rows, dimensions and
# per-column summary statistics.
head(Weekly)
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction
## 1 1990 0.816 1.572 -3.936 -0.229 -3.484 0.1549760 -0.270 Down
## 2 1990 -0.270 0.816 1.572 -3.936 -0.229 0.1485740 -2.576 Down
## 3 1990 -2.576 -0.270 0.816 1.572 -3.936 0.1598375 3.514 Up
## 4 1990 3.514 -2.576 -0.270 0.816 1.572 0.1616300 0.712 Up
## 5 1990 0.712 3.514 -2.576 -0.270 0.816 0.1537280 1.178 Up
## 6 1990 1.178 0.712 3.514 -2.576 -0.270 0.1544440 -1.372 Down
dim(Weekly)
## [1] 1089 9
# attach(Weekly) was removed: every later statement refers to the data
# explicitly (`data = Weekly` or `Weekly$...`), so attaching it only added
# masking risk on the search path.
par(mfrow = c(2, 2))
summary(Weekly)
## Year Lag1 Lag2 Lag3
## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume Today
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747 Min. :-18.1950
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202 1st Qu.: -1.1540
## Median : 0.2380 Median : 0.2340 Median :1.00268 Median : 0.2410
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462 Mean : 0.1499
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373 3rd Qu.: 1.4050
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821 Max. : 12.0260
## Direction
## Down:484
## Up :605
##
##
##
##
# plot() on a multi-column data frame draws a scatterplot matrix of all
# column pairs; used here to eyeball relationships between the variables.
plot(Weekly)
There appears to be growing volume as the years progress.
# Pairwise correlations of the numeric columns; the factor column Direction
# (column 9) is excluded by name rather than by position.
cor(Weekly[, names(Weekly) != "Direction"])
## Year Lag1 Lag2 Lag3 Lag4
## Year 1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1 -0.03228927 1.000000000 -0.07485305 0.05863568 -0.071273876
## Lag2 -0.03339001 -0.074853051 1.00000000 -0.07572091 0.058381535
## Lag3 -0.03000649 0.058635682 -0.07572091 1.00000000 -0.075395865
## Lag4 -0.03112792 -0.071273876 0.05838153 -0.07539587 1.000000000
## Lag5 -0.03051910 -0.008183096 -0.07249948 0.06065717 -0.075675027
## Volume 0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today -0.03245989 -0.075031842 0.05916672 -0.07124364 -0.007825873
## Lag5 Volume Today
## Year -0.030519101 0.84194162 -0.032459894
## Lag1 -0.008183096 -0.06495131 -0.075031842
## Lag2 -0.072499482 -0.08551314 0.059166717
## Lag3 0.060657175 -0.06928771 -0.071243639
## Lag4 -0.075675027 -0.06107462 -0.007825873
## Lag5 1.000000000 -0.05851741 0.011012698
## Volume -0.058517414 1.00000000 -0.033077783
## Today 0.011012698 -0.03307778 1.000000000
This once again shows that the only notable relationship is between Volume and Year (correlation of about 0.84).
# Logistic regression of Direction on the five lag variables plus Volume,
# fitted on every row of Weekly.
glm.fit1 <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = Weekly,
  family = binomial
)
summary(glm.fit1)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
Lag2 has a p-value of 0.0296, making it the only predictor that is significant at the 0.05 level, as indicated by the asterisk.
# In-sample predicted probabilities from the full logistic model, thresholded
# at 0.5 into "Up"/"Down" labels and compared with the observed Direction.
glm.prob1 <- predict(glm.fit1, type = "response")
# Size the label vector from the data instead of hard-coding 1089 rows.
glm.pred <- rep("Down", nrow(Weekly))
glm.pred[glm.prob1 > 0.5] <- "Up"
table(glm.pred, Weekly$Direction)
##
## glm.pred Down Up
## Down 54 48
## Up 430 557
# Accuracy and error rate are computed from the predictions rather than
# re-typed from the table above, so they cannot drift out of sync with it.
mean(glm.pred == Weekly$Direction)
## [1] 0.5610652
mean(glm.pred != Weekly$Direction)
## [1] 0.4389348
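This confusion matrix shows an overall accuracy of 56%, i.e. a 44% error rate. Most of the errors are false positives: the model predicts "Up" for 430 of the 484 weeks in which the market actually went down.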
# Logical training mask: weeks before 2009 are used for fitting, the rest are
# held out as the test set.
train <- Weekly$Year < 2009
# Logistic regression with Lag2 as the only predictor, training rows only.
glm.fit <- glm(
  Direction ~ Lag2,
  family = "binomial",
  data = Weekly,
  subset = train
)
summary(glm.fit)
##
## Call:
## glm(formula = Direction ~ Lag2, family = "binomial", data = Weekly,
## subset = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.536 -1.264 1.021 1.091 1.368
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.20326 0.06428 3.162 0.00157 **
## Lag2 0.05810 0.02870 2.024 0.04298 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1354.7 on 984 degrees of freedom
## Residual deviance: 1350.5 on 983 degrees of freedom
## AIC: 1354.5
##
## Number of Fisher Scoring iterations: 4
# Held-out predictions from the Lag2-only logistic model, thresholded at 0.5.
glm.probs <- predict(glm.fit, Weekly[!train, ], type = "response")
glm.pred <- rep("Down", sum(!train))
glm.pred[glm.probs > 0.5] <- "Up"
table(glm.pred, Weekly[!train, ]$Direction)
##
## glm.pred Down Up
## Down 9 5
## Up 34 56
# Test error rate, then test accuracy, computed from the predictions instead
# of hand-typed cell counts.
mean(glm.pred != Weekly[!train, ]$Direction)
## [1] 0.375
mean(glm.pred == Weekly[!train, ]$Direction)
## [1] 0.625
The confusion matrix shows that this model is still unreliable, predicting correctly only 62.5% of the time.
library(MASS)
# Linear discriminant analysis with Lag2 as the only predictor, fitted on the
# training rows selected by `train`.
lda.fit <- lda(Direction ~ Lag2, subset = train, data = Weekly)
print(lda.fit)
## Call:
## lda(Direction ~ Lag2, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2
## Down -0.03568254
## Up 0.26036581
##
## Coefficients of linear discriminants:
## LD1
## Lag2 0.4414162
# Classify the held-out weeks with the LDA fit and cross-tabulate the
# predicted class (rows) against the observed Direction (columns).
lda.pred <- predict(lda.fit, newdata = Weekly[!train, ])
table(lda.pred$class, Weekly$Direction[!train])
##
## Down Up
## Down 9 5
## Up 34 56
The results from the LDA output are identical to the results from part d.
# Quadratic discriminant analysis on the same single predictor and the same
# training rows as the LDA fit.
qda.fit <- qda(Direction ~ Lag2, subset = train, data = Weekly)
print(qda.fit)
## Call:
## qda(Direction ~ Lag2, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2
## Down -0.03568254
## Up 0.26036581
# Classify the held-out weeks with the QDA fit and tabulate predicted class
# (rows) against observed Direction (columns).
qda.pred <- predict(qda.fit, Weekly[!train, ])
table(qda.pred$class, Weekly[!train, ]$Direction)
##
## Down Up
## Down 0 0
## Up 43 61
# Test accuracy computed from the predictions instead of the hand-typed 61/104.
mean(qda.pred$class == Weekly[!train, ]$Direction)
## [1] 0.5865385
The QDA model predicts "Up" for every test week, so it classifies 100% of the actual "Up" weeks correctly and 0% of the "Down" weeks. Its overall accuracy is about 59%, which is slightly worse than the 62.5% achieved by the previous two models.
library(class)
# knn() wants the predictors as matrix-like objects, so keep Lag2 as a
# one-column data frame for both the training and the held-out rows.
train.X <- Weekly[train, "Lag2", drop = FALSE]
test.X <- Weekly[!train, "Lag2", drop = FALSE]
train.Direction <- Weekly$Direction[train]
# Seed first: knn() breaks ties between neighbours at random.
set.seed(1)
knn.pred <- knn(train = train.X, test = test.X, cl = train.Direction, k = 1)
table(knn.pred, Weekly$Direction[!train])
##
## knn.pred Down Up
## Down 21 30
## Up 22 31
This model has only a 50% accuracy rate ((21 + 31) / 104), meaning it is less accurate than the previous models.
# Naive Bayes classifier with Lag2 as the only predictor, fitted on the
# training rows and evaluated on the held-out weeks.
nb.fit <- naiveBayes(Direction ~ Lag2, subset = train, data = Weekly)
nb.class <- predict(nb.fit, newdata = Weekly[!train, ])
table(nb.class, Weekly$Direction[!train])
##
## nb.class Down Up
## Down 0 0
## Up 43 61
# Share of held-out weeks classified correctly.
mean(nb.class == Weekly$Direction[!train])
## [1] 0.5865385
This model is 59% accurate, which is very similar to other models, but better than the KNN.
Problem 14 (A)
# Drop any rows with missing values, then preview the first six rows.
Auto <- na.omit(Auto)
head(Auto, n = 6L)
## mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12.0 70 1
## 2 15 8 350 165 3693 11.5 70 1
## 3 18 8 318 150 3436 11.0 70 1
## 4 16 8 304 150 3433 12.0 70 1
## 5 17 8 302 140 3449 10.5 70 1
## 6 15 8 429 198 4341 10.0 70 1
## name
## 1 chevrolet chevelle malibu
## 2 buick skylark 320
## 3 plymouth satellite
## 4 amc rebel sst
## 5 ford torino
## 6 ford galaxie 500
# Binary response: 1 when a car's mpg is above the median mpg, 0 otherwise.
mpg01 <- as.numeric(Auto$mpg > median(Auto$mpg))
# Append the indicator as a new column of Auto.
Auto <- data.frame(Auto, mpg01)
# attach() is kept so that bare column names stay resolvable for any later
# code that relies on them.
attach(Auto)
## The following object is masked _by_ .GlobalEnv:
##
## mpg01
# 2 x 3 grid of boxplots: one panel per predictor, split by the mpg01 class.
par(mfrow = c(2, 3))
# local() keeps the loop helpers out of the global workspace.
local({
  panel.labels <- c(
    cylinders = "Cylinders",
    displacement = "Displacement",
    horsepower = "Horsepower",
    weight = "Weight",
    acceleration = "Acceleration",
    year = "Year"
  )
  for (column in names(panel.labels)) {
    label <- panel.labels[[column]]
    boxplot(
      reformulate("mpg01", response = column),
      data = Auto,
      xlab = "mpg01",
      ylab = label,
      main = paste(label, "to MPG01")
    )
  }
})
I am not sure if I did something wrong, but the scatterplots weren’t particularly helpful. The boxplots showed pretty much what was expected: the more cylinders, the higher the displacement and horsepower, and the heavier, quicker, and older the car, the worse its gas mileage. There also appear to be a couple of outliers with 8 cylinders that still achieved above-median mpg, which was surprising.
# Reproducible random split: draw 75% of the row indices of Auto for training.
set.seed(1)
train <- sample(nrow(Auto), size = 0.75 * nrow(Auto))
I had numerous issues with the following steps, but following along with similar examples, I found it easiest to define the training indices once and exclude them manually (with `-train`) when building each test set below.
# LDA of mpg01 on cylinders and displacement, fitted on the training indices.
lda.fit <- lda(mpg01 ~ cylinders + displacement, subset = train, data = Auto)
# Predict the rows left out of `train` and tabulate predicted vs actual class.
lda.pred <- predict(lda.fit, newdata = Auto[-train, ])
table(lda.pred$class, Auto$mpg01[-train], dnn = c("Predicted", "Actual"))
## Actual
## Predicted 0 1
## 0 41 2
## 1 12 43
Overall, this model had an accuracy of 86% based on the two variables cylinders and displacement. Error rate of 14%
# QDA of mpg01 on cylinders and displacement, fitted on the training indices.
qda.fit <- qda(mpg01 ~ cylinders + displacement, subset = train, data = Auto)
# Predict the rows left out of `train` and tabulate predicted vs actual class.
qda.pred <- predict(qda.fit, newdata = Auto[-train, ])
table(qda.pred$class, Auto$mpg01[-train], dnn = c("Predicted", "Actual"))
## Actual
## Predicted 0 1
## 0 45 3
## 1 8 42
# Share of held-out cars classified correctly.
mean(qda.pred$class == Auto$mpg01[-train])
## [1] 0.8877551
This model is slightly better than the previous one, with 89% accuracy (an 11% error rate). There were fewer false positives (8 instead of 12), so the improvement comes mainly from the low-mpg ("0") class.
# Logistic regression of mpg01 on cylinders and displacement, training rows
# only.
glm.fit <- glm(
  mpg01 ~ cylinders + displacement,
  family = "binomial",
  data = Auto,
  subset = train
)
summary(glm.fit)
##
## Call:
## glm(formula = mpg01 ~ cylinders + displacement, family = "binomial",
## data = Auto, subset = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6093 -0.1964 0.3058 0.4688 3.2678
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.743895 0.978458 5.870 4.35e-09 ***
## cylinders -0.112651 0.382326 -0.295 0.768264
## displacement -0.029077 0.007526 -3.864 0.000112 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 407.35 on 293 degrees of freedom
## Residual deviance: 176.80 on 291 degrees of freedom
## AIC: 182.8
##
## Number of Fisher Scoring iterations: 6
# Predicted probabilities for the held-out cars, thresholded at 0.5 into 0/1.
glm.probs <- predict(glm.fit, newdata = Auto[-train, ], type = "response")
glm.pred <- as.numeric(glm.probs > 0.5)
table(glm.pred, Auto$mpg01[-train], dnn = c("Predicted", "Actual"))
## Actual
## Predicted 0 1
## 0 41 1
## 1 12 44
# Share of held-out cars classified correctly.
mean(glm.pred == Auto$mpg01[-train])
## [1] 0.8673469
This model falls right between the two previous models at 87% accuracy (a 13% error rate). It correctly classified the most “1”s (44) and had the fewest false negatives (1), although its 12 false positives match LDA and exceed QDA's 8.
# Naive Bayes on the same two predictors, fitted on the training indices and
# evaluated on the rows left out of `train`.
nb.fit <- naiveBayes(mpg01 ~ cylinders + displacement, subset = train, data = Auto)
nb.class <- predict(nb.fit, newdata = Auto[-train, ])
table(nb.class, Auto$mpg01[-train])
##
## nb.class 0 1
## 0 45 3
## 1 8 42
The bayes model has an accuracy of 89% and error rate of 11%
# Predictor matrices for KNN. The columns are taken from Auto directly instead
# of through the attach()ed copies of `cylinders` and `displacement`, so this
# step no longer depends on the search path.
train.x <- as.matrix(Auto[train, c("cylinders", "displacement")])
test.x <- as.matrix(Auto[-train, c("cylinders", "displacement")])
train.mpg01 <- Auto[train, "mpg01"]
# Seed first: knn() breaks ties between neighbours at random.
set.seed(1)
knn.pred <- knn(train.x, test.x, train.mpg01, k = 1)
table(knn.pred, Auto[-train, "mpg01"])
##
## knn.pred 0 1
## 0 44 2
## 1 9 43
# Share of held-out cars classified correctly.
mean(knn.pred == Auto[-train, "mpg01"])
## [1] 0.8877551
KNN with k = 1 has an 89% accuracy and an 11% error rate.
# Same KNN set-up with k = 5 neighbours; re-seeded so tie-breaking is
# reproducible.
set.seed(1)
knn.pred <- knn(train = train.x, test = test.x, cl = train.mpg01, k = 5)
table(knn.pred, Auto$mpg01[-train])
##
## knn.pred 0 1
## 0 41 0
## 1 12 45
# Share of held-out cars classified correctly.
mean(knn.pred == Auto$mpg01[-train])
## [1] 0.877551
# Naive Bayes predictions for the held-out cars. `newdata` must be the test
# rows of Auto (which carry the predictor columns), not the mpg01 response
# vector; passing the response is what produced the "Type mismatch" warnings
# for 'cylinders' and 'displacement'.
nb.class <- predict(nb.fit, Auto[-train, ])
# KNN with k = 10 neighbours on the same train/test matrices.
knn.pred <- knn(train = train.x, test = test.x, cl = train.mpg01, k = 10)
table(knn.pred, Auto$mpg01[-train])
##
## knn.pred 0 1
## 0 41 0
## 1 12 45
mean(knn.pred == Auto$mpg01[-train])
## [1] 0.877551
# KNN with k = 15 neighbours.
knn.pred <- knn(train = train.x, test = test.x, cl = train.mpg01, k = 15)
table(knn.pred, Auto$mpg01[-train])
##
## knn.pred 0 1
## 0 41 1
## 1 12 44
mean(knn.pred == Auto$mpg01[-train])
## [1] 0.8673469
With KNN values at intervals of 5 between 5 and 15, there were no significant changes.
Problem 16
# Drop any rows with missing values, then preview the first six rows.
Boston <- na.omit(Boston)
head(Boston, n = 6L)
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
# Binary response: 1 when a suburb's crime rate is above the median, else 0.
# The two attach(Boston) calls (and the masking messages they printed) were
# removed: every later statement refers to Boston explicitly, and attaching the
# frame both before and after adding crim01 left two copies on the search path.
crim01 <- as.numeric(Boston$crim > median(Boston$crim))
Boston <- data.frame(Boston, crim01)
# Correlation matrix including crim01, used to pick predictors for the models.
cor(Boston)
## crim zn indus chas nox
## crim 1.00000000 -0.20046922 0.40658341 -0.055891582 0.42097171
## zn -0.20046922 1.00000000 -0.53382819 -0.042696719 -0.51660371
## indus 0.40658341 -0.53382819 1.00000000 0.062938027 0.76365145
## chas -0.05589158 -0.04269672 0.06293803 1.000000000 0.09120281
## nox 0.42097171 -0.51660371 0.76365145 0.091202807 1.00000000
## rm -0.21924670 0.31199059 -0.39167585 0.091251225 -0.30218819
## age 0.35273425 -0.56953734 0.64477851 0.086517774 0.73147010
## dis -0.37967009 0.66440822 -0.70802699 -0.099175780 -0.76923011
## rad 0.62550515 -0.31194783 0.59512927 -0.007368241 0.61144056
## tax 0.58276431 -0.31456332 0.72076018 -0.035586518 0.66802320
## ptratio 0.28994558 -0.39167855 0.38324756 -0.121515174 0.18893268
## black -0.38506394 0.17552032 -0.35697654 0.048788485 -0.38005064
## lstat 0.45562148 -0.41299457 0.60379972 -0.053929298 0.59087892
## medv -0.38830461 0.36044534 -0.48372516 0.175260177 -0.42732077
## crim01 0.40939545 -0.43615103 0.60326017 0.070096774 0.72323480
## rm age dis rad tax ptratio
## crim -0.21924670 0.35273425 -0.37967009 0.625505145 0.58276431 0.2899456
## zn 0.31199059 -0.56953734 0.66440822 -0.311947826 -0.31456332 -0.3916785
## indus -0.39167585 0.64477851 -0.70802699 0.595129275 0.72076018 0.3832476
## chas 0.09125123 0.08651777 -0.09917578 -0.007368241 -0.03558652 -0.1215152
## nox -0.30218819 0.73147010 -0.76923011 0.611440563 0.66802320 0.1889327
## rm 1.00000000 -0.24026493 0.20524621 -0.209846668 -0.29204783 -0.3555015
## age -0.24026493 1.00000000 -0.74788054 0.456022452 0.50645559 0.2615150
## dis 0.20524621 -0.74788054 1.00000000 -0.494587930 -0.53443158 -0.2324705
## rad -0.20984667 0.45602245 -0.49458793 1.000000000 0.91022819 0.4647412
## tax -0.29204783 0.50645559 -0.53443158 0.910228189 1.00000000 0.4608530
## ptratio -0.35550149 0.26151501 -0.23247054 0.464741179 0.46085304 1.0000000
## black 0.12806864 -0.27353398 0.29151167 -0.444412816 -0.44180801 -0.1773833
## lstat -0.61380827 0.60233853 -0.49699583 0.488676335 0.54399341 0.3740443
## medv 0.69535995 -0.37695457 0.24992873 -0.381626231 -0.46853593 -0.5077867
## crim01 -0.15637178 0.61393992 -0.61634164 0.619786249 0.60874128 0.2535684
## black lstat medv crim01
## crim -0.38506394 0.4556215 -0.3883046 0.40939545
## zn 0.17552032 -0.4129946 0.3604453 -0.43615103
## indus -0.35697654 0.6037997 -0.4837252 0.60326017
## chas 0.04878848 -0.0539293 0.1752602 0.07009677
## nox -0.38005064 0.5908789 -0.4273208 0.72323480
## rm 0.12806864 -0.6138083 0.6953599 -0.15637178
## age -0.27353398 0.6023385 -0.3769546 0.61393992
## dis 0.29151167 -0.4969958 0.2499287 -0.61634164
## rad -0.44441282 0.4886763 -0.3816262 0.61978625
## tax -0.44180801 0.5439934 -0.4685359 0.60874128
## ptratio -0.17738330 0.3740443 -0.5077867 0.25356836
## black 1.00000000 -0.3660869 0.3334608 -0.35121093
## lstat -0.36608690 1.0000000 -0.7376627 0.45326273
## medv 0.33346082 -0.7376627 1.0000000 -0.26301673
## crim01 -0.35121093 0.4532627 -0.2630167 1.00000000
# Random 70/30 split of the Boston rows.
# NOTE(review): unlike the earlier splits, no set.seed() call directly precedes
# this sample(), so running this section on its own gives a different split
# from the recorded output.
train <- sample(nrow(Boston), size = 0.7 * nrow(Boston))
test <- Boston[-train, ]
# Logistic regression of crim01 on six predictors, training rows only.
glm.fit <- glm(
  crim01 ~ indus + nox + age + dis + rad + tax,
  family = "binomial",
  data = Boston,
  subset = train
)
summary(glm.fit)
##
## Call:
## glm(formula = crim01 ~ indus + nox + age + dis + rad + tax, family = "binomial",
## data = Boston, subset = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.94918 -0.30759 -0.04657 0.01061 2.80525
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -28.398069 4.687537 -6.058 1.38e-09 ***
## indus -0.068147 0.047921 -1.422 0.15501
## nox 46.760746 8.465762 5.524 3.32e-08 ***
## age 0.019822 0.010246 1.935 0.05305 .
## dis 0.499578 0.174007 2.871 0.00409 **
## rad 0.555643 0.130216 4.267 1.98e-05 ***
## tax -0.006519 0.002840 -2.295 0.02171 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 490.34 on 353 degrees of freedom
## Residual deviance: 182.39 on 347 degrees of freedom
## AIC: 196.39
##
## Number of Fisher Scoring iterations: 8
# Predicted probabilities for the held-out suburbs, thresholded at 0.5 into
# 0/1, then tabulated against the actual crim01 class.
glm.probs <- predict(glm.fit, newdata = Boston[-train, ], type = "response")
glm.pred <- as.numeric(glm.probs > 0.5)
table(glm.pred, Boston$crim01[-train], dnn = c("Predicted", "Actual"))
## Actual
## Predicted 0 1
## 0 60 7
## 1 10 75
# LDA on the same six predictors, fitted on the training rows and evaluated on
# the rows left out of `train`.
lda.fit <- lda(
  crim01 ~ indus + nox + age + dis + rad + tax,
  subset = train,
  data = Boston
)
lda.pred <- predict(lda.fit, newdata = Boston[-train, ])
table(lda.pred$class, Boston$crim01[-train], dnn = c("Predicted", "Actual"))
## Actual
## Predicted 0 1
## 0 65 17
## 1 5 65
# Naive Bayes on the same six predictors. `subset = train` restricts fitting to
# the training rows; without it the model was trained on all of Boston,
# including the rows it is evaluated on below, which leaks test data into the
# fit and makes the result incomparable with the logistic and LDA models.
nb.fit <- naiveBayes(
  crim01 ~ indus + nox + age + dis + rad + tax,
  data = Boston,
  subset = train
)
nb.class <- predict(nb.fit, Boston[-train, ])
table(nb.class, Boston[-train, "crim01"])
## (Re-run to regenerate this confusion matrix: the previously recorded table
## came from a model fitted on all rows, test set included.)
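After numerous hours of errors on KNN, I finally gave up on trying. Of the models fitted, the logistic regression provided the most accurate predictions, with a test error rate of about 11% (17 of the 152 test observations misclassified), compared with about 14% for LDA. Upon observing the p-values, it appears that the most significant predictors were nox and rad, followed by dis and tax; age was only marginally significant (p ≈ 0.053) and indus was not significant.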