## A. Michael Bauer
## CVEN 5454: Quantitative Methods Homework 5
library(MASS)
library(BSDA)
## Loading required package: e1071
## Loading required package: class
## Loading required package: lattice
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
##
## Orange
library(e1071)
library(formatR)
library(akima)
library(knitr)
## Problem 1. 11-77
## 11-77 a)
kwdata = read.table("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess5/Prob-11-77-data.txt")
x = kwdata[, 1]
y = kwdata[, 2]
plot(x, y, main = "kWh vs. kW", xlab = "kilowatt hours", ylab = "kilowatts")
## 11-77 b)
qqnorm(kwdata[, 1])
qqnorm(kwdata[, 2]) ### Data is normally distributed, with a few outliers
kw.lm = lm(y ~ x)
plot(x, predict(kw.lm), type = "l", ylim = range(y))
points(x, y)
## 11-77 c)
summary(kw.lm)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.326 -0.949 -0.218 1.338 2.967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.881927 0.452124 -1.95 0.057 .
## x 0.003848 0.000348 11.05 8.8e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.56 on 48 degrees of freedom
## Multiple R-squared: 0.718, Adjusted R-squared: 0.712
## F-statistic: 122 on 1 and 48 DF, p-value: 8.81e-15
## Since p-value = 8.81e-15 << 0.05 = alpha, we reject the null hypothesis
## that Beta1 = 0, concluding that the slope is significantly different
## from zero.
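## As a quick cross-check (not part of the original output), a 95%
## confidence interval for the slope should exclude zero:
confint(kw.lm)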
## 11-77 d)
zz = lsfit(x, y)
x1 = residuals(zz)
yhat = y - zz$res
plot(yhat, x1, xlab = "Y Estimate", ylab = "Residuals")
## The residuals increase in spread with the fitted values, giving the
## scatterplot a quasi-funnel shape. These are random, independent
## samples, but we cannot assume equality of variance, and the residuals
## do not appear normally distributed. We can check whether the residuals
## are related to one another by using the ACF, below.
library(sm)
## Package `sm', version 2.2-4.1 Copyright (C) 1997, 2000, 2005, 2007, 2008,
## A.W.Bowman & A.Azzalini Type help(sm) for summary information
z1 = acf(x1)
## The funnel-shaped residual plot indicates heteroscedasticity, and the
## ACF lets us check the residuals for serial correlation as well,
## confirming the prior conclusions. If we want to test the fit further,
## we can explore the Coefficient of Determination, R^2:
SSR = sum((yhat - mean(y))^2)
SST = sum((y - mean(y))^2)
SSE = sum((zz$res)^2)
COD = SSR/SST
COD
## [1] 0.7177
# The model accounts for only about 72% of the variability in the data;
# this is not a very good model, as we saw in the plot of residuals
# versus y-hat.
## 11-77 e)
yy = log(y)
xx = log(x)
zz = lsfit(xx, yy)
x1 = residuals(zz)
yhat = yy - zz$res
plot(yhat, x1, xlab = "Transformed Y Estimate", ylab = "Residuals")
## This does not seem to have fully stabilized the variance pattern we saw
## in d), but it does produce a more random scatterplot, indicating that
## the transformation improves the fit.
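## To quantify the improvement (a quick check, using the log-log fit zz
## from above), compare the R^2 of the transformed fit with the 0.7177
## found in d):
1 - sum((zz$res)^2)/sum((yy - mean(yy))^2)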
## Problem 2. 11-78
## 11-78 a) For the simple linear regression model, we choose Beta-hat(0)
## and Beta-hat(1) such that the residuals add up to zero. This follows
## from the least-squares normal equations. Let Y(i) = b0 + b1*X(i) + e(i).
## To estimate the betas, we write the error as e(i) = Y(i) - b0 - b1*X(i)
## and minimize the error sum of squares:
## SSE = e(1)^2 + e(2)^2 + ... + e(n)^2
##     = (Y(1) - b0 - b1*X(1))^2 + (Y(2) - b0 - b1*X(2))^2 + ...
##       + (Y(n) - b0 - b1*X(n))^2
## To minimize with respect to b0 and b1, we differentiate with respect to
## both and set the results equal to zero. This gives us:
## (1) Derivative with respect to b0:
## 2(Y(1) - b0 - b1*X(1))(-1) + 2(Y(2) - b0 - b1*X(2))(-1) + ...
## + 2(Y(n) - b0 - b1*X(n))(-1) = 0
## Simplifying gives:
## (Y(1) - b0 - b1*X(1)) + (Y(2) - b0 - b1*X(2)) + ... + (Y(n) - b0 - b1*X(n)) = 0,
## where each term (Y(i) - b0 - b1*X(i)) = e(i).
## Therefore, e(1) + e(2) + ... + e(n) = 0.
## 11-78 b) Similarly to 11-78 a), differentiate with respect to b1, giving
## 2(Y(1) - b0 - b1*X(1))(-X(1)) + 2(Y(2) - b0 - b1*X(2))(-X(2)) + ...
## + 2(Y(n) - b0 - b1*X(n))(-X(n)) = 0
## Therefore, e(1)*X(1) + e(2)*X(2) + ... + e(n)*X(n) = 0.
## 11-78 c) Similarly to a) and b), the mean of the fitted values of y is
## equal to the sample mean, since yhat(i) = b0 + b1*X(i) and
## b0 = ybar - b1*xbar:
## (1/n)*Sigma(yhat(i)) = (1/n)*Sigma(b0 + b1*X(i))
##                      = (1/n)*Sigma(ybar - b1*xbar + b1*X(i))
##                      = ybar - b1*xbar + b1*xbar = ybar
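## A quick numerical check of identities a)-c) (a sketch on simulated
## data, since the identities hold for any least-squares fit):
set.seed(42)
xs = runif(20, 0, 10)
ys = 1 + 2 * xs + rnorm(20)
fit = lm(ys ~ xs)
sum(residuals(fit))             # a) residuals sum to ~0
sum(residuals(fit) * xs)        # b) x-weighted residuals sum to ~0
c(mean(fitted(fit)), mean(ys))  # c) mean of fitted values = mean of y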
## Problem 3. 11-86
## 11-86 a)
temp.data = read.csv("bauerstein/documents/school/2013 spring/numerical methods/R/Data_1186.csv")
## Warning: cannot open file 'bauerstein/documents/school/2013
## spring/numerical methods/R/Data_1186.csv': No such file or directory
## Error: cannot open the connection
x = temp.data[, 1]
## Error: object 'temp.data' not found
y = temp.data[, 2]
## Error: object 'temp.data' not found
plot(x, y, main = "IR temp vs. Thermocouple temp", xlab = "Thermocouple", ylab = "IR")
qqnorm(x)
qqnorm(y) ## QQ Plots show data is close to normally distributed, with a few outliers; this is expected for a small sample such as this.
## 11-86 b)
temp.lm = lm(y ~ x)
plot(x, predict(temp.lm), type = "l", ylim = range(y), main = "Regression of Thermocouple vs. IR",
xlab = "Thermocouple", ylab = "Predicted IR")
points(x, y)
## 11-86 c)
summary(temp.lm)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.326 -0.949 -0.218 1.338 2.967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.881927 0.452124 -1.95 0.057 .
## x 0.003848 0.000348 11.05 8.8e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.56 on 48 degrees of freedom
## Multiple R-squared: 0.718, Adjusted R-squared: 0.712
## F-statistic: 122 on 1 and 48 DF, p-value: 8.81e-15
z2 = lsfit(x, y)
x2 = residuals(z2)
yhat = y - z2$res
SSR = sum((yhat - mean(y))^2)
SST = sum((y - mean(y))^2)
SSE = sum((z2$res)^2)
COD = SSR/SST
COD
## [1] 0.7177
# It appears that our regression model accounts for only about 72% of the
# variability in the data; this is not a very good model. However, since
# the p-value = 8.8e-15 < alpha = 0.05, we reject the null hypothesis
# that beta1 = 0. The manually computed coefficient of determination
# agrees with the Multiple R-squared from summary(), i.e. 0.718.
## 11-86 d)
c(mean(x), mean(y), var(x), var(y))
## [1] 1.133e+03 3.476e+00 4.103e+05 8.465e+00
anova(temp.lm)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 298 297.7 122 8.8e-15 ***
## Residuals 48 117 2.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## H0: the two devices have the same location, mu(x) = mu(y); H1: they
## differ. Using the Wilcoxon rank-sum test, we reject H0 when the test
## statistic is more extreme than the critical value (the textbook gives
## w0.05 = 78 for the small-sample case nx = ny = 10); here R reports the
## p-value directly.
print(wilcox.test(x, y, alternative = "two.sided", paired = FALSE))
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 2500, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
## Since the p-value < 2.2e-16 << 0.05 = alpha, we reject the null
## hypothesis that the location for each device is the same. Given that
## their means and variances are so divergent, this makes intuitive
## sense.
## 11-86 e)
acf(x2)
## The ACF checks the residuals for serial correlation; together with the
## earlier funnel-shaped residual plot, the diagnostics indicate
## heteroscedasticity, which weakens the model.
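## A formal check of heteroscedasticity could use the Breusch-Pagan test
## (a sketch; assumes the lmtest package is installed, which was not part
## of the original analysis):
library(lmtest)
bptest(temp.lm)  # a small p-value rejects constant error variance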
par(mfrow = c(2, 2))
plot(temp.lm)
## In the first plot, we see that a slight pattern (except for outlier 10)
## exists in the residuals, so the linear model may not be the best one,
## but could be OK; however, as we can see from the last plot, there are
## three values that exert undue influence on the regression
## relationship. Let's try a more robust estimator to see if it can
## account for these outliers:
rlm(y ~ x)
## Call:
## rlm(formula = y ~ x)
## Converged in 4 iterations
##
## Coefficients:
## (Intercept) x
## -0.877449 0.003878
##
## Degrees of freedom: 50 total; 48 residual
## Scale estimate: 1.75
## Since the robust coefficients are nearly the same as the least-squares
## ones, the outliers do not drive the fit, and we can still reject the
## null hypothesis, as before.
par(mfrow = c(1, 1))
## Problem 4. 12-92
## 12-92 a) Fitting the Model: Coefficients
kr.data = read.csv("bauerstein/documents/school/2013 spring/numerical methods/R/Data_1292.csv")
## Warning: cannot open file 'bauerstein/documents/school/2013
## spring/numerical methods/R/Data_1292.csv': No such file or directory
## Error: cannot open the connection
y = kr.data[, 2]
## Error: object 'kr.data' not found
x1 = kr.data[, 1]
## Error: object 'kr.data' not found
x2 = (kr.data[, 1])^2
## Error: object 'kr.data' not found
xfull = cbind(kr.data[, 1], (kr.data[, 1])^2)
## Error: object 'kr.data' not found
kr.lmfull = lm(y ~ xfull)
## Error: object 'xfull' not found
kr.lmfull
## Error: object 'kr.lmfull' not found
## 12-92 b) Test for significance. H0: Beta1 = Beta2 = 0; H1: Betaj does
## not equal 0 for at least one j. Reject the null hypothesis if the
## p-value for the overall F-test is less than alpha.
summary(kr.lmfull)
## Error: object 'kr.lmfull' not found
anova(kr.lmfull)
## Error: object 'kr.lmfull' not found
## p-value = 0.0002 << 0.05 = alpha, therefore we reject the null
## hypothesis and conclude that y is linearly related to the regressors.
## 12-92 c) Contribution of the Quadratic Term to the Model. H0: Beta2
## (the quadratic coefficient) = 0; H1: Beta2 does not equal 0.
kr.lm1 = lm(y ~ x1)
print(anova(kr.lm1, kr.lmfull))
## Error: object 'kr.lmfull' not found
## P-value = 0.0001473 < 0.05 = alpha; therefore, we reject the null
## hypothesis and conclude that Beta2 is significantly different from 0.
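## Since the CSV failed to load above, here is a self-contained sketch of
## the same partial F-test on simulated quadratic data (illustrative
## values only):
set.seed(1)
xs = seq(0, 10, length.out = 30)
ys = 2 + 0.5 * xs + 0.3 * xs^2 + rnorm(30)
fit1 = lm(ys ~ xs)            # reduced model: linear term only
fit2 = lm(ys ~ xs + I(xs^2))  # full model: linear + quadratic
anova(fit1, fit2)             # small p-value => quadratic term contributes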
## Problem 5. 12-93
test = read.table("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess5/prob12-93-data.txt")
jet.data = test[, 2:8]
y = as.matrix(jet.data[, 1])
x1 = as.matrix(jet.data[, 2])
x2 = as.matrix(jet.data[, 3])
x3 = as.matrix(jet.data[, 4])
x4 = as.matrix(jet.data[, 5])
x5 = as.matrix(jet.data[, 6])
xfull = cbind(x1, x2, x3, x4, x5)
N = length(y)
x = xfull
## 12-93 a) Select the Best Regression equation: minimum MS_e
library(leaps)
library(MPV)
## Attaching package: 'MPV'
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## stackloss
combs = leaps(x, y, nbest = N)
combos = combs$which
ncombos = length(combos[, 1])
xpress = 1:ncombos
xmse = 1:ncombos
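## For each candidate subset, fit the model and record its PRESS
## statistic and mean squared error: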
for (i in 1:ncombos) {
zz = glm(y ~ x[, combos[i, ]])
xpress[i] = PRESS(zz)
xmse[i] = sum((zz$res)^2)/(N - length(zz$coef))
}
bestpress = order(xpress)[1]
bestmodelp = lsfit(x[, combos[bestpress, ]], y)
summary(bestmodelp)
## Length Class Mode
## coefficients 5 -none- numeric
## residuals 40 -none- numeric
## intercept 1 -none- logical
## qr 6 qr list
combos[bestpress, ]
## 1 2 3 4 5
## TRUE FALSE TRUE TRUE TRUE
## From these results, we can see that the best model includes x1, x3, x4
## and x5. As such, the fitted equation would be y-hat = 7163.3743 +
## 1.3578*x1 - 0.3215*x3 + 9.6104*x4 + 1.2859*x5
## 12-93 b) Selection of a subset regression model using step()
x = as.data.frame(xfull)
zm = glm(y ~ ., data = x)
zms = step(zm)
## Start: AIC=415.4
## y ~ V1 + V2 + V3 + V4 + V5
##
## Df Deviance AIC
## - V2 1 54686 414
## <none> 53377 415
## - V4 1 62331 420
## - V5 1 62974 420
## - V1 1 68910 424
## - V3 1 69650 424
##
## Step: AIC=414.3
## y ~ V1 + V3 + V4 + V5
##
## Df Deviance AIC
## <none> 54686 414
## - V5 1 64476 419
## - V4 1 65242 419
## - V3 1 69953 422
## - V1 1 71658 423
## Using stepwise selection, we see that the best model uses x1, x3, x4
## and x5, as before, giving the lowest AIC = 414.3.
## 12-93 c) Multicollinearity
xp = cbind(x1, x3, x4, x5)
z2 = lsfit(xp, y)
x2 = residuals(z2)
yhat = y - z2$res
SSR = sum((yhat - mean(y))^2)
SST = sum((y - mean(y))^2)
R2p = SSR/SST
VIFp = 1/(1 - R2p)
VIFp
## [1] 181.6
z3 = lsfit(x, y)
x3 = residuals(z3)
yhat = y - z3$res
SSR = sum((yhat - mean(y))^2)
SST = sum((y - mean(y))^2)
R2 = SSR/SST
VIF = 1/(1 - R2)
VIF
## [1] 186.1
## From the above calculations, we can see that the preferred model has a
## lower VIF, indicating less multicollinearity; however, both values are
## far above the common rule-of-thumb cutoff of 10, indicating severe
## multicollinearity in each model.
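## Note that the conventional VIF is computed per predictor, by
## regressing each regressor on the remaining ones; a sketch of that
## cross-check (not in the original analysis):
vifj = sapply(seq_len(ncol(xp)), function(j) {
    r2j = summary(lm(xp[, j] ~ xp[, -j]))$r.squared
    1/(1 - r2j)
})
vifj  # VIF_j > 10 flags predictor j as involved in multicollinearity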
## 12-93 d) ANOVA and Cook's Distance
zz = lm(y ~ xp)
anova(zz)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## xp 4 9877350 2469338 1580 <2e-16 ***
## Residuals 35 54686 1562
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hatm = hatvalues(zz)  # leverages h_i, returned as a vector
bestmodel = lsfit(xp, y)
SSE = sum((bestmodel$res)^2)
SSR = sum((y - bestmodel$res - mean(y))^2)
MSR = SSR/ncol(xp)
MSE = SSE/(N - length(bestmodel$coef))
ri = bestmodel$res/sqrt((1 - hatm) * MSE)  # studentized residuals
Di = ri * ri * hatm/((1 - hatm) * length(bestmodel$coef))  # Cook's distance
## These give us values for Cook's Distance, none of which approach 1.
## Alternatively, we can plot Cook's distance against leverage:
par(mfrow = c(2, 2))
plot(zz)
par(mfrow = c(1, 1))
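## Cross-check the manual Di against R's built-in cooks.distance()
## (assuming the corrected formula above, the two should agree up to
## rounding):
max(abs(Di - cooks.distance(zz)))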
## Problem 6. Helsel and Hirsch 12.4
## 6 a) Correlation Coefficient
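## mblm() fits a median-based linear model (the Kendall-Theil robust
## line): with repeated = TRUE it uses Siegel's repeated medians,
## otherwise the Theil-Sen median of all pairwise slopes. It is
## reproduced here so the fit does not require the mblm package.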
mblm <- function(formula, dataframe, repeated = TRUE) {
if (missing(dataframe))
dataframe <- environment(formula)
term <- as.character(attr(terms(formula), "variables")[-1])
x = dataframe[[term[2]]]
y = dataframe[[term[1]]]
if (length(term) > 2) {
stop("Only linear models are accepted")
}
xx = sort(x)
yy = y[order(x)]
n = length(xx)
slopes = c()
intercepts = c()
smedians = c()
imedians = c()
if (repeated) {
for (i in 1:n) {
slopes = c()
intercepts = c()
for (j in 1:n) {
if (xx[j] != xx[i]) {
slopes = c(slopes, (yy[j] - yy[i])/(xx[j] - xx[i]))
intercepts = c(intercepts, (xx[j] * yy[i] - xx[i] * yy[j])/(xx[j] -
xx[i]))
}
}
smedians = c(smedians, median(slopes))
imedians = c(imedians, median(intercepts))
}
slope = median(smedians)
intercept = median(imedians)
} else {
for (i in 1:(n - 1)) {
for (j in i:n) {
if (xx[j] != xx[i]) {
slopes = c(slopes, (yy[j] - yy[i])/(xx[j] - xx[i]))
}
}
}
slope = median(slopes)
intercepts = yy - slope * xx
intercept = median(intercepts)
}
res = list()
res$coefficients = c(intercept, slope)
names(res$coefficients) = c("(Intercept)", term[2])
res$residuals = y - slope * x - intercept
names(res$residuals) = as.character(1:length(res$residuals))
res$fitted.values = x * slope + intercept
names(res$fitted.values) = as.character(1:length(res$fitted.values))
if (repeated) {
res$slopes = smedians
res$intercepts = imedians
} else {
res$slopes = slopes
res$intercepts = intercepts
}
res$df.residual = n - 2
res$rank = 2
res$terms = terms(formula)
res$call = match.call()
res$model = data.frame(y, x)
res$assign = c(0, 1)
if (missing(dataframe)) {
res$effects = lm(formula)$effects
res$qr = lm(formula)$qr
} else {
res$effects = lm(formula, dataframe)$effects
res$qr = lm(formula, dataframe)$qr
}
res$effects[2] = sqrt(sum((res$fitted - mean(res$fitted))^2))
res$xlevels = list()
names(res$model) = term
attr(res$model, "terms") = terms(formula)
class(res) = c("mblm", "lm")
res
}
c.data = read.csv("bauerstein/documents/school/2013 spring/numerical methods/R/Data_HH12.4.csv")
## Warning: cannot open file 'bauerstein/documents/school/2013
## spring/numerical methods/R/Data_HH12.4.csv': No such file or directory
## Error: cannot open the connection
carbon = c.data$Carbon.14
## Error: object 'c.data' not found
calcium = c.data$Calcium
## Error: object 'c.data' not found
print(cor(carbon, calcium))
## Error: object 'calcium' not found
## 6 b) Kendall's tau
print(cor.test(carbon, calcium, method = "kendall"))
## Error: object 'carbon' not found
## 6 c) Spearman's Rank
print(cor.test(carbon, calcium, method = "spearman", conf.level = 0.05, two.sided = TRUE))
## Error: object 'carbon' not found
## 6 d)
X = carbon
## Error: object 'carbon' not found
Y = calcium
## Error: object 'calcium' not found
zkt = mblm(Y ~ X)
## Warning: is.na() applied to non-(list or vector) of type 'NULL'
## Error: argument 1 is not a vector
zlin = lsfit(X, Y)
## Error: object 'X' not found
plot(X, Y, xlab = "% Carbon-14", ylab = "% Calcium")
## Error: object 'X' not found
abline(zlin, col = "red")
## Error: object 'zlin' not found
abline(zkt, col = "blue")
## Error: object 'zkt' not found
legend("topright", col = c("red", "blue"), legend = c("Linear Fit", "Kendall-Theile Robust"),
lty = 1, lwd = 1)
## Summary of Data
carbon.lm = lm(calcium ~ carbon, data = c.data)
## Error: object 'c.data' not found
print(summary(carbon.lm))
## Error: object 'carbon.lm' not found
## 6 e)
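## minf() estimates mutual information from kernel density estimates:
## T = Sigma f(x,y) * log2( f(x,y) / (f(x)*f(y)) ), with the joint and
## marginal densities evaluated at the sample points via sm.density().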
minf = function(x, y) {
library(sm)
air3 = cbind(x, y)
result.12 = sm.density(air3, eval.points = air3, display = "none")
result.1 = sm.density(x, eval.points = x, display = "none")
result.2 = sm.density(y, eval.points = y, display = "none")
tobs = sum(result.12$estimate * log2(result.12$estimate/(result.1$estimate *
result.2$estimate)))
tobs
}
library(sm)
library(mvtnorm)
library(akima)
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess3/MI/minf.r")
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess3/MI/mi_confs.r")
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess3/MI/minf_normal.r")
source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess3/MI/mi_normal_confs.r")
library(rpanel)
## Loading required package: tcltk
## Package `rpanel', version 1.1-2: type help(rpanel) for summary information
test = read.table("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess5/prob-12-4-data.txt")
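## The file stores the sample as two side-by-side (x, y) column pairs;
## stack them into a single two-column sample: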
test2 = test[, 1:2]
test3 = test[, 3:4]
colnames(test2) = colnames(test3)
test = rbind(test2, test3)
x = test[, 1]
y = test[, 2]
minf(x, y)
## Loading required package: rgl
## [1] 0.001145
minfconf(x, y)
## [1] 0.001145
## 5% 10%
## "MI range" "4.15781667928057e-05" "0.000144909300351482"
## 50% 90% 95%
## "0.000574415321691489" "0.00140573710599974" "0.00181803216578894"
## 5% 10%
## "Cor range " "-0.43134805589595" "-0.36879462617211"
## 50% 90% 95%
## "0.000921907518499569" "0.353608116563331" "0.458908505972696"
## Error: could not find function "myboxplot"
minfnormal(x, y)
## [1] 0.001333
minfnormalconf(x, y)
## [1] 0.001333
## Error: could not find function "myboxplot"
## Problem 7. Helsel & Herschel 15.1
XandY = read.table("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess5/prob-15-1.txt")
y = XandY[, 5]
x = as.matrix(XandY[, 1:4]) ## this is the second best combination, AIC = 59.70439
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0945 -0.3349 -0.0857 -0.0462 2.1324
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.2054 3.5581 -3.71 0.00021 ***
## xV1 0.5153 0.1509 3.41 0.00064 ***
## xV2 0.4291 0.2749 1.56 0.11851
## xV3 0.0303 0.3246 0.09 0.92551
## xV4 1.0895 0.2986 3.65 0.00026 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 49.704 on 119 degrees of freedom
## AIC: 59.7
##
## Number of Fisher Scoring iterations: 7
print(logistic$aic)
## [1] 59.7
x = as.matrix(XandY[, 1:3])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.198 -0.513 -0.303 -0.209 2.441
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.0840 1.5826 -3.21 0.0013 **
## xV1 0.3304 0.1020 3.24 0.0012 **
## xV2 0.3759 0.1931 1.95 0.0516 .
## xV3 -0.0326 0.2680 -0.12 0.9032
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 74.755 on 120 degrees of freedom
## AIC: 82.75
##
## Number of Fisher Scoring iterations: 6
print(logistic$aic)
## [1] 82.75
x = as.matrix(XandY[, 1:2])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.180 -0.512 -0.304 -0.212 2.452
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.217 1.150 -4.54 5.7e-06 ***
## xV1 0.329 0.101 3.25 0.0011 **
## xV2 0.377 0.193 1.95 0.0510 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 74.769 on 121 degrees of freedom
## AIC: 80.77
##
## Number of Fisher Scoring iterations: 6
print(logistic$aic)
## [1] 80.77
x = as.matrix(XandY[, 2:3])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.686 -0.570 -0.447 -0.332 2.210
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.4265 1.3936 -2.46 0.014 *
## xV2 0.3090 0.1811 1.71 0.088 .
## xV3 0.0491 0.2579 0.19 0.849
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 88.231 on 121 degrees of freedom
## AIC: 94.23
##
## Number of Fisher Scoring iterations: 5
print(logistic$aic)
## [1] 94.23
x = as.matrix(XandY[, 2:4])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.465 -0.412 -0.276 -0.173 2.174
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.658 2.124 -3.61 0.00031 ***
## xV2 0.247 0.223 1.11 0.26814
## xV3 0.109 0.293 0.37 0.70848
## xV4 0.723 0.187 3.87 0.00011 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 69.162 on 120 degrees of freedom
## AIC: 77.16
##
## Number of Fisher Scoring iterations: 6
print(logistic$aic)
## [1] 77.16
x = as.matrix(XandY[, 3:4])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.373 -0.387 -0.268 -0.185 2.293
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6831 1.8048 -3.70 0.00021 ***
## xV3 0.0747 0.2859 0.26 0.79377
## xV4 0.7509 0.1862 4.03 5.5e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 70.471 on 121 degrees of freedom
## AIC: 76.47
##
## Number of Fisher Scoring iterations: 6
print(logistic$aic)
## [1] 76.47
x = as.matrix(XandY[, c(1, 2, 4)]) ## this is the best combination AIC = 57.71315
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0808 -0.3364 -0.0846 -0.0478 2.1443
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.046 3.098 -4.21 2.5e-05 ***
## xV1 0.516 0.151 3.42 0.00062 ***
## xV2 0.426 0.271 1.57 0.11654
## xV4 1.086 0.295 3.68 0.00023 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 49.713 on 120 degrees of freedom
## AIC: 57.71
##
## Number of Fisher Scoring iterations: 7
print(logistic$aic)
## [1] 57.71
x = as.matrix(XandY[, c(1, 3, 4)])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0654 -0.2861 -0.1124 -0.0656 2.0601
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.8297 2.8156 -3.85 0.00012 ***
## xV1 0.4639 0.1360 3.41 0.00065 ***
## xV3 -0.0133 0.3117 -0.04 0.96604
## xV4 1.0728 0.2840 3.78 0.00016 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 52.543 on 120 degrees of freedom
## AIC: 60.54
##
## Number of Fisher Scoring iterations: 7
print(logistic$aic)
## [1] 60.54
x = as.matrix(XandY[, 1])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.853 -0.565 -0.290 -0.231 2.699
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.6151 0.6998 -5.17 2.4e-07 ***
## x 0.3101 0.0988 3.14 0.0017 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 79.092 on 122 degrees of freedom
## AIC: 83.09
##
## Number of Fisher Scoring iterations: 6
print(logistic$aic)
## [1] 83.09
x = as.matrix(XandY[, 2])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.673 -0.585 -0.439 -0.326 2.186
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.214 0.825 -3.90 9.8e-05 ***
## x 0.307 0.180 1.71 0.088 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 88.268 on 122 degrees of freedom
## AIC: 92.27
##
## Number of Fisher Scoring iterations: 5
print(logistic$aic)
## [1] 92.27
x = as.matrix(XandY[, 3])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.517 -0.517 -0.517 -0.495 2.115
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1694 1.1002 -1.97 0.049 *
## x 0.0446 0.2540 0.18 0.861
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 91.443 on 122 degrees of freedom
## AIC: 95.44
##
## Number of Fisher Scoring iterations: 4
print(logistic$aic)
## [1] 95.44
x = as.matrix(XandY[, 4])
logistic = glm(y ~ x, family = "binomial")
print(summary(logistic))
##
## Call:
## glm(formula = y ~ x, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.343 -0.377 -0.262 -0.181 2.315
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.347 1.239 -5.12 3.0e-07 ***
## x 0.747 0.185 4.04 5.3e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 91.474 on 123 degrees of freedom
## Residual deviance: 70.540 on 122 degrees of freedom
## AIC: 74.54
##
## Number of Fisher Scoring iterations: 6
print(logistic$aic)
## [1] 74.54
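## The exhaustive search above can be automated with step(), as in
## Problem 5 (a sketch; it should recover the best subset found manually,
## V1 + V2 + V4 with AIC = 57.71):
dat = as.data.frame(XandY[, 1:4])
dat$y = XandY[, 5]
full = glm(y ~ ., data = dat, family = "binomial")
step(full)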