Bike <- read.csv("/Users/Lorraine/Desktop/Handlebars.csv", header = TRUE)
hand1 <- c(120, 17)
hand2 <- c(578, 154)
BikeTable <- cbind(hand1, hand2)
chisq.test(BikeTable)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: BikeTable
## X-squared = 4.9049, df = 1, p-value = 0.02678
There was a significant association between nationalities and whether people ride bikes with one or two hands x^2(1) = 4.9049, P< 0.05.
Exam <- read.csv("/Users/Lorraine/Desktop/LogRegProj.csv", header = TRUE)
Exam.Model1 <- glm(honcomp ~ science + read + ses, data = Exam, family = binomial())
summary (Exam.Model1)
##
## Call:
## glm(formula = honcomp ~ science + read + ses, family = binomial(),
## data = Exam)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7870 -0.6745 -0.3734 -0.1605 2.6044
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.58790 1.54277 -6.215 5.14e-10 ***
## science 0.06569 0.02710 2.424 0.015343 *
## read 0.08058 0.02413 3.339 0.000841 ***
## ses 0.20991 0.27507 0.763 0.445404
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 222.71 on 199 degrees of freedom
## Residual deviance: 173.62 on 196 degrees of freedom
## AIC: 181.62
##
## Number of Fisher Scoring iterations: 5
Exam.Model0 <- glm(honcomp ~ 1, data = Exam, family = binomial())
summary(Exam.Model0)
##
## Call:
## glm(formula = honcomp ~ 1, family = binomial(), data = Exam)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.7497 -0.7497 -0.7497 -0.7497 1.6772
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.1255 0.1644 -6.845 7.62e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 222.71 on 199 degrees of freedom
## Residual deviance: 222.71 on 199 degrees of freedom
## AIC: 224.71
##
## Number of Fisher Scoring iterations: 4
modelChisq <- Exam.Model1$null.deviance - Exam.Model1$deviance
modelChisq
## [1] 49.09341
#degrees of freedom
chidf <- Exam.Model1$df.null - Exam.Model1$df.residual
chidf
## [1] 3
#the probability
chisq.sig <- 1 - pchisq(modelChisq, chidf)
chisq.sig
## [1] 1.246075e-10
predicted <- predict(Exam.Model1, Exam, type="response")
predicted <- ifelse(predicted > 0.5,1,0)
misClasificError <- mean(predicted != Exam$honcomp)
print(paste('Accuracy',1-misClasificError))
## [1] "Accuracy 0.78"
The overall model is significant x^2(3) = 49.09, P< 0.05. 78% of the cases were correctly classified as either pass or fail by the model. Both science and read are statistically significant predictors.
log.model <- glm(honcomp ~ female + race + schtyp + prog + math +socst + science + read + ses, data = Exam, family = binomial())
library(MASS)
STEPWISE<- stepAIC(log.model, direction = "both", trace=FALSE)
summary(STEPWISE)
##
## Call:
## glm(formula = honcomp ~ female + math + science + read, family = binomial(),
## data = Exam)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.90016 -0.59534 -0.29969 -0.09823 2.56334
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.12273 1.99662 -6.572 4.95e-11 ***
## female 1.17317 0.44517 2.635 0.00841 **
## math 0.10260 0.03270 3.137 0.00171 **
## science 0.05544 0.03195 1.735 0.08271 .
## read 0.04682 0.02711 1.727 0.08422 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 222.71 on 199 degrees of freedom
## Residual deviance: 153.07 on 195 degrees of freedom
## AIC: 163.07
##
## Number of Fisher Scoring iterations: 5
summary(log.model)
##
## Call:
## glm(formula = honcomp ~ female + race + schtyp + prog + math +
## socst + science + read + ses, family = binomial(), data = Exam)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.00799 -0.57389 -0.29279 -0.08559 2.62867
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.17580 2.33643 -5.639 1.71e-08 ***
## female 1.20246 0.45649 2.634 0.00844 **
## race -0.15410 0.23632 -0.652 0.51436
## schtyp -0.26350 0.54907 -0.480 0.63130
## prog 0.04006 0.35428 0.113 0.90997
## math 0.10028 0.03327 3.014 0.00258 **
## socst 0.01599 0.02776 0.576 0.56459
## science 0.05754 0.03369 1.708 0.08766 .
## read 0.03633 0.03069 1.184 0.23650
## ses 0.24412 0.30948 0.789 0.43023
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 222.71 on 199 degrees of freedom
## Residual deviance: 151.31 on 190 degrees of freedom
## AIC: 171.31
##
## Number of Fisher Scoring iterations: 5
model <- glm(honcomp ~ female + race + schtyp + prog + math +socst, data = Exam, family = binomial())
summary(model)
##
## Call:
## glm(formula = honcomp ~ female + race + schtyp + prog + math +
## socst, family = binomial(), data = Exam)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8016 -0.6428 -0.3244 -0.1097 2.7790
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.648447 2.077163 -5.608 2.05e-08 ***
## female 0.894922 0.419937 2.131 0.0331 *
## race -0.013027 0.221980 -0.059 0.9532
## schtyp -0.224723 0.531017 -0.423 0.6722
## prog 0.001672 0.343604 0.005 0.9961
## math 0.142212 0.029214 4.868 1.13e-06 ***
## socst 0.043693 0.025178 1.735 0.0827 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 222.71 on 199 degrees of freedom
## Residual deviance: 157.84 on 193 degrees of freedom
## AIC: 171.84
##
## Number of Fisher Scoring iterations: 5
modelChisq1 <- model$null.deviance - model$deviance
modelChisq1
## [1] 64.86826
#degrees of freedom
chidf1 <- model$df.null - model$df.residual
chidf1
## [1] 6
#the probability
chisq.sig1 <- 1 - pchisq(modelChisq1, chidf1)
chisq.sig1
## [1] 4.589551e-12
anova(Exam.Model1, model)
## Analysis of Deviance Table
##
## Model 1: honcomp ~ science + read + ses
## Model 2: honcomp ~ female + race + schtyp + prog + math + socst
## Resid. Df Resid. Dev Df Deviance
## 1 196 173.62
## 2 193 157.84 3 15.775
The second model is not sognificant x^2(4) = 6.476, P>0.05. The second model is worse than the model used to answer question 1 x^2(3) = 49.09, P< 0.05.
TV <- read.csv("/Users/Lorraine/Desktop/eastenders.csv", header = TRUE)
EastEnders <- TV$EASTEND
Friends <- TV$FRIENDS
NG <- TV$WHALES
datafre <- data.frame(EastEnders, Friends, NG)
friedman.test(as.matrix(datafre))
##
## Friedman rank sum test
##
## data: as.matrix(datafre)
## Friedman chi-squared = 7.5859, df = 2, p-value = 0.02253
library(pgirmess)
friedmanmc(as.matrix(datafre))
## Multiple comparisons between groups after Friedman test
## p.value: 0.05
## Comparisons
## obs.dif critical.dif difference
## 1-2 82.0 43.09164 TRUE
## 1-3 195.5 43.09164 TRUE
## 2-3 113.5 43.09164 TRUE
Friedman’s ANOVA suggested that TV programs do have significant effects on couple’s domestic life. X^2(2)= 7.5859, p < 0.05. Specificly, the post hoc tests revealed that each TV program groups differs significicantly.