library(readr)
library(bestglm)
library(Stat2Data)
SpeedDating = read_csv("http://mclean.web.unc.edu/files/2020/04/SpeedDating.csv")
SpeedDatingM = read_csv("http://mclean.web.unc.edu/files/2020/04/SpeedDatingM.csv")
SpeedDatingF = read_csv("http://mclean.web.unc.edu/files/2020/04/SpeedDatingF.csv")
#1
ageDiff = SpeedDating$AgeM - SpeedDating$AgeF
mod1 = glm(DecisionM ~ ageDiff, family = binomial, data = SpeedDating)
summary(mod1)
##
## Call:
## glm(formula = DecisionM ~ ageDiff, family = binomial, data = SpeedDating)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5849 -1.2048 0.8798 1.0968 1.3354
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.14998 0.15291 0.981 0.3267
## ageDiff -0.08565 0.04003 -2.140 0.0324 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 246.53 on 178 degrees of freedom
## Residual deviance: 241.72 on 177 degrees of freedom
## AIC: 245.72
##
## Number of Fisher Scoring iterations: 4
#2
B0 = summary(mod1)$coef[1]
B1 = summary(mod1)$coef[2]
plot(jitter(DecisionM, amount = 0.1) ~ ageDiff, data = SpeedDating)
curve(exp(B0+B1*x)/(1+exp(B0+B1*x)), add = TRUE, col = "red")
According to the plot, at an age difference of one to three years do males have approximately a 50% chance of wanting another date with the female.
#3
exp(confint.default(mod1))
## 2.5 % 97.5 %
## (Intercept) 0.8609474 1.5678025
## ageDiff 0.8486446 0.9928374
For every five year increase in the age gap, the model predicts that the odds of the male wanting another date with the female will increase about 5.1%.
#4
mod2 = glm(DecisionM ~ ageDiff + RaceM + ageDiff*RaceM, family = binomial, data = SpeedDating)
summary(mod2)
##
## Call:
## glm(formula = DecisionM ~ ageDiff + RaceM + ageDiff * RaceM,
## family = binomial, data = SpeedDating)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5612 -1.1935 0.9062 1.0707 1.3959
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.727188 0.403018 1.804 0.0712 .
## ageDiff 0.008147 0.124442 0.065 0.9478
## RaceM -0.215247 0.138798 -1.551 0.1210
## ageDiff:RaceM -0.031859 0.039541 -0.806 0.4204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 246.53 on 178 degrees of freedom
## Residual deviance: 239.02 on 175 degrees of freedom
## AIC: 247.02
##
## Number of Fisher Scoring iterations: 4
The null hypothesis is that the race of the male does not play a role in the relationship between if males would like another date based on how much older the male is than the female. The alternative hypothesis is that the race of the male does play a role in the relationship between if males would like another date based on how much older the male is than the female. The p-values were not significant so the null hypothesis cannot be rejected and we cannot conclude that the model is effective in predicting if the male would like another date.
#5
Full2 = lm(DecisionM ~ ageDiff + AttractiveM + SincereM + IntelligentM + FunM + AmbitiousM + SharedInterestsM, family = binomial, data = SpeedDating)
## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
## extra argument 'family' will be disregarded
MSE=(summary(Full2)$sigma)^2
none = lm(DecisionM~1, data = SpeedDating)
step(none, scope = list(upper = Full2), scale = MSE)
## Start: AIC=69.73
## DecisionM ~ 1
##
## Df Sum of Sq RSS Cp
## + AttractiveM 1 11.1176 33.229 9.8762
## + SharedInterestsM 1 2.7817 41.565 56.2550
## + FunM 1 1.5247 42.822 63.2483
## + AmbitiousM 1 1.2613 43.085 64.7139
## + ageDiff 1 1.1664 43.180 65.2419
## + IntelligentM 1 0.4745 43.872 69.0914
## <none> 44.346 69.7314
## + SincereM 1 0.2246 44.122 70.4817
##
## Step: AIC=9.88
## DecisionM ~ AttractiveM
##
## Df Sum of Sq RSS Cp
## + IntelligentM 1 1.2567 31.972 4.8844
## + ageDiff 1 1.0533 32.176 6.0161
## + AmbitiousM 1 0.4687 32.760 9.2687
## + SincereM 1 0.4209 32.808 9.5343
## + FunM 1 0.4172 32.812 9.5548
## <none> 33.229 9.8762
## + SharedInterestsM 1 0.0671 33.162 11.5029
## - AttractiveM 1 11.1176 44.346 69.7314
##
## Step: AIC=4.88
## DecisionM ~ AttractiveM + IntelligentM
##
## Df Sum of Sq RSS Cp
## + ageDiff 1 0.9978 30.974 1.3328
## <none> 31.972 4.8844
## + SharedInterestsM 1 0.2464 31.726 5.5133
## + FunM 1 0.0373 31.935 6.6770
## + AmbitiousM 1 0.0237 31.948 6.7526
## + SincereM 1 0.0218 31.950 6.7633
## - IntelligentM 1 1.2567 33.229 9.8762
## - AttractiveM 1 11.8997 43.872 69.0914
##
## Step: AIC=1.33
## DecisionM ~ AttractiveM + IntelligentM + ageDiff
##
## Df Sum of Sq RSS Cp
## <none> 30.974 1.3328
## + SharedInterestsM 1 0.1594 30.815 2.4460
## + SincereM 1 0.0129 30.961 3.2613
## + FunM 1 0.0111 30.963 3.2708
## + AmbitiousM 1 0.0063 30.968 3.2975
## - ageDiff 1 0.9978 31.972 4.8844
## - IntelligentM 1 1.2012 32.176 6.0161
## - AttractiveM 1 11.7110 42.685 64.4896
##
## Call:
## lm(formula = DecisionM ~ AttractiveM + IntelligentM + ageDiff,
## data = SpeedDating)
##
## Coefficients:
## (Intercept) AttractiveM IntelligentM ageDiff
## -0.09451 0.17339 -0.07013 -0.01885
summary(Full2)
##
## Call:
## lm(formula = DecisionM ~ ageDiff + AttractiveM + SincereM + IntelligentM +
## FunM + AmbitiousM + SharedInterestsM, data = SpeedDating,
## family = binomial)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.96598 -0.33243 0.04258 0.36070 0.90041
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.068251 0.200215 -0.341 0.7336
## ageDiff -0.017374 0.008123 -2.139 0.0339 *
## AttractiveM 0.171541 0.024159 7.101 3.19e-11 ***
## SincereM -0.009497 0.025462 -0.373 0.7096
## IntelligentM -0.061412 0.035385 -1.736 0.0844 .
## FunM -0.008816 0.024670 -0.357 0.7213
## AmbitiousM -0.008180 0.025659 -0.319 0.7503
## SharedInterestsM 0.019786 0.018120 1.092 0.2764
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.424 on 171 degrees of freedom
## Multiple R-squared: 0.3069, Adjusted R-squared: 0.2786
## F-statistic: 10.82 on 7 and 171 DF, p-value: 2.9e-11
newmod = lm(DecisionM ~ AttractiveM + IntelligentM + ageDiff, family = binomial, data = SpeedDating)
## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
## extra argument 'family' will be disregarded
SpeedDatingData = data.frame(ageDiff = 0, AttractiveM = 7, SincereM = 7, IntelligentM = 7, FunM = 7, AmbitiousM = 7, SharedInterestsM = 7)
predict.lm(newmod, SpeedDatingData, interval = "confidence", level = 0.95)
## fit lwr upr
## 1 0.628276 0.5542543 0.7022976
predict.lm(newmod, SpeedDatingData, interval = "prediction", level = 0.95)
## fit lwr upr
## 1 0.628276 -0.2053334 1.461885
The predicted probability that the male would like another date, with these specific form answers, is 0.629. We are 95% confident that the probability of a male liking another date falls between 0.0554 and 0.702. We are 95% confident that the price of an individual male wanting another date falls between -0.205 and 1.46.
#6
Full3 = lm(Like ~., family = binomial, data = SpeedDatingF)
## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
## extra argument 'family' will be disregarded
MSE=(summary(Full2)$sigma)^2
none = lm(Like~1, data = SpeedDatingF)
step(none, scope = list(upper = Full3), scale = MSE)
## Start: AIC=2918.67
## Like ~ 1
##
## Df Sum of Sq RSS Cp
## + Fun 1 232.246 324.16 1628.5
## + SharedInterests 1 211.363 345.04 1744.7
## + Attractive 1 197.764 358.64 1820.4
## + Sincere 1 118.500 437.90 2261.4
## + Intelligent 1 106.002 450.40 2330.9
## + PartnerYes 1 92.303 464.10 2407.1
## + Ambitious 1 63.539 492.86 2567.2
## <none> 556.40 2918.7
##
## Step: AIC=1628.52
## Like ~ Fun
##
## Df Sum of Sq RSS Cp
## + SharedInterests 1 50.209 273.95 1351.2
## + Attractive 1 49.476 274.68 1355.2
## + Intelligent 1 28.766 295.39 1470.5
## + Sincere 1 22.662 301.49 1504.4
## + PartnerYes 1 16.146 308.01 1540.7
## + Ambitious 1 5.024 319.13 1602.6
## <none> 324.16 1628.5
## - Fun 1 232.246 556.40 2918.7
##
## Step: AIC=1351.17
## Like ~ Fun + SharedInterests
##
## Df Sum of Sq RSS Cp
## + Attractive 1 35.229 238.72 1157.2
## + Intelligent 1 23.776 250.17 1220.9
## + Sincere 1 19.738 254.21 1243.4
## + Ambitious 1 6.766 267.18 1315.5
## + PartnerYes 1 1.484 272.46 1344.9
## <none> 273.95 1351.2
## - SharedInterests 1 50.209 324.16 1628.5
## - Fun 1 71.091 345.04 1744.7
##
## Step: AIC=1157.17
## Like ~ Fun + SharedInterests + Attractive
##
## Df Sum of Sq RSS Cp
## + Intelligent 1 19.327 219.39 1051.6
## + Sincere 1 16.438 222.28 1067.7
## + Ambitious 1 6.021 232.70 1125.7
## + PartnerYes 1 1.594 237.12 1150.3
## <none> 238.72 1157.2
## - Fun 1 29.549 268.27 1319.6
## - Attractive 1 35.229 273.95 1351.2
## - SharedInterests 1 35.961 274.68 1355.2
##
## Step: AIC=1051.64
## Like ~ Fun + SharedInterests + Attractive + Intelligent
##
## Df Sum of Sq RSS Cp
## + Sincere 1 3.799 215.59 1032.5
## + PartnerYes 1 3.519 215.87 1034.1
## <none> 219.39 1051.6
## + Ambitious 1 0.354 219.04 1051.7
## - Intelligent 1 19.327 238.72 1157.2
## - Fun 1 19.549 238.94 1158.4
## - Attractive 1 30.780 250.17 1220.9
## - SharedInterests 1 32.968 252.36 1233.1
##
## Step: AIC=1032.5
## Like ~ Fun + SharedInterests + Attractive + Intelligent + Sincere
##
## Df Sum of Sq RSS Cp
## + PartnerYes 1 2.034 213.56 1023.2
## <none> 215.59 1032.5
## + Ambitious 1 0.218 215.38 1033.3
## - Sincere 1 3.799 219.39 1051.6
## - Intelligent 1 6.688 222.28 1067.7
## - Fun 1 14.955 230.55 1113.7
## - Attractive 1 30.225 245.82 1198.7
## - SharedInterests 1 32.891 248.48 1213.5
##
## Step: AIC=1023.18
## Like ~ Fun + SharedInterests + Attractive + Intelligent + Sincere +
## PartnerYes
##
## Df Sum of Sq RSS Cp
## <none> 213.56 1023.2
## + Ambitious 1 0.2249 213.33 1023.9
## - PartnerYes 1 2.0343 215.59 1032.5
## - Sincere 1 2.3140 215.87 1034.1
## - Intelligent 1 8.2037 221.76 1066.8
## - Fun 1 13.9780 227.54 1099.0
## - SharedInterests 1 20.9934 234.55 1138.0
## - Attractive 1 30.2642 243.82 1189.6
##
## Call:
## lm(formula = Like ~ Fun + SharedInterests + Attractive + Intelligent +
## Sincere + PartnerYes, data = SpeedDatingF)
##
## Coefficients:
## (Intercept) Fun SharedInterests Attractive
## -0.19326 0.19510 0.20776 0.25215
## Intelligent Sincere PartnerYes
## 0.19787 0.09035 0.05806
summary(Full3)
##
## Call:
## lm(formula = Like ~ ., data = SpeedDatingF, family = binomial)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2767 -0.5590 0.0454 0.5757 4.2051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.23443 0.52643 -0.445 0.65665
## PartnerYes 0.05816 0.04547 1.279 0.20253
## Attractive 0.25246 0.05120 4.931 1.93e-06 ***
## Sincere 0.08850 0.06648 1.331 0.18490
## Intelligent 0.18625 0.08188 2.275 0.02416 *
## Fun 0.18909 0.05998 3.153 0.00191 **
## Ambitious 0.02401 0.05656 0.425 0.67168
## SharedInterests 0.20941 0.05080 4.123 5.83e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.117 on 171 degrees of freedom
## Multiple R-squared: 0.6166, Adjusted R-squared: 0.6009
## F-statistic: 39.28 on 7 and 171 DF, p-value: < 2.2e-16
mod2 = glm(factor(Like) ~ Fun + SharedInterests + Attractive + Intelligent + Sincere + PartnerYes, family = binomial, data = SpeedDatingF)
summary(mod2)
##
## Call:
## glm(formula = factor(Like) ~ Fun + SharedInterests + Attractive +
## Intelligent + Sincere + PartnerYes, family = binomial, data = SpeedDatingF)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.66519 0.00285 0.01149 0.04162 1.53106
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.8765 4.5289 -2.402 0.0163 *
## Fun -0.2870 0.6037 -0.475 0.6345
## SharedInterests 0.8958 0.7473 1.199 0.2306
## Attractive 0.1770 0.4346 0.407 0.6838
## Intelligent 0.5108 0.4461 1.145 0.2523
## Sincere 1.0195 0.5648 1.805 0.0711 .
## PartnerYes 0.7078 0.4454 1.589 0.1120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 45.638 on 178 degrees of freedom
## Residual deviance: 15.726 on 172 degrees of freedom
## AIC: 29.726
##
## Number of Fisher Scoring iterations: 10
plot(mod2)
abline(mod2)
## Warning in abline(mod2): only using the first two of 7 regression coefficients
The model appears to be sort of linear (qqplot), but not exactly the best model (resid vs fitted).
#7
summary(mod2)
##
## Call:
## glm(formula = factor(Like) ~ Fun + SharedInterests + Attractive +
## Intelligent + Sincere + PartnerYes, family = binomial, data = SpeedDatingF)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.66519 0.00285 0.01149 0.04162 1.53106
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.8765 4.5289 -2.402 0.0163 *
## Fun -0.2870 0.6037 -0.475 0.6345
## SharedInterests 0.8958 0.7473 1.199 0.2306
## Attractive 0.1770 0.4346 0.407 0.6838
## Intelligent 0.5108 0.4461 1.145 0.2523
## Sincere 1.0195 0.5648 1.805 0.0711 .
## PartnerYes 0.7078 0.4454 1.589 0.1120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 45.638 on 178 degrees of freedom
## Residual deviance: 15.726 on 172 degrees of freedom
## AIC: 29.726
##
## Number of Fisher Scoring iterations: 10
G = 45.638 - 15.726
G
## [1] 29.912
1 - pchisq(G, 6)
## [1] 4.08519e-05
The value of the p-value allows us to reject the null hypothesis that the model from part 6 is useful in predicting the ratings for how much males like their female date.