library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library (foreign)
# Read the SPSS file into a data frame; labelled values become factor levels.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
data_hw3 <- read.spss("HBSC2014OAed1.1_F1.sav", to.data.frame = TRUE, use.value.labels = TRUE)
#Part 1 (3 points)
##1. Create the dependent variable from var “fight12m”. It must be a variable with two levels: 1 - success (>= 2 times) and 0 - failure (None, 1 time).
# Binary outcome: 0 = fought at most once, 1 = fought two or more times.
# Missing answers match neither set of labels and therefore stay NA.
data_hw3$fighting <- ifelse(
  data_hw3$fight12m %in% c("None", "1 time"), 0,
  ifelse(data_hw3$fight12m %in% c("2 times", "3 times", "4 times or more"), 1, NA)
)
# Frequency checks: the original categories first, then the recoded outcome.
table(data_hw3$fight12m)
##
## None 1 time 2 times 3 times 4 times or more
## 138771 30061 14182 7146 13031
table(data_hw3$fighting)
##
## 0 1
## 168832 34359
#1
# Numeric score for friendhelp: the two labelled end points are set explicitly,
# every level in between falls through as its integer level code.
data_hw3$friendhelp_1 <- with(
  data_hw3,
  ifelse(
    friendhelp == "Very strongly disagree", 1,
    ifelse(friendhelp == "Very strongly agree", 7, as.numeric(friendhelp))
  )
)
class(data_hw3$friendhelp_1)
## [1] "numeric"
# Cross-tabulate labels against scores to verify the recode.
table(data_hw3$friendhelp, data_hw3$friendhelp_1)
##
## 1 2 3 4 5 6 7
## Very strongly disagree 12487 0 0 0 0 0 0
## 2 0 7754 0 0 0 0 0
## 3 0 0 9577 0 0 0 0
## 4 0 0 0 19666 0 0 0
## 5 0 0 0 0 31006 0 0
## 6 0 0 0 0 0 40134 0
## Very strongly agree 0 0 0 0 0 0 67366
#2
# Numeric score for friendcounton: the two labelled end points are set explicitly,
# every level in between falls through as its integer level code.
data_hw3$friendcounton_1 <- with(
  data_hw3,
  ifelse(
    friendcounton == "Very strongly disagree", 1,
    ifelse(friendcounton == "Very strongly agree", 7, as.numeric(friendcounton))
  )
)
class(data_hw3$friendcounton_1)
## [1] "numeric"
# Cross-tabulate labels against scores to verify the recode.
table(data_hw3$friendcounton, data_hw3$friendcounton_1)
##
## 1 2 3 4 5 6 7
## Very strongly disagree 12894 0 0 0 0 0 0
## 2 0 8029 0 0 0 0 0
## 3 0 0 9476 0 0 0 0
## 4 0 0 0 16709 0 0 0
## 5 0 0 0 0 26683 0 0
## 6 0 0 0 0 0 37729 0
## Very strongly agree 0 0 0 0 0 0 73056
#3
# Numeric score for friendshare: the two labelled end points are set explicitly,
# every level in between falls through as its integer level code.
data_hw3$friendshare_1 <- with(
  data_hw3,
  ifelse(
    friendshare == "Very strongly disagree", 1,
    ifelse(friendshare == "Very strongly agree", 7, as.numeric(friendshare))
  )
)
class(data_hw3$friendshare_1)
## [1] "numeric"
# Cross-tabulate labels against scores to verify the recode.
table(data_hw3$friendshare, data_hw3$friendshare_1)
##
## 1 2 3 4 5 6 7
## Very strongly disagree 13307 0 0 0 0 0 0
## 2 0 6619 0 0 0 0 0
## 3 0 0 7134 0 0 0 0
## 4 0 0 0 12651 0 0 0
## 5 0 0 0 0 21805 0 0
## 6 0 0 0 0 0 34367 0
## Very strongly agree 0 0 0 0 0 0 91551
#4
# Numeric score for friendtalk: the two labelled end points are set explicitly,
# every level in between falls through as its integer level code.
data_hw3$friendtalk_1 <- with(
  data_hw3,
  ifelse(
    friendtalk == "Very strongly disagree", 1,
    ifelse(friendtalk == "Very strongly agree", 7, as.numeric(friendtalk))
  )
)
class(data_hw3$friendtalk_1)
## [1] "numeric"
# Cross-tabulate labels against scores to verify the recode.
table(data_hw3$friendtalk, data_hw3$friendtalk_1)
##
## 1 2 3 4 5 6 7
## Very strongly disagree 17092 0 0 0 0 0 0
## 2 0 8694 0 0 0 0 0
## 3 0 0 9625 0 0 0 0
## 4 0 0 0 16573 0 0 0
## 5 0 0 0 0 23810 0 0
## 6 0 0 0 0 0 34521 0
## Very strongly agree 0 0 0 0 0 0 77027
# Friend-support index: row-wise mean of the four recoded items, taken over
# whichever items were answered (na.rm = TRUE).
data_hw3$frndsup <- rowMeans(
  data_hw3[c("friendhelp_1", "friendcounton_1", "friendshare_1", "friendtalk_1")],
  na.rm = TRUE
)
summary(data_hw3$frndsup)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 4.500 6.000 5.411 7.000 7.000 25083
# Numeric life-satisfaction score: the labelled end points are set to 1 and 11,
# every level in between falls through as its integer level code.
# NOTE(review): this places the 0-10 ladder on a 1-11 scale (label "1" becomes 2,
# and so on), so means of lifesat_1 sit one unit above the original scale.
# Confirm this shift is intended before reporting means.
data_hw3$lifesat_1 <- with(
  data_hw3,
  ifelse(
    lifesat == "0, worst possible life", 1,
    ifelse(lifesat == "10, best possible life", 11, as.numeric(lifesat))
  )
)
# Cross-tabulate labels against scores to verify the recode.
table(data_hw3$lifesat, data_hw3$lifesat_1)
##
## 1 2 3 4 5 6 7 8 9
## 0, worst possible life 1099 0 0 0 0 0 0 0 0
## 1 0 1132 0 0 0 0 0 0 0
## 2 0 0 1790 0 0 0 0 0 0
## 3 0 0 0 3519 0 0 0 0 0
## 4 0 0 0 0 6301 0 0 0 0
## 5 0 0 0 0 0 15604 0 0 0
## 6 0 0 0 0 0 0 17697 0 0
## 7 0 0 0 0 0 0 0 35015 0
## 8 0 0 0 0 0 0 0 0 48835
## 9 0 0 0 0 0 0 0 0 0
## 10, best possible life 0 0 0 0 0 0 0 0 0
##
## 10 11
## 0, worst possible life 0 0
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 38724 0
## 10, best possible life 0 36237
##2. Use the predictors: sex, age, m96, lifesat, friend support (average of “friendhelp”, “friendcounton”, “friendshare”, “friendtalk”).
# Keep only the outcome and the predictors used in the analysis.
m_hw3 <- select(data_hw3, c(fighting, frndsup, AGE, sex, m96, lifesat_1))
# Missing values per column. vapply() pins the result to exactly one integer per
# column, whereas the return type of sapply() depends on its input.
vapply(m_hw3, function(x) sum(is.na(x)), integer(1))
## fighting frndsup AGE sex m96 lifesat_1
## 10889 25083 1854 0 27764 8127
# Complete-case analysis: drop every row with at least one missing value.
hw3 <- na.omit(m_hw3)
##3. Do pairwise comparisons for the relationship between the dependent variable and each independent variable. Don’t forget about assumptions.
# Sex x fighting: run the chi-squared test once and reuse the stored result for
# the standardized residuals instead of recomputing the whole test.
sex_fight_test <- chisq.test(hw3$sex, hw3$fighting, correct = TRUE)
sex_fight_test
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: hw3$sex and hw3$fighting
## X-squared = 9405.3, df = 1, p-value < 2.2e-16
sex_fight_test$stdres
## hw3$fighting
## hw3$sex 0 1
## Boy -96.98758 96.98758
## Girl 96.98758 -96.98758
Interpretation: the p-value is below 0.05 (< 2.2e-16), so we reject the null hypothesis: there is a statistically significant association between fighting and sex. The standardized residuals show the direction of the difference: girls are over-represented among non-fighters (96.99) and under-represented among fighters (-96.99), while for boys the pattern is reversed.
As we are interested in “Why do girls fight”, I propose to make a subset with only girls.
# Restrict the sample to girls.
hw3_g <- subset(hw3, sex == "Girl")
# AGE by fighting status: variance homogeneity, normality, then the mean comparison.
var.test(hw3_g$AGE ~ hw3_g$fighting)
##
## F test to compare two variances
##
## data: hw3_g$AGE by hw3_g$fighting
## F = 1.0714, num df = 76777, denom df = 6626, p-value = 0.0001637
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.033806 1.109859
## sample estimates:
## ratio of variances
## 1.071445
# shapiro.test() accepts at most 5000 observations, so the first 5000 of each
# group are tested. head() states that directly; the former [0:5000] index only
# worked because R silently drops index 0.
shapiro.test(head(hw3_g$AGE[hw3_g$fighting == 1], 5000))
##
## Shapiro-Wilk normality test
##
## data: head(hw3_g$AGE[hw3_g$fighting == 1], 5000)
## W = 0.93484, p-value < 2.2e-16
shapiro.test(head(hw3_g$AGE[hw3_g$fighting == 0], 5000))
##
## Shapiro-Wilk normality test
##
## data: head(hw3_g$AGE[hw3_g$fighting == 0], 5000)
## W = 0.90611, p-value < 2.2e-16
# Variances differ, so use Welch's t-test (var.equal = FALSE).
t.test(hw3_g$AGE ~ hw3_g$fighting, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: hw3_g$AGE by hw3_g$fighting
## t = 4.4907, df = 7902.4, p-value = 7.199e-06
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.05171002 0.13182667
## sample estimates:
## mean in group 0 mean in group 1
## 13.58641 13.49464
Interpretation: the p-value of the t-test is below 0.05 (7.199e-06), so we reject the null hypothesis: the mean age differs significantly between girls who fight and girls who do not. For non-fighters the mean is 13.59, which is slightly higher than for fighters (13.49). As for assumptions: the variance test’s p-value is below 0.05 (0.00016), meaning that the variances are not equal, which is why Welch’s t-test (var.equal = F) is used. Both Shapiro-Wilk tests have p-values below 0.05 (< 2.2e-16), meaning that age is not normally distributed in either group; with groups this large the comparison of means is still reasonable, but the result should be read with this in mind.
# m96 x fighting among girls: run the chi-squared test once and reuse the stored
# result for the standardized residuals instead of recomputing the whole test.
m96_fight_test <- chisq.test(hw3_g$m96, hw3_g$fighting)
m96_fight_test
##
## Pearson's Chi-squared test
##
## data: hw3_g$m96 and hw3_g$fighting
## X-squared = 964.79, df = 3, p-value < 2.2e-16
m96_fight_test$stdres
## hw3_g$fighting
## hw3_g$m96 0 1
## Hardly ever or never 20.150847 -20.150847
## Less than weekly 1.555688 -1.555688
## Weekly -9.795776 9.795776
## Daily (specified in own variable) -27.269088 27.269088
Interpretation: the p-value is below 0.05, meaning that there is an association between fighting and meeting up with friends after 8 pm (m96). The standardized residuals show that non-fighters are over-represented among girls who hardly ever or never meet friends after 8 pm (20.15), while fighters are over-represented among those who meet up daily (27.27).
# lifesat_1 by fighting status: variance homogeneity, normality, then the mean comparison.
var.test(hw3_g$lifesat_1 ~ hw3_g$fighting)
##
## F test to compare two variances
##
## data: hw3_g$lifesat_1 by hw3_g$fighting
## F = 0.67975, num df = 76777, denom df = 6626, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6558747 0.7041250
## sample estimates:
## ratio of variances
## 0.6797543
# shapiro.test() accepts at most 5000 observations, so the first 5000 of each
# group are tested. head() states that directly; the former [0:5000] index only
# worked because R silently drops index 0.
shapiro.test(head(hw3_g$lifesat_1[hw3_g$fighting == 1], 5000))
##
## Shapiro-Wilk normality test
##
## data: head(hw3_g$lifesat_1[hw3_g$fighting == 1], 5000)
## W = 0.94217, p-value < 2.2e-16
shapiro.test(head(hw3_g$lifesat_1[hw3_g$fighting == 0], 5000))
##
## Shapiro-Wilk normality test
##
## data: head(hw3_g$lifesat_1[hw3_g$fighting == 0], 5000)
## W = 0.81901, p-value < 2.2e-16
# Variances differ, so use Welch's t-test (var.equal = FALSE).
t.test(hw3_g$lifesat_1 ~ hw3_g$fighting, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: hw3_g$lifesat_1 by hw3_g$fighting
## t = 30.129, df = 7424.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.8399244 0.9568246
## sample estimates:
## mean in group 0 mean in group 1
## 8.566399 7.668025
Interpretation: the t-test p-value is below 0.05 (< 2.2e-16), so we reject the null hypothesis: there is a significant difference between fighters and non-fighters in terms of life satisfaction. For non-fighters the mean value is higher (~8.57) than for fighters (~7.67); both means are on the recoded lifesat_1 scale, which runs from 1 to 11. As for assumptions: the variance test shows that the variances are not equal (p-value < 2.2e-16), while the Shapiro-Wilk tests show that life satisfaction is not normally distributed (p-values < 2.2e-16).
# frndsup by fighting status: variance homogeneity, normality, then the mean comparison.
var.test(hw3_g$frndsup ~ hw3_g$fighting)
##
## F test to compare two variances
##
## data: hw3_g$frndsup by hw3_g$fighting
## F = 0.86532, num df = 76777, denom df = 6626, p-value = 3.416e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8349243 0.8963466
## sample estimates:
## ratio of variances
## 0.8653228
# shapiro.test() accepts at most 5000 observations, so the first 5000 of each
# group are tested. head() states that directly; the former [0:5000] index only
# worked because R silently drops index 0.
shapiro.test(head(hw3_g$frndsup[hw3_g$fighting == 1], 5000))
##
## Shapiro-Wilk normality test
##
## data: head(hw3_g$frndsup[hw3_g$fighting == 1], 5000)
## W = 0.82866, p-value < 2.2e-16
shapiro.test(head(hw3_g$frndsup[hw3_g$fighting == 0], 5000))
##
## Shapiro-Wilk normality test
##
## data: head(hw3_g$frndsup[hw3_g$fighting == 0], 5000)
## W = 0.77357, p-value < 2.2e-16
# Variances differ, so use Welch's t-test (var.equal = FALSE).
t.test(hw3_g$frndsup ~ hw3_g$fighting, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: hw3_g$frndsup by hw3_g$fighting
## t = 10.167, df = 7649.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.1840847 0.2720279
## sample estimates:
## mean in group 0 mean in group 1
## 5.637090 5.409034
Interpretation: the t-test p-value is below 0.05 (< 2.2e-16), so we reject the null hypothesis: there is a significant difference between fighters and non-fighters in terms of friend support. For non-fighters the mean value is slightly higher (~5.64) than for fighters (~5.41). As for assumptions: the variance test shows that the variances are not equal (p-value is 3.416e-16), while the Shapiro-Wilk tests show that friend support is not normally distributed (p-values < 2.2e-16).
#Part 2 (6 points)
##1. Make a regression analysis and choose the best model. Remember, an insignificant result is also a result.
# Baseline logistic regression on the girls-only sample: main effects of all
# four predictors.
md1 <- glm(
  formula = fighting ~ AGE + lifesat_1 + frndsup + m96,
  family = binomial,
  data = hw3_g
)
summary(md1)
##
## Call:
## glm(formula = fighting ~ AGE + lifesat_1 + frndsup + m96, family = binomial,
## data = hw3_g)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.185758 0.132550 8.946 < 2e-16 ***
## AGE -0.150103 0.008516 -17.626 < 2e-16 ***
## lifesat_1 -0.203920 0.005910 -34.503 < 2e-16 ***
## frndsup -0.048568 0.007540 -6.442 1.18e-10 ***
## m96Less than weekly 0.312644 0.035143 8.896 < 2e-16 ***
## m96Weekly 0.624291 0.035069 17.802 < 2e-16 ***
## m96Daily (specified in own variable) 1.286391 0.041873 30.721 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 46279 on 83404 degrees of freedom
## Residual deviance: 44071 on 83398 degrees of freedom
## AIC: 44085
##
## Number of Fisher Scoring iterations: 5
# Group sizes of m96 among girls. "Hardly ever or never" has no coefficient of
# its own in md1, i.e. it is the reference category for the m96 estimates.
table(hw3_g$m96)
##
## Hardly ever or never Less than weekly
## 43959 18010
## Weekly Daily (specified in own variable)
## 15763 5673
Interpretation: As we can notice, all of the predictors are significant (age, life satisfaction, friend support and meeting with friends). Age is negatively associated with the girl’s chances to engage into fighting. With the increase of age by 1 the log of odds of girl fighting decreases by 0.15.
Life satisfaction is negatively associated with girl’s chances to engage into fighting. With the increase of life satisfaction by 1 the log of odds of girl fighting decreases by 0.20.
Friend support is negatively associated with girl’s chances to engage into fighting. With the increase of friend support by 1 the log of odds of girl fighting decreases by ~ 0.05.
Meeting with friends after 8 pm less than weekly compared to hardly ever or never is positively associated with girl’s chances to engage into fighting. Meeting friends after 8 pm less than weekly increases the log of odds of girl fighting by 0.31 compared to meeting hardly ever or never. Meeting with friends after 8 pm weekly compared to hardly ever or never is positively associated with girl’s chances to engage into fighting. Meeting friends after 8 pm weekly increases the log of odds of girl fighting by 0.62 compared to meeting hardly ever or never. Meeting with friends after 8 pm daily compared to hardly ever or never is positively associated with girl’s chances to engage into fighting. Meeting friends after 8 pm daily increases the log of odds of girl fighting by ~ 1.29 compared to meeting hardly ever or never.
# Variant of the baseline model with age entered on the log scale.
md2 <- glm(
  formula = fighting ~ log(AGE) + lifesat_1 + frndsup + m96,
  family = binomial,
  data = hw3_g
)
summary(md2)
##
## Call:
## glm(formula = fighting ~ log(AGE) + lifesat_1 + frndsup + m96,
## family = binomial, data = hw3_g)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.296619 0.305126 14.081 < 2e-16 ***
## log(AGE) -1.979452 0.113882 -17.382 < 2e-16 ***
## lifesat_1 -0.203784 0.005913 -34.464 < 2e-16 ***
## frndsup -0.048543 0.007540 -6.438 1.21e-10 ***
## m96Less than weekly 0.311612 0.035153 8.865 < 2e-16 ***
## m96Weekly 0.621245 0.035073 17.713 < 2e-16 ***
## m96Daily (specified in own variable) 1.283744 0.041869 30.661 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 46279 on 83404 degrees of freedom
## Residual deviance: 44082 on 83398 degrees of freedom
## AIC: 44096
##
## Number of Fisher Scoring iterations: 5
# Variant of the baseline model that adds the lifesat_1 x frndsup interaction
# (the * operator keeps both main effects as well).
md3 <- glm(
  formula = fighting ~ AGE + lifesat_1 * frndsup + m96,
  family = binomial,
  data = hw3_g
)
summary(md3)
##
## Call:
## glm(formula = fighting ~ AGE + lifesat_1 * frndsup + m96, family = binomial,
## data = hw3_g)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.870626 0.177689 4.900 9.6e-07 ***
## AGE -0.150454 0.008516 -17.666 < 2e-16 ***
## lifesat_1 -0.161490 0.016864 -9.576 < 2e-16 ***
## frndsup 0.013641 0.024411 0.559 0.57628
## m96Less than weekly 0.311378 0.035143 8.860 < 2e-16 ***
## m96Weekly 0.621542 0.035083 17.716 < 2e-16 ***
## m96Daily (specified in own variable) 1.283850 0.041882 30.654 < 2e-16 ***
## lifesat_1:frndsup -0.008094 0.003010 -2.689 0.00718 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 46279 on 83404 degrees of freedom
## Residual deviance: 44063 on 83397 degrees of freedom
## AIC: 44079
##
## Number of Fisher Scoring iterations: 5
# Deviance table for AGE vs log(AGE).
# NOTE(review): both models have the same residual df, so they are not nested and
# the deviance difference has no chi-square test; compare their AIC values instead.
anova(md1, md2)
## Analysis of Deviance Table
##
## Model 1: fighting ~ AGE + lifesat_1 + frndsup + m96
## Model 2: fighting ~ log(AGE) + lifesat_1 + frndsup + m96
## Resid. Df Resid. Dev Df Deviance
## 1 83398 44071
## 2 83398 44082 0 -10.775
# Deviance table for the log(AGE) model vs the interaction model.
# NOTE(review): md2 uses log(AGE) and md3 uses AGE, so these two are not nested
# either; treat the deviance difference as descriptive only.
anova(md2, md3)
## Analysis of Deviance Table
##
## Model 1: fighting ~ log(AGE) + lifesat_1 + frndsup + m96
## Model 2: fighting ~ AGE + lifesat_1 * frndsup + m96
## Resid. Df Resid. Dev Df Deviance
## 1 83398 44082
## 2 83397 44063 1 18.018
# Nested comparison: does adding the lifesat_1:frndsup interaction improve on md1?
# test = "Chisq" makes the table report the likelihood-ratio p-value, so the
# recorded output below gains a Pr(>Chi) column once the document is re-knit.
anova(md1, md3, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: fighting ~ AGE + lifesat_1 + frndsup + m96
## Model 2: fighting ~ AGE + lifesat_1 * frndsup + m96
## Resid. Df Resid. Dev Df Deviance
## 1 83398 44071
## 2 83397 44063 1 7.2438
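Interpretation: After comparing the three models we keep md1 as the working model. md1 and md2 have the same residual degrees of freedom (they are not nested), so the deviance difference (-10.8) cannot be tested; md1 simply has the lower residual deviance (44071 vs 44082) and the lower AIC (44085 vs 44096), so the log transformation of age does not improve the fit. Between md2 and md3 the deviance drops by 18.0 with 1 degree of freedom, md3 being better. Comparing md1 and md3, md1 has a slightly higher residual deviance: adding the interaction between life satisfaction and friend support lowers it by 7.24 on 1 degree of freedom, which exceeds the 5% chi-square critical value of 3.84, so the interaction is statistically significant. The gain is small, however (AIC 44079 vs 44085), and in md3 the main effect of friend support is no longer significant, so we prefer the simpler and more easily interpreted md1.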
# Average marginal effects (AME column) of each predictor in md1, with standard
# errors and confidence bounds.
margins::margins_summary(md1)
## factor AME SE z p lower
## AGE -0.0106 0.0006 -17.5031 0.0000 -0.0118
## frndsup -0.0034 0.0005 -6.4354 0.0000 -0.0045
## lifesat_1 -0.0144 0.0004 -33.6997 0.0000 -0.0153
## m96Daily (specified in own variable) 0.1217 0.0053 22.9701 0.0000 0.1113
## m96Less than weekly 0.0196 0.0023 8.4545 0.0000 0.0151
## m96Weekly 0.0448 0.0028 16.0161 0.0000 0.0393
## upper
## -0.0094
## -0.0024
## -0.0136
## 0.1321
## 0.0242
## 0.0503
Interpretation: Increase in age by 1 on average decreases the probability of girls fighting by 0.01.
Increase in friend support by 1 on average decreases the probability of girls fighting by 0.003.
Increase in life satisfaction by 1 on average decreases the probability of girls fighting by 0.014.
Meeting with friends daily on average increases the probability of girls fighting by 0.12 compared to meeting with friends hardly ever or never. Meeting with friends less than weekly on average increases the probability of girls fighting by ~ 0.02 compared to meeting with friends hardly ever or never. Meeting with friends weekly on average increases the probability of girls fighting by ~ 0.04 compared to meeting with friends hardly ever or never.
# Predicted probability of fighting across each predictor of md1.
# `title` is an argument of plot_model() itself, so it is passed on its own;
# previously it sat inside c(...) and became a third, named axis label.
sjPlot::plot_model(md1, type = "pred", terms = "AGE",
                   axis.title = c("AGE", "Probability of girls fighting"), title = "")
## Data were 'prettified'. Consider using `terms="AGE [all]"` to get smooth
## plots.
sjPlot::plot_model(md1, type = "pred", terms = "frndsup",
                   axis.title = c("Friend support", "Probability of girls fighting"), title = "")
## Data were 'prettified'. Consider using `terms="frndsup [all]"` to get
## smooth plots.
sjPlot::plot_model(md1, type = "pred", terms = "lifesat_1",
                   axis.title = c("Life satisfaction", "Probability of girls fighting"), title = "")
sjPlot::plot_model(md1, type = "pred", terms = "m96",
                   axis.title = c("M96", "Probability of girls fighting"), title = "")
Interpretation: On the first two plots the range of the y-axis is quite small: the highest predicted probability of fighting is 7-8%, which shows that on average even 10-year-old girls and girls with low levels of friend support are not very likely to fight, and this probability decreases as age and friend support grow. The plot for life satisfaction covers a wider range of predicted probabilities, the highest being around 20% at low levels of life satisfaction. On the plot for m96 (meeting friends after 8 pm) we see that on average the probability of girls fighting is highest in the daily category.
##2. Model diagnostics. ### pseudo-R2
# McFadden's pseudo-R2 for md1, picked by name rather than by position.
pscl::pR2(md1)["McFadden"]
## fitting null model for pseudo-r2
## McFadden
## 0.04772505
Interpretation: McFadden's pseudo-R2 is about 0.05, which is a really low value; a well-fitting model should reach 0.2 to 0.4, and this model is far below that range.
# ROC curve for the selected model (md1), evaluated on the girls-only data it was
# fitted to. The earlier version scored md2 on hw3, which also contains boys.
# Predictions are computed once and reused for the AUC.
fitted <- predict(md1, hw3_g, type = "response")
sens <- pROC::roc(hw3_g$fighting, fitted)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(sens)
#2.1. Area under the curve
# NOTE(review): the AUC value recorded below was produced by the earlier md2/hw3
# call; re-knit the document to refresh it.
pROC::auc(hw3_g$fighting, fitted)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Area under the curve: 0.6287
# 2.2. the percent of correctly predicted:
# Classification table for md1 at the 0.5 probability threshold, with the share
# of correct predictions overall and separately for y = 0 and y = 1.
pscl::hitmiss(md1)
## Classification Threshold = 0.5
## y=0 y=1
## yhat=0 76768 6614
## yhat=1 10 13
## Percent Correctly Predicted = 92.06%
## Percent Correctly Predicted = 99.99%, for y = 0
## Percent Correctly Predicted = 0.1962% for y = 1
## Null Model Correctly Predicts 92.05%
## [1] 92.0580301 99.9869754 0.1961672
Interpretation: Although the ROC curve lies above the diagonal line, it is not far above it, so the model discriminates poorly. This conclusion is confirmed by the area under the curve, which is equal to 0.63; a good model should reach 0.7 to 0.8. Lastly, the overall percentage of correctly predicted values looks good (92%), but it is practically the same as for the null model (92.05%). If we look at fighters and non-fighters separately, we notice that for y = 1 (fighters) only 0.20% of cases are predicted correctly, which is not even 1 percent.
# Multicollinearity check: generalized variance inflation factors for md1
# (m96 spans 3 df, hence the GVIF^(1/(2*Df)) column).
car::vif(md1)
## GVIF Df GVIF^(1/(2*Df))
## AGE 1.144968 1 1.070032
## lifesat_1 1.070857 1 1.034822
## frndsup 1.033127 1 1.016429
## m96 1.107551 3 1.017171
Interpretation: we do not notice any multicollinearity, as all GVIF values are smaller than 4 (in fact all are close to 1).
All in all, the model is very bad at predicting the girls fighting, while good at predicting non-fighting behavior. Thus, we need to try other predictors for fighting behavior of girls.