Logistic regression
#In the model m1log we include only the sociodemographic independent variables while in m2log we add the variables for violence in childhood.
m1log<-svyglm(afsr~clind+relig+urb+nacbc+esco+work, design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
m2log<-svyglm(afsr~clind+relig+urb+nacbc+esco+work+vabuse+phabuse+sexabuse, design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(m1log)
##
## Call:
## svyglm(formula = afsr ~ clind + relig + urb + nacbc + esco +
## work, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~estrato, weights = ~pondera, data = yoloe)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.64543 0.18577 3.474 0.000571 ***
## clindindigenous 0.27968 0.70725 0.395 0.692732
## religchristian/protestant/other 0.56241 0.24792 2.269 0.023860 *
## religno religion 0.39582 0.38588 1.026 0.305670
## urbnon-urban -0.25154 0.20107 -1.251 0.211710
## nacbcOther state 0.20875 0.17775 1.174 0.240995
## escoprimary -0.19976 0.27973 -0.714 0.475598
## escoprofesional -0.57036 0.23310 -2.447 0.014865 *
## escosecundary 0.03348 0.19871 0.168 0.866282
## workno worked before 18 -0.30195 0.17693 -1.707 0.088723 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.001881)
##
## Number of Fisher Scoring iterations: 4
summary(m2log)
##
## Call:
## svyglm(formula = afsr ~ clind + relig + urb + nacbc + esco +
## work + vabuse + phabuse + sexabuse, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~estrato, weights = ~pondera, data = yoloe)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.23263 1.07283 1.149 0.2513
## clindindigenous 0.24909 0.68122 0.366 0.7148
## religchristian/protestant/other 0.62952 0.24861 2.532 0.0117 *
## religno religion 0.49747 0.39592 1.257 0.2097
## urbnon-urban -0.27498 0.20164 -1.364 0.1735
## nacbcOther state 0.25543 0.18267 1.398 0.1629
## escoprimary -0.25532 0.28292 -0.902 0.3674
## escoprofesional -0.55291 0.23724 -2.331 0.0203 *
## escosecundary 0.01186 0.20097 0.059 0.9530
## workno worked before 18 -0.32032 0.18235 -1.757 0.0798 .
## vabuseNo -0.61657 0.24917 -2.474 0.0138 *
## phabuseNo 0.14811 0.18877 0.785 0.4332
## sexabuseNo -0.18063 1.04016 -0.174 0.8622
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9973785)
##
## Number of Fisher Scoring iterations: 4
stargazer(m1log, m2log, type="text", style="demography", title = "Logistic regression model for age at first sexual relation", covariate.labels = c("Indigenous", "Christian/Protestant/Other", "No religion", "Non-urban", "Other state", "Primary", "Profesional", "Secundary", "No worked before 18", "No verbal abuse", "No physical abuse", "No sexual abuse"), keep.stat = "n", align=T, ci=T)
##
## Logistic regression model for age at first sexual relation
## ------------------------------------------------------------
## afsr
## Model 1 Model 2
## ------------------------------------------------------------
## Indigenous 0.280 0.249
## (-1.107, 1.666) (-1.086, 1.584)
## Christian/Protestant/Other 0.562* 0.630*
## (0.077, 1.048) (0.142, 1.117)
## No religion 0.396 0.497
## (-0.361, 1.152) (-0.279, 1.273)
## Non-urban -0.252 -0.275
## (-0.646, 0.143) (-0.670, 0.120)
## Other state 0.209 0.255
## (-0.140, 0.557) (-0.103, 0.613)
## Primary -0.200 -0.255
## (-0.748, 0.348) (-0.810, 0.299)
## Profesional -0.570* -0.553*
## (-1.027, -0.113) (-1.018, -0.088)
## Secundary 0.033 0.012
## (-0.356, 0.423) (-0.382, 0.406)
## No worked before 18 -0.302 -0.320
## (-0.649, 0.045) (-0.678, 0.037)
## No verbal abuse -0.617*
## (-1.105, -0.128)
## No physical abuse 0.148
## (-0.222, 0.518)
## No sexual abuse -0.181
## (-2.219, 1.858)
## Constant 0.645*** 1.233
## (0.281, 1.010) (-0.870, 3.335)
## N 871 862
## ------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001
#We mantain the statistical significant variables to make a third model, m3log. These variables were Religion, Education and Verbal abuse. From this model we predicted some cases for this analysis.
m3log<-svyglm(afsr~relig+esco+vabuse, design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(m3log)
##
## Call:
## svyglm(formula = afsr ~ relig + esco + vabuse, design = des,
## family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~estrato, weights = ~pondera, data = yoloe)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.14764 0.24105 4.761 2.57e-06 ***
## religchristian/protestant/other 0.54147 0.22450 2.412 0.01625 *
## religno religion 0.23281 0.32342 0.720 0.47198
## escoprimary -0.23270 0.25161 -0.925 0.35551
## escoprofesional -0.65193 0.20564 -3.170 0.00162 **
## escosecundary -0.03741 0.17325 -0.216 0.82913
## vabuseNo -0.54529 0.22659 -2.407 0.01649 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9984609)
##
## Number of Fisher Scoring iterations: 4
#In the following table we present the logistic regression coefficients for the third model in their exponentiate form.
expcoeff<-function(x) (exp(x))
stargazer(m3log, apply.coef = expcoeff, type="text", style="demography", title = "Logistic regression model for age at first sexual relation", covariate.labels = c("Christian/Protestant/Other", "No religion", "Primary", "Profesional", "Secundary", "No verbal abuse"), keep.stat = "n", align=T, ci=T)
##
## Logistic regression model for age at first sexual relation
## -----------------------------------------
## afsr
## -----------------------------------------
## Christian/Protestant/Other 1.719***
## (1.279, 2.159)
## No religion 1.262***
## (0.628, 1.896)
## Primary 0.792**
## (0.299, 1.286)
## Profesional 0.521*
## (0.118, 0.924)
## Secundary 0.963***
## (0.624, 1.303)
## No verbal abuse 0.580*
## (0.136, 1.024)
## Constant 3.151***
## (2.678, 3.623)
## N 1,003
## -----------------------------------------
## *p < .05; **p < .01; ***p < .001
ndata<-expand.grid(relig=levels(yoloe$relig), esco=levels(yoloe$esco), vabuse=levels(yoloe$vabuse))
#fitted values
fit<-predict(m3log, newdata = ndata, type="response")
#adding the values to the new data
ndata$fitted.prob.logreg<-round(fit, 3)
head(ndata, n=24)
## relig esco vabuse fitted.prob.logreg
## 1 catholic high school Yes 0.759
## 2 christian/protestant/other high school Yes 0.844
## 3 no religion high school Yes 0.799
## 4 catholic primary Yes 0.714
## 5 christian/protestant/other primary Yes 0.811
## 6 no religion primary Yes 0.759
## 7 catholic profesional Yes 0.621
## 8 christian/protestant/other profesional Yes 0.738
## 9 no religion profesional Yes 0.674
## 10 catholic secundary Yes 0.752
## 11 christian/protestant/other secundary Yes 0.839
## 12 no religion secundary Yes 0.793
## 13 catholic high school No 0.646
## 14 christian/protestant/other high school No 0.758
## 15 no religion high school No 0.697
## 16 catholic primary No 0.591
## 17 christian/protestant/other primary No 0.713
## 18 no religion primary No 0.646
## 19 catholic profesional No 0.488
## 20 christian/protestant/other profesional No 0.621
## 21 no religion profesional No 0.546
## 22 catholic secundary No 0.638
## 23 christian/protestant/other secundary No 0.751
## 24 no religion secundary No 0.689
#Following to model m3log, we estimated the probability of having the first sexual relation before 18 for a series of «fake» youngsters. For example, the probability of a man raise as catholic, with high school and that doesn't suffer from verbal abuse of having his first sexual relation as underage is about 64%, while if he suffer from verbal abuse, his chances are around 76%. For a non-catholic but religious man with profesional education with no verbal abuse, his chances are about 62%; if this man is catholic then his chances are the lowest, 48%. According to this model, the highest probability that a man had his first sexual relation as underage (84%) correspond to a non-catholic but religious man with high school education that suffer verbal abuse in his home.
ndata[which(ndata$relig=="catholic" & ndata$esco=="high school" & ndata$vabuse=="No"),]
## relig esco vabuse fitted.prob.logreg
## 13 catholic high school No 0.646
ndata[which(ndata$relig=="catholic" & ndata$esco=="high school" & ndata$vabuse=="Yes"),]
## relig esco vabuse fitted.prob.logreg
## 1 catholic high school Yes 0.759
ndata[which(ndata$relig=="christian/protestant/other" & ndata$esco=="profesional" & ndata$vabuse=="No"),]
## relig esco vabuse fitted.prob.logreg
## 20 christian/protestant/other profesional No 0.621
ndata[which(ndata$relig=="catholic" & ndata$esco=="profesional" & ndata$vabuse=="No"),]
## relig esco vabuse fitted.prob.logreg
## 19 catholic profesional No 0.488
ndata[which(ndata$relig=="christian/protestant/other" & ndata$esco=="high school" & ndata$vabuse=="Yes"),]
## relig esco vabuse fitted.prob.logreg
## 2 christian/protestant/other high school Yes 0.844