Reproductive behavior of young males in Baja California.

#The literature suggests that those who were victims of violence in childhood are prone to have a promiscuous sexual activity, with the risks that this entails. Research tends to establish women as subjects of interest. In addition, it has been established that minors who suffered physical, psychological or other negligence are likely to have a sexual onset earlier than those who did not (Black et al., 2009).

#It is intended to achieve an approximation through the information available in the survey "Reproductive Health in Adolescents in Baja California" about traumatic events such as beatings, insults and sexual abuse captured by the survey to know if the abuse suffered in childhood can function as a marker for research on sexual initiation in young males.

#Reference
#Black, Maureen et al., 2009, "Sexual intercourse among adolescents maltreated before age 12: A prospective investigation", Pediatrics, 124. 

Dependent Variable: Age at First Sexual Relation

library(readr)
yoloe <- read_csv("C:/Users/gpe637/Downloads/Base de Adolescentes Hombres con ponderador.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   ageb2v = col_character(),
##   v212_c = col_character(),
##   v330_2 = col_character(),
##   v333_b = col_character(),
##   v333_c = col_character(),
##   v518_m = col_character(),
##   v622_3 = col_character(),
##   v638 = col_character(),
##   pondera = col_double()
## )
## See spec(...) for full column specifications.
yoloe$afsr<-ifelse(yoloe$v403 %in% c(1:17),1,0)
yoloe$afsr<-recode(yoloe$afsr, recodes = "1='Under 18'; 0='After 18'; else=NA", as.factor.result = T)

Independent Variables: Sociodemographics

#Indigenous condition
yoloe$clind<-recode(yoloe$v104, recodes = "1='indigenous'; 2='no indigenous'", as.factor.result = T)
yoloe$clind<-relevel(yoloe$clind, ref='no indigenous')

#Religion
yoloe$relig<-recode(yoloe$v109, recodes ="1='no religion'; 2='catholic'; 3:10='christian/protestant/other'; else=NA", as.factor.result = T)
yoloe$relig<-relevel(yoloe$relig, ref='catholic')

#Place
yoloe$urb<-recode(yoloe$v129, recodes = "3='urban'; 1:2='non-urban'; else=NA", as.factor.result = T)
yoloe$urb<-relevel(yoloe$urb, ref='urban')
  
#State of birth
yoloe$nacbc<-recode(yoloe$v111e, recodes = "2='Baja California'; 1='Other state'; 3:32='Other state'; else=NA", as.factor.result = T)
yoloe$nacbc<-relevel(yoloe$nacbc, ref='Baja California')

#Education
yoloe$esco<-recode(yoloe$v203n, recodes = "2='primary'; 3:7='secundary'; 8='high school'; 9:12='profesional'; else=NA", as.factor.result = T)
yoloe$esco<-relevel(yoloe$esco, ref='high school')

#Work as underage
yoloe$work<-recode(yoloe$v213, recodes = "8:17='worked before 18'; 18:29='no worked before 18'; else=NA", as.factor.result = T)
yoloe$work<-relevel(yoloe$work, ref='worked before 18')

Independent Variables: Violence in childhood

#Verbal abuse
yoloe$vabuse<-recode(yoloe$v605, recodes = "1='Yes'; 2='No'; else=NA", as.factor.result = T)
yoloe$vabuse<-relevel(yoloe$vabuse, ref='Yes')

#Physical abuse
yoloe$phabuse<-recode(yoloe$v609, recodes = "1='Yes'; 2='No'; else=NA", as.factor.result = T)
yoloe$phabuse<-relevel(yoloe$phabuse, ref='Yes')

#Sexual abuse
yoloe$sexabuse<-recode(yoloe$v616, recodes = "1='Yes'; 2='No'; else=NA", as.factor.result = T)
yoloe$sexabuse<-relevel(yoloe$sexabuse, ref='Yes')

Survey design

options (survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata = ~estrato, weights = ~pondera, data = yoloe)

Logistic regression

#In the model m1log we include only the sociodemographic independent variables while in m2log we add the variables for violence in childhood. 

m1log<-svyglm(afsr~clind+relig+urb+nacbc+esco+work, design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
m2log<-svyglm(afsr~clind+relig+urb+nacbc+esco+work+vabuse+phabuse+sexabuse, design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(m1log)
## 
## Call:
## svyglm(formula = afsr ~ clind + relig + urb + nacbc + esco + 
##     work, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~estrato, weights = ~pondera, data = yoloe)
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.64543    0.18577   3.474 0.000571 ***
## clindindigenous                  0.27968    0.70725   0.395 0.692732    
## religchristian/protestant/other  0.56241    0.24792   2.269 0.023860 *  
## religno religion                 0.39582    0.38588   1.026 0.305670    
## urbnon-urban                    -0.25154    0.20107  -1.251 0.211710    
## nacbcOther state                 0.20875    0.17775   1.174 0.240995    
## escoprimary                     -0.19976    0.27973  -0.714 0.475598    
## escoprofesional                 -0.57036    0.23310  -2.447 0.014865 *  
## escosecundary                    0.03348    0.19871   0.168 0.866282    
## workno worked before 18         -0.30195    0.17693  -1.707 0.088723 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.001881)
## 
## Number of Fisher Scoring iterations: 4
summary(m2log)
## 
## Call:
## svyglm(formula = afsr ~ clind + relig + urb + nacbc + esco + 
##     work + vabuse + phabuse + sexabuse, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~estrato, weights = ~pondera, data = yoloe)
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      1.23263    1.07283   1.149   0.2513  
## clindindigenous                  0.24909    0.68122   0.366   0.7148  
## religchristian/protestant/other  0.62952    0.24861   2.532   0.0117 *
## religno religion                 0.49747    0.39592   1.257   0.2097  
## urbnon-urban                    -0.27498    0.20164  -1.364   0.1735  
## nacbcOther state                 0.25543    0.18267   1.398   0.1629  
## escoprimary                     -0.25532    0.28292  -0.902   0.3674  
## escoprofesional                 -0.55291    0.23724  -2.331   0.0203 *
## escosecundary                    0.01186    0.20097   0.059   0.9530  
## workno worked before 18         -0.32032    0.18235  -1.757   0.0798 .
## vabuseNo                        -0.61657    0.24917  -2.474   0.0138 *
## phabuseNo                        0.14811    0.18877   0.785   0.4332  
## sexabuseNo                      -0.18063    1.04016  -0.174   0.8622  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9973785)
## 
## Number of Fisher Scoring iterations: 4
stargazer(m1log, m2log, type="text", style="demography", title = "Logistic regression model for age at first sexual relation", covariate.labels = c("Indigenous", "Christian/Protestant/Other", "No religion", "Non-urban", "Other state", "Primary", "Profesional", "Secundary", "No worked before 18", "No verbal abuse", "No physical abuse", "No sexual abuse"), keep.stat = "n", align=T, ci=T)
## 
## Logistic regression model for age at first sexual relation
## ------------------------------------------------------------
##                                          afsr               
##                                Model 1          Model 2     
## ------------------------------------------------------------
## Indigenous                      0.280            0.249      
##                            (-1.107, 1.666)  (-1.086, 1.584) 
## Christian/Protestant/Other      0.562*           0.630*     
##                             (0.077, 1.048)   (0.142, 1.117) 
## No religion                     0.396            0.497      
##                            (-0.361, 1.152)  (-0.279, 1.273) 
## Non-urban                       -0.252           -0.275     
##                            (-0.646, 0.143)  (-0.670, 0.120) 
## Other state                     0.209            0.255      
##                            (-0.140, 0.557)  (-0.103, 0.613) 
## Primary                         -0.200           -0.255     
##                            (-0.748, 0.348)  (-0.810, 0.299) 
## Profesional                    -0.570*          -0.553*     
##                            (-1.027, -0.113) (-1.018, -0.088)
## Secundary                       0.033            0.012      
##                            (-0.356, 0.423)  (-0.382, 0.406) 
## No worked before 18             -0.302           -0.320     
##                            (-0.649, 0.045)  (-0.678, 0.037) 
## No verbal abuse                                 -0.617*     
##                                             (-1.105, -0.128)
## No physical abuse                                0.148      
##                                             (-0.222, 0.518) 
## No sexual abuse                                  -0.181     
##                                             (-2.219, 1.858) 
## Constant                       0.645***          1.233      
##                             (0.281, 1.010)  (-0.870, 3.335) 
## N                                871              862       
## ------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001
#We mantain the statistical significant variables to make a third model, m3log. These variables were Religion, Education and Verbal abuse. From this model we predicted some cases for this analysis. 

m3log<-svyglm(afsr~relig+esco+vabuse, design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(m3log)
## 
## Call:
## svyglm(formula = afsr ~ relig + esco + vabuse, design = des, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~estrato, weights = ~pondera, data = yoloe)
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.14764    0.24105   4.761 2.57e-06 ***
## religchristian/protestant/other  0.54147    0.22450   2.412  0.01625 *  
## religno religion                 0.23281    0.32342   0.720  0.47198    
## escoprimary                     -0.23270    0.25161  -0.925  0.35551    
## escoprofesional                 -0.65193    0.20564  -3.170  0.00162 ** 
## escosecundary                   -0.03741    0.17325  -0.216  0.82913    
## vabuseNo                        -0.54529    0.22659  -2.407  0.01649 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9984609)
## 
## Number of Fisher Scoring iterations: 4
#In the following table we present the logistic regression coefficients for the third model in their exponentiate form.
expcoeff<-function(x) (exp(x))

stargazer(m3log, apply.coef = expcoeff, type="text", style="demography", title = "Logistic regression model for age at first sexual relation", covariate.labels = c("Christian/Protestant/Other", "No religion", "Primary", "Profesional", "Secundary", "No verbal abuse"), keep.stat = "n", align=T, ci=T)
## 
## Logistic regression model for age at first sexual relation
## -----------------------------------------
##                                 afsr     
## -----------------------------------------
## Christian/Protestant/Other    1.719***   
##                            (1.279, 2.159)
## No religion                   1.262***   
##                            (0.628, 1.896)
## Primary                       0.792**    
##                            (0.299, 1.286)
## Profesional                    0.521*    
##                            (0.118, 0.924)
## Secundary                     0.963***   
##                            (0.624, 1.303)
## No verbal abuse                0.580*    
##                            (0.136, 1.024)
## Constant                      3.151***   
##                            (2.678, 3.623)
## N                              1,003     
## -----------------------------------------
## *p < .05; **p < .01; ***p < .001
ndata<-expand.grid(relig=levels(yoloe$relig), esco=levels(yoloe$esco), vabuse=levels(yoloe$vabuse))

#fitted values
fit<-predict(m3log, newdata = ndata, type="response")

#adding the values to the new data
ndata$fitted.prob.logreg<-round(fit, 3)

head(ndata, n=24)
##                         relig        esco vabuse fitted.prob.logreg
## 1                    catholic high school    Yes              0.759
## 2  christian/protestant/other high school    Yes              0.844
## 3                 no religion high school    Yes              0.799
## 4                    catholic     primary    Yes              0.714
## 5  christian/protestant/other     primary    Yes              0.811
## 6                 no religion     primary    Yes              0.759
## 7                    catholic profesional    Yes              0.621
## 8  christian/protestant/other profesional    Yes              0.738
## 9                 no religion profesional    Yes              0.674
## 10                   catholic   secundary    Yes              0.752
## 11 christian/protestant/other   secundary    Yes              0.839
## 12                no religion   secundary    Yes              0.793
## 13                   catholic high school     No              0.646
## 14 christian/protestant/other high school     No              0.758
## 15                no religion high school     No              0.697
## 16                   catholic     primary     No              0.591
## 17 christian/protestant/other     primary     No              0.713
## 18                no religion     primary     No              0.646
## 19                   catholic profesional     No              0.488
## 20 christian/protestant/other profesional     No              0.621
## 21                no religion profesional     No              0.546
## 22                   catholic   secundary     No              0.638
## 23 christian/protestant/other   secundary     No              0.751
## 24                no religion   secundary     No              0.689
#Following to model m3log, we estimated the probability of having the first sexual relation before 18 for a series of «fake» youngsters. For example, the probability of a man raise as catholic, with high school and that doesn't suffer from verbal abuse of having his first sexual relation as underage is about 64%, while if he suffer from verbal abuse, his chances are around 76%. For a non-catholic but religious man with profesional education with no verbal abuse, his chances are about 62%; if this man is catholic then his chances are the lowest, 48%. According to this model, the highest probability that a man had his first sexual relation as underage (84%) correspond to a non-catholic but religious man with high school education that suffer verbal abuse in his home. 

ndata[which(ndata$relig=="catholic" & ndata$esco=="high school" & ndata$vabuse=="No"),]
##       relig        esco vabuse fitted.prob.logreg
## 13 catholic high school     No              0.646
ndata[which(ndata$relig=="catholic" & ndata$esco=="high school" & ndata$vabuse=="Yes"),]
##      relig        esco vabuse fitted.prob.logreg
## 1 catholic high school    Yes              0.759
ndata[which(ndata$relig=="christian/protestant/other" & ndata$esco=="profesional" & ndata$vabuse=="No"),]
##                         relig        esco vabuse fitted.prob.logreg
## 20 christian/protestant/other profesional     No              0.621
ndata[which(ndata$relig=="catholic" & ndata$esco=="profesional" & ndata$vabuse=="No"),]
##       relig        esco vabuse fitted.prob.logreg
## 19 catholic profesional     No              0.488
ndata[which(ndata$relig=="christian/protestant/other" & ndata$esco=="high school" & ndata$vabuse=="Yes"),]
##                        relig        esco vabuse fitted.prob.logreg
## 2 christian/protestant/other high school    Yes              0.844