Summary

Hi folks, I'm just cleaning up the file now, making sure that we will be able to convert this directly to a Word document.

Descriptives (still working on this)

## 
##    0    1 
## 9781  589

## 
##     0     1 
## 13108  1143

## 
##    1    2 
## 5149 5221

## 
##    1    2 
## 7231 7020

##                            Variable       non-Indigenous           Indigenous
## 1                         Dropout %               15.83%               27.55%
## 2                       Cohort 2003               97.96%                2.04%
## 3                       Cohort 2009               96.92%                3.08%
## 4                           Girls %               50.14%               51.20%
## 5                           Urban %               54.16%               45.84%
## 6               Year 10 or Higher %               90.64%               89.98%
## 7          Achievement Index (Mean) -0.09 [-0.13, -0.06] -0.90 [-0.98, -0.82]
## 8 Socioeconomic Status Index (Mean)    0.31 [0.28, 0.33] -0.15 [-0.20, -0.10]

use same as paper

Hypothesis set 1:

H1a: Indigenous Australian Children have higher dropout rates

## Joining, by = "term"

## # A tibble: 8 × 5
##   Variables      `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>               <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept           4.77      3.89      5.66  0    
## 2 Indigenous          0.365     0.129     0.6   0.003
## 3 Cohort             -0.275    -0.413    -0.137 0    
## 4 Grade              -0.641    -0.726    -0.556 0    
## 5 Urban              -0.478    -0.602    -0.354 0    
## 6 Gender (Boys)       0.509     0.406     0.612 0    
## 7 Attrition Flag      0.613     0.498     0.727 0    
## 8 SES                -0.618    -0.686    -0.549 0    

##    
##         0     1
##   0 10876  1523
##   1  4458  1280

H1_gender: Is the gap the same for boys and girls?

## Joining, by = "term"

## # A tibble: 9 × 5
##   Variables      `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>               <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept           4.77      3.88      5.65  0    
## 2 Indigenous          0.528     0.154     0.903 0.006
## 3 Gender (Boys)       0.521     0.416     0.626 0    
## 4 Cohort             -0.275    -0.413    -0.137 0    
## 5 Grade              -0.642    -0.727    -0.556 0    
## 6 Urban              -0.478    -0.602    -0.353 0    
## 7 Attrition Flag      0.613     0.499     0.727 0    
## 8 SES                -0.618    -0.686    -0.55  0    
## 9 IndXGen            -0.313    -0.753     0.126 0.16

H1_location: Is the gap the same for urban and rural youth?

## Joining, by = "term"

## # A tibble: 9 × 5
##   Variables      `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>               <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept           4.78      3.90      5.67  0    
## 2 Indigenous          0.19     -0.074     0.453 0.155
## 3 Urban              -0.493    -0.619    -0.367 0    
## 4 Cohort             -0.275    -0.413    -0.137 0    
## 5 Grade              -0.641    -0.727    -0.556 0    
## 6 Gender (Boys)       0.509     0.406     0.612 0    
## 7 Attrition Flag      0.613     0.499     0.728 0    
## 8 SES                -0.618    -0.686    -0.55  0    
## 9 IndXLoc             0.413     0.016     0.81  0.042

H1_ses: Is the gap the same for rich and poor youth?

## Rows: 156 Columns: 2

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): term, Vars

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

## Joining, by = "term"

## # A tibble: 9 × 5
##   Variables      `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>               <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept           4.76      3.87      5.64      0
## 2 Indigenous          0.501     0.276     0.726     0
## 3 SES                -0.636    -0.706    -0.566     0
## 4 Cohort             -0.276    -0.414    -0.138     0
## 5 Grade              -0.64     -0.725    -0.554     0
## 6 Urban              -0.479    -0.604    -0.354     0
## 7 Gender (Boys)       0.511     0.408     0.614     0
## 8 Attrition Flag      0.614     0.5       0.729     0
## 9 IndXSES             0.584     0.368     0.8       0

H1_all three: Gap varies as a function of socioeconomic status, gender and location

## Joining, by = "term"

## # A tibble: 11 × 5
##    Variables      `Log Odds` `-95% CI` `+95% CI`     p
##    <chr>               <dbl>     <dbl>     <dbl> <dbl>
##  1 Intercept           4.76      3.87      5.65  0    
##  2 Indigenous          0.529     0.166     0.892 0.005
##  3 SES                -0.636    -0.706    -0.566 0    
##  4 Urban              -0.491    -0.617    -0.365 0    
##  5 Gender (Boys)       0.524     0.418     0.629 0    
##  6 Cohort             -0.275    -0.413    -0.137 0    
##  7 Grade              -0.64     -0.726    -0.555 0    
##  8 Attrition Flag      0.616     0.501     0.73  0    
##  9 IndXSES             0.553     0.342     0.764 0    
## 10 IndXLoc             0.315    -0.065     0.695 0.103
## 11 IndXGen            -0.321    -0.707     0.065 0.102

Hypothesis 1_cohort interaction only. This gap narrowed as a function of a change in legislation that increased the effective age of first selection to 17 between 2008 and 2010.

## Joining, by = "term"

## # A tibble: 9 × 5
##   Variables      `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>               <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept           4.76      3.88      5.65  0    
## 2 SES                -0.618    -0.686    -0.549 0    
## 3 Urban              -0.479    -0.603    -0.354 0    
## 4 Gender (Boys)       0.51      0.406     0.613 0    
## 5 Cohort             -0.263    -0.406    -0.121 0    
## 6 Indigenous          0.532     0.27      0.795 0    
## 7 Grade              -0.641    -0.726    -0.556 0    
## 8 Attrition Flag      0.612     0.497     0.726 0    
## 9 IndXCoh            -0.317    -0.713     0.079 0.115

Hypothesis 1_all three interactions and cohort interaction.

## Joining, by = "term"

## # A tibble: 12 × 5
##    Variables      `Log Odds` `-95% CI` `+95% CI`     p
##    <chr>               <dbl>     <dbl>     <dbl> <dbl>
##  1 Intercept           4.75      3.86      5.64  0    
##  2 Indigenous          0.729     0.341     1.12  0    
##  3 SES                -0.637    -0.707    -0.567 0    
##  4 Urban              -0.494    -0.62     -0.367 0    
##  5 Gender (Boys)       0.524     0.418     0.629 0    
##  6 Cohort             -0.259    -0.403    -0.115 0.001
##  7 Grade              -0.64     -0.725    -0.554 0    
##  8 Attrition Flag      0.615     0.5       0.729 0    
##  9 IndXSES             0.574     0.365     0.783 0    
## 10 IndXLoc             0.358    -0.03      0.745 0.07 
## 11 IndXGen            -0.321    -0.718     0.076 0.112
## 12 IndXCoh            -0.407    -0.791    -0.022 0.039

Figures hypothesis 1

hypothesis set 2

Hypothesis 2: Indigenous disadvantage is still present when comparing equally advantaged and equally achieving Indigenous and non-Indigenous youth (controlling for achievement)

## # A tibble: 9 × 5
##   Parameter      Est `-95% CI` `+95% CI` p     
##   <chr>        <dbl>     <dbl>     <dbl> <chr> 
## 1 (Intercept)  1.99      1.06      2.92  < .001
## 2 GEO         -0.52     -0.648    -0.392 < .001
## 3 ESCS        -0.339    -0.411    -0.267 < .001
## 4 GENDER       0.453     0.344     0.563 < .001
## 5 INDIG        0.011    -0.228     0.25  0.926 
## 6 COHORT2     -0.339    -0.484    -0.193 < .001
## 7 GRADE       -0.378    -0.469    -0.288 < .001
## 8 FLAG_MISS    0.368     0.242     0.494 < .001
## 9 ACH1PV      -0.734    -0.802    -0.666 < .001

Hypothesis 2_ses. Indigenous gap (controlling for achievement) varies as a function of SES

## # A tibble: 10 × 5
##    Parameter      Est `-95% CI` `+95% CI` p     
##    <chr>        <dbl>     <dbl>     <dbl> <chr> 
##  1 (Intercept)  1.98      1.06      2.91  < .001
##  2 GEO         -0.521    -0.649    -0.392 < .001
##  3 GENDER       0.455     0.346     0.564 < .001
##  4 INDIG        0.122    -0.108     0.352 0.292 
##  5 ESCS        -0.355    -0.429    -0.281 < .001
##  6 COHORT2     -0.339    -0.485    -0.193 < .001
##  7 GRADE       -0.378    -0.468    -0.288 < .001
##  8 FLAG_MISS    0.37      0.244     0.496 < .001
##  9 ACH1PV      -0.733    -0.801    -0.664 < .001
## 10 INDIG:ESCS   0.478     0.248     0.707 < .001

hypothesis_2_location

## # A tibble: 10 × 5
##    Parameter      Est `-95% CI` `+95% CI` p     
##    <chr>        <dbl>     <dbl>     <dbl> <chr> 
##  1 (Intercept)  2.00      1.07      2.93  < .001
##  2 GEO         -0.538    -0.667    -0.41  < .001
##  3 INDIG       -0.196    -0.469     0.077 0.155 
##  4 GENDER       0.453     0.344     0.562 < .001
##  5 ESCS        -0.339    -0.411    -0.267 < .001
##  6 COHORT2     -0.338    -0.483    -0.193 < .001
##  7 GRADE       -0.379    -0.469    -0.289 < .001
##  8 FLAG_MISS    0.369     0.243     0.494 < .001
##  9 ACH1PV      -0.735    -0.803    -0.667 < .001
## 10 GEO:INDIG    0.49      0.056     0.923 0.027

hypothesis_2_gender

## # A tibble: 10 × 5
##    Parameter       Est `-95% CI` `+95% CI` p     
##    <chr>         <dbl>     <dbl>     <dbl> <chr> 
##  1 (Intercept)   1.98      1.06      2.91  < .001
##  2 GEO          -0.52     -0.648    -0.391 < .001
##  3 GENDER        0.468     0.357     0.579 < .001
##  4 INDIG         0.209    -0.186     0.604 0.293 
##  5 ESCS         -0.34     -0.412    -0.268 < .001
##  6 COHORT2      -0.338    -0.484    -0.193 < .001
##  7 GRADE        -0.379    -0.469    -0.289 < .001
##  8 FLAG_MISS     0.369     0.243     0.494 < .001
##  9 ACH1PV       -0.735    -0.803    -0.667 < .001
## 10 GENDER:INDIG -0.381    -0.846     0.084 0.105

Hypothesis2_legislation – This gap narrowed as a function of a change in legislation that increased the effective age of first selection to 17 between 2008 and 2010.

## # A tibble: 10 × 5
##    Parameter        Est `-95% CI` `+95% CI` p     
##    <chr>          <dbl>     <dbl>     <dbl> <chr> 
##  1 (Intercept)    1.98      1.06      2.91  < .001
##  2 GEO           -0.521    -0.649    -0.392 < .001
##  3 GENDER         0.454     0.345     0.563 < .001
##  4 COHORT2       -0.329    -0.479    -0.179 < .001
##  5 INDIG          0.148    -0.12      0.415 0.273 
##  6 ESCS          -0.339    -0.411    -0.267 < .001
##  7 GRADE         -0.378    -0.468    -0.288 < .001
##  8 FLAG_MISS      0.367     0.242     0.493 < .001
##  9 ACH1PV        -0.734    -0.802    -0.666 < .001
## 10 COHORT2:INDIG -0.258    -0.678     0.163 0.224

h2_achievement moderation. Does this gap vary as a function of academic achievement? That is, is the gap between similarly low-achieving Indigenous and non-Indigenous children the same as the gap between similarly high-achieving Indigenous and non-Indigenous children?

## # A tibble: 10 × 5
##    Parameter       Est `-95% CI` `+95% CI` p     
##    <chr>         <dbl>     <dbl>     <dbl> <chr> 
##  1 (Intercept)   1.98      1.06      2.91  < .001
##  2 GEO          -0.522    -0.65     -0.394 < .001
##  3 GENDER        0.455     0.346     0.564 < .001
##  4 COHORT2      -0.341    -0.487    -0.195 < .001
##  5 INDIG         0.444     0.14      0.747 0.005 
##  6 ESCS         -0.336    -0.409    -0.264 < .001
##  7 GRADE        -0.379    -0.469    -0.289 < .001
##  8 FLAG_MISS     0.369     0.244     0.494 < .001
##  9 ACH1PV       -0.748    -0.818    -0.678 < .001
## 10 INDIG:ACH1PV  0.388     0.18      0.596 < .001

h2_everything in from hyp 1 and 2:

## # A tibble: 14 × 5
##    Parameter        Est `-95% CI` `+95% CI` p     
##    <chr>          <dbl>     <dbl>     <dbl> <chr> 
##  1 (Intercept)    1.98      1.05      2.91  < .001
##  2 GEO           -0.54     -0.669    -0.411 < .001
##  3 INDIG          0.602     0.154     1.05  0.009 
##  4 GENDER         0.469     0.358     0.581 < .001
##  5 COHORT2       -0.326    -0.477    -0.175 < .001
##  6 ESCS          -0.351    -0.426    -0.277 < .001
##  7 GRADE         -0.379    -0.469    -0.288 < .001
##  8 FLAG_MISS      0.371     0.245     0.496 < .001
##  9 ACH1PV        -0.745    -0.815    -0.674 < .001
## 10 GEO:INDIG      0.431     0.032     0.83  0.034 
## 11 INDIG:GENDER  -0.326    -0.73      0.077 0.11  
## 12 INDIG:COHORT2 -0.329    -0.714     0.057 0.092 
## 13 INDIG:ESCS     0.385     0.162     0.607 < .001
## 14 INDIG:ACH1PV   0.306     0.087     0.526 0.007

Figures hypothesis 2

#Figures faceted hypothesis 2: equal achievement

#Graph summarizing hyp1 results 


# ---- Hypothesis 2 figure panel: Indigenous status by location ----

# Survey-design quasibinomial GLM of DROPOUT with a GEO x INDIG interaction.
# `GEO * INDIG` already expands to GEO + INDIG + GEO:INDIG, so the main
# effects are not repeated in the formula (the fitted model is unchanged).
h2_location_1 <- svyglm(
  DROPOUT ~ GEO * INDIG + COHORT + GENDER + GRADE + ACH1PV + ESCS + FLAG_MISS,
  design = lsay,
  family = quasibinomial()
)

# Model-based predictions for every INDIG x GEO combination.
geo_h2 <- ggeffects::ggpredict(h2_location_1, terms = c("INDIG", "GEO"))

# The display labels below are attached purely by row position. Stop here if
# the prediction grid is not the expected 2 x 2, because data.frame() would
# otherwise silently recycle the labels onto the wrong rows.
stopifnot(length(geo_h2$predicted) == 4L)

geo_h2_out <- data.frame(
  prob = geo_h2$predicted,
  ci.low = geo_h2$conf.low,
  ci.high = geo_h2$conf.high,
  # NOTE(review): assumes rows come back ordered by INDIG first (non-Indigenous
  # level first) and by GEO within INDIG -- confirm against `geo_h2`.
  indig = rep(c("non-Indigenous", "Indigenous"), each = 2),
  # The padding spaces in "   Prov  " are kept exactly as in the original.
  geo = rep(c("   Prov  ", "Urban"), 2)
)

# Point estimate with confidence interval per location, one facet per
# Indigenous-status group. Axis labels are set once; the original set
# xlab()/ylab() twice and only the later pair took effect.
geo_plot_2 <- ggplot(
  geo_h2_out,
  aes(x = geo, y = prob, ymin = ci.low, ymax = ci.high)
) +
  geom_pointrange() +
  coord_flip() + # flip coordinates (puts the location labels on the y axis)
  facet_wrap(~indig) +
  theme_stata() +
  xlab("") +
  ylab("Probability of not completing high-school") +
  ggtitle("Geography by Indigenous Status")

# geo_plot_2  # uncomment to preview this panel on its own

# ---- Hypothesis 2 figure panel: Indigenous status by cohort ----

# Survey-design quasibinomial GLM of DROPOUT with a COHORT x INDIG
# interaction. `COHORT * INDIG` already contains both main effects, so they
# are not listed again (the fitted model is unchanged).
h2_cohort_1 <- svyglm(
  DROPOUT ~ COHORT * INDIG + GENDER + GRADE + GEO + ACH1PV + ESCS + FLAG_MISS,
  design = lsay,
  family = quasibinomial()
)

# Model-based predictions for every INDIG x COHORT combination.
Cohort_h2_1 <- ggeffects::ggpredict(h2_cohort_1, terms = c("INDIG", "COHORT"))

# Labels below are attached by row position; stop if the grid is not 2 x 2 so
# data.frame() cannot silently recycle them onto the wrong rows.
stopifnot(length(Cohort_h2_1$predicted) == 4L)

Cohort_h2_1_out <- data.frame(
  prob = Cohort_h2_1$predicted,
  ci.low = Cohort_h2_1$conf.low,
  ci.high = Cohort_h2_1$conf.high,
  # NOTE(review): assumes rows are ordered by INDIG first (non-Indigenous
  # level first) and by COHORT within INDIG -- confirm against `Cohort_h2_1`.
  indig = rep(c("non-Indigenous", "Indigenous"), each = 2),
  # NOTE(review): the labels read 2008/2010, while the descriptives in this
  # document report cohorts 2003 and 2009 -- confirm which labels are intended.
  # The strings (including the leading spaces) are kept exactly as written.
  Cohort = rep(c("  2008", "2010"), 2)
)

# Point estimate with confidence interval per cohort, one facet per
# Indigenous-status group. Axis labels are set once; the original set
# xlab()/ylab() twice and only the later pair took effect.
cohort_plot_2 <- ggplot(
  Cohort_h2_1_out,
  aes(x = Cohort, y = prob, ymin = ci.low, ymax = ci.high)
) +
  geom_pointrange() +
  coord_flip() + # flip coordinates (puts the cohort labels on the y axis)
  facet_wrap(~indig) +
  theme_stata() +
  xlab("") +
  ylab("Probability of not completing high-school") +
  ggtitle("Cohort by Indigenous Status")

# two columns: one with separate, one with all

# ---- Hypothesis 2 figure panel: Indigenous status by SES ----

# Survey-design quasibinomial GLM of DROPOUT with an ESCS x INDIG
# interaction. `ESCS * INDIG` already contains both main effects, so they are
# not listed again (the fitted model is unchanged).
h2_SES_1 <- svyglm(
  DROPOUT ~ ESCS * INDIG + COHORT + GENDER + GRADE + GEO + ACH1PV + FLAG_MISS,
  design = lsay,
  family = quasibinomial()
)

# Model-based predictions for each INDIG level at ESCS = -2, -1, 0, 1, 2.
ses_h2 <- ggeffects::ggpredict(
  h2_SES_1,
  terms = c("INDIG", "ESCS [-2,-1,0,1,2]")
)

# Labels below are attached by row position; stop if the grid is not
# 2 groups x 5 SES values so data.frame() cannot silently recycle them.
stopifnot(length(ses_h2$predicted) == 10L)

# The object name says "h1" but it holds the Hypothesis 2 predictions; the
# name is kept in case it is used elsewhere.
ses_h1_out_1 <- data.frame(
  prob = ses_h2$predicted,
  ci.low = ses_h2$conf.low,
  ci.high = ses_h2$conf.high,
  # NOTE(review): assumes rows are ordered by INDIG first (non-Indigenous
  # level first) and by ESCS within INDIG -- confirm against `ses_h2`.
  indig = rep(c("non-Indigenous", "Indigenous"), each = 5),
  ses = rep(-2:2, 2)
)

# Point estimate with confidence interval at each SES value, one facet per
# Indigenous-status group. Axis labels are set once (the original's first
# xlab()/ylab() pair was overridden). The title spelling is corrected to
# "Indigenous" to match the other panels.
ses_plot_2 <- ggplot(
  ses_h1_out_1,
  aes(x = ses, y = prob, ymin = ci.low, ymax = ci.high)
) +
  geom_pointrange() +
  coord_flip() + # flip coordinates (puts the SES values on the y axis)
  facet_wrap(~indig) +
  theme_stata() +
  xlab("SES") +
  ylab("Probability of not completing high-school") +
  ggtitle("SES by Indigenous Status")


# ---- Hypothesis 2 figure panel: Indigenous status by achievement ----

# Survey-design quasibinomial GLM of DROPOUT with an ACH1PV x INDIG
# interaction. `ACH1PV * INDIG` already contains both main effects, so they
# are not listed again (the fitted model is unchanged).
h2_acheive_1 <- svyglm(
  DROPOUT ~ ACH1PV * INDIG + COHORT + GENDER + GRADE + GEO + ESCS + FLAG_MISS,
  design = lsay,
  family = quasibinomial()
)

# Model-based predictions for each INDIG level at ACH1PV = -2, -1, 0, 1, 2.
Achieve_h2_1 <- ggeffects::ggpredict(
  h2_acheive_1,
  terms = c("INDIG", "ACH1PV [-2,-1,0,1,2]")
)

# Labels below are attached by row position; stop if the grid is not
# 2 groups x 5 achievement values so data.frame() cannot silently recycle them.
stopifnot(length(Achieve_h2_1$predicted) == 10L)

Achieve_h2_out_1 <- data.frame(
  prob = Achieve_h2_1$predicted,
  ci.low = Achieve_h2_1$conf.low,
  ci.high = Achieve_h2_1$conf.high,
  # NOTE(review): assumes rows are ordered by INDIG first (non-Indigenous
  # level first) and by ACH1PV within INDIG -- confirm against `Achieve_h2_1`.
  indig = rep(c("non-Indigenous", "Indigenous"), each = 5),
  ACH1PV = rep(-2:2, 2)
)

# Point estimate with confidence interval at each achievement value, one
# facet per Indigenous-status group. Axis labels are set once (the original's
# first xlab()/ylab() pair was overridden). The title spelling is corrected
# to "Indigenous" to match the other panels.
acheive_plot_2 <- ggplot(
  Achieve_h2_out_1,
  aes(x = ACH1PV, y = prob, ymin = ci.low, ymax = ci.high)
) +
  geom_pointrange() +
  coord_flip() + # flip coordinates (puts the achievement values on the y axis)
  facet_wrap(~indig) +
  theme_stata() +
  xlab("Achievement") +
  ylab("Probability of not completing high-school") +
  ggtitle("Achievement by Indigenous Status")



 
  
# ---- Combined Hypothesis 2 figure ----
# TODO (carried over from the original note): facet_wrap cohort and Indigenous
# group with a single facet row so the panels line up with each other.

# Arrange the four panels in a two-column grid. The layout is specified once;
# the original applied the same plot_layout(ncol = 2) twice.
tmp <- cohort_plot_2 + geo_plot_2 + ses_plot_2 + acheive_plot_2 +
  plot_layout(ncol = 2)
tmp

# Write the combined figure to disk. `plot` is now passed by name rather than
# as a trailing positional argument after the named ones.
# NOTE(review): the destination is an absolute, machine-specific path; it is
# kept unchanged here, but consider a project-relative path.
ggsave(
  "/Users/jociarrochi/Dropbox/to do/lsay/r syntax/hyp2Facet.png",
  plot = tmp,
  width = 10,
  height = 8,
  dpi = 600
)

High school completion: Moderators of the gap between Indigenous and non-Indigenous youth

2021-10-22