Summary

Hi folks, I'm just cleaning up the file now, making sure that we will be able to convert this directly to a Word document.


Descriptives (still working on this)

## 
##    0    1 
## 9781  589
## 
##     0     1 
## 13108  1143
## 
##    1    2 
## 5149 5221
## 
##    1    2 
## 7231 7020
##                            Variable       non-Indigenous           Indigenous
## 1                         Dropout %               14.82%               23.68%
## 2                       Cohort 2003               98.23%                1.77%
## 3                       Cohort 2009               96.88%                3.12%
## 4                           Girls %               50.37%               52.74%
## 5                           Urban %               50.93%               49.07%
## 6               Year 10 or Higher %               90.55%               89.36%
## 7          Achievement Index (Mean) -0.27 [-0.30, -0.23] -1.03 [-1.14, -0.93]
## 8 Socioeconomic Status Index (Mean)    0.31 [0.28, 0.33] -0.09 [-0.15, -0.02]

use same as paper

Hypothesis set 1:

H1a: Indigenous Australian Children have higher dropout rates

## Joining, by = "term"

## # A tibble: 7 × 5
##   Variables     `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>              <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept         41.6      22.2      61.0   0    
## 2 Indigenous         0.901    -1.76      3.56  0.501
## 3 Cohort            -1.75     -3.93      0.437 0.115
## 4 Grade             -5.36     -7.60     -3.13  0    
## 5 Urban              0.698    -1.25      2.64  0.476
## 6 Gender (Boys)      1.05     -1.89      3.99  0.48 
## 7 SES                0.089    -0.976     1.15  0.869

H1_gender: Is the gap the same for boys and girls

## Joining, by = "term"
## # A tibble: 8 × 5
##   Variables     `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>              <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept          5.99      4.74      7.25  0    
## 2 Indigenous         0.644     0.22      1.07  0.003
## 3 Gender (Boys)      0.628     0.485     0.771 0    
## 4 Cohort            -0.255    -0.43     -0.08  0.005
## 5 Grade             -0.756    -0.878    -0.634 0    
## 6 Urban             -0.527    -0.693    -0.362 0    
## 7 SES               -0.678    -0.782    -0.575 0    
## 8 IndXGen           -0.435    -1.06      0.193 0.172

H1_location: Is the gap the same for urban and rural

## Joining, by = "term"
## # A tibble: 8 × 5
##   Variables     `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>              <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept          6.00      4.75      7.26  0    
## 2 Indigenous         0.229    -0.13      0.588 0.207
## 3 Urban             -0.543    -0.709    -0.376 0    
## 4 Cohort            -0.255    -0.429    -0.08  0.005
## 5 Grade             -0.755    -0.876    -0.633 0    
## 6 Gender (Boys)      0.613     0.474     0.751 0    
## 7 SES               -0.678    -0.781    -0.574 0    
## 8 IndXLoc            0.439    -0.126     1.00  0.126

H1_ses: Is the gap the same for rich and poor (high and low SES)

## Rows: 156 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): term, Vars
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining, by = "term"
## # A tibble: 8 × 5
##   Variables     `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>              <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept          6.00      4.74      7.25  0    
## 2 Indigenous         0.489     0.193     0.784 0.002
## 3 SES               -0.686    -0.791    -0.58  0    
## 4 Cohort            -0.256    -0.431    -0.081 0.005
## 5 Grade             -0.755    -0.877    -0.633 0    
## 6 Urban             -0.527    -0.693    -0.361 0    
## 7 Gender (Boys)      0.613     0.474     0.751 0    
## 8 IndXSES            0.307    -0.016     0.631 0.062

H1_all three: Gap varies as a function of socioeconomic status, gender and location

## Joining, by = "term"
## # A tibble: 10 × 5
##    Variables     `Log Odds` `-95% CI` `+95% CI`     p
##    <chr>              <dbl>     <dbl>     <dbl> <dbl>
##  1 Intercept          5.99      4.73      7.24  0    
##  2 Indigenous         0.529     0.07      0.987 0.024
##  3 SES               -0.686    -0.792    -0.581 0    
##  4 Urban             -0.542    -0.709    -0.375 0    
##  5 Gender (Boys)      0.63      0.487     0.772 0    
##  6 Cohort            -0.254    -0.429    -0.079 0.005
##  7 Grade             -0.755    -0.876    -0.633 0    
##  8 IndXSES            0.297    -0.018     0.613 0.064
##  9 IndXLoc            0.407    -0.141     0.955 0.143
## 10 IndXGen           -0.446    -1.05      0.158 0.145

Hypothesis 1_cohort interaction only. This gap narrowed as a function of a change in legislation that increased the effective age of first selection to 17 between 2008 and 2010.

## Joining, by = "term"
## # A tibble: 8 × 5
##   Variables     `Log Odds` `-95% CI` `+95% CI`     p
##   <chr>              <dbl>     <dbl>     <dbl> <dbl>
## 1 Intercept          6.00      4.74      7.25  0    
## 2 SES               -0.678    -0.781    -0.574 0    
## 3 Urban             -0.528    -0.694    -0.362 0    
## 4 Gender (Boys)      0.613     0.474     0.752 0    
## 5 Cohort            -0.25     -0.431    -0.069 0.007
## 6 Indigenous         0.516     0.159     0.873 0.005
## 7 Grade             -0.755    -0.877    -0.634 0    
## 8 IndXCoh           -0.163    -0.768     0.442 0.593

Hypothesis 1_all three interactions and cohort interaction.

## Joining, by = "term"
## # A tibble: 11 × 5
##    Variables     `Log Odds` `-95% CI` `+95% CI`     p
##    <chr>              <dbl>     <dbl>     <dbl> <dbl>
##  1 Intercept          5.98      4.72      7.24  0    
##  2 Indigenous         0.636     0.131     1.14  0.014
##  3 SES               -0.687    -0.792    -0.581 0    
##  4 Urban             -0.543    -0.711    -0.376 0    
##  5 Gender (Boys)      0.63      0.487     0.773 0    
##  6 Cohort            -0.246    -0.428    -0.065 0.009
##  7 Grade             -0.754    -0.876    -0.632 0    
##  8 IndXSES            0.311    -0.002     0.623 0.052
##  9 IndXLoc            0.421    -0.125     0.968 0.129
## 10 IndXGen           -0.439    -1.04      0.166 0.152
## 11 IndXCoh           -0.207    -0.789     0.375 0.48


Figures hypothesis 1

hypothesis set 2

Hypothesis 2: Indigenous disadvantage is still present when comparing equally advantaged and equally achieving Indigenous and non-Indigenous youth (controlling for achievement)

## # A tibble: 8 × 5
##   Parameter      Est `-95% CI` `+95% CI` p     
##   <chr>        <dbl>     <dbl>     <dbl> <chr> 
## 1 (Intercept)  2.78      1.46      4.10  < .001
## 2 GEO         -0.588    -0.752    -0.424 < .001
## 3 ESCS        -0.348    -0.458    -0.237 < .001
## 4 GENDER       0.579     0.428     0.729 < .001
## 5 INDIG        0.055    -0.259     0.369 0.727 
## 6 COHORT2     -0.349    -0.528    -0.169 < .001
## 7 GRADE       -0.475    -0.603    -0.347 < .001
## 8 ACH1PV      -0.782    -0.869    -0.694 < .001

Hypothesis 2_ses. Indigenous gap (controlling for achievement) varies as a function of SES

## # A tibble: 9 × 5
##   Parameter      Est `-95% CI` `+95% CI` p     
##   <chr>        <dbl>     <dbl>     <dbl> <chr> 
## 1 (Intercept)  2.78      1.46      4.10  < .001
## 2 GEO         -0.588    -0.751    -0.424 < .001
## 3 GENDER       0.579     0.428     0.729 < .001
## 4 INDIG        0.1      -0.211     0.41  0.523 
## 5 ESCS        -0.354    -0.466    -0.241 < .001
## 6 COHORT2     -0.349    -0.528    -0.17  < .001
## 7 GRADE       -0.475    -0.603    -0.347 < .001
## 8 ACH1PV      -0.781    -0.869    -0.694 < .001
## 9 INDIG:ESCS   0.224    -0.158     0.606 0.244

hypothesis_2_location

## # A tibble: 9 × 5
##   Parameter      Est `-95% CI` `+95% CI` p     
##   <chr>        <dbl>     <dbl>     <dbl> <chr> 
## 1 (Intercept)  2.78      1.47      4.10  < .001
## 2 GEO         -0.607    -0.77     -0.444 < .001
## 3 INDIG       -0.182    -0.558     0.194 0.336 
## 4 GENDER       0.579     0.428     0.729 < .001
## 5 ESCS        -0.347    -0.458    -0.237 < .001
## 6 COHORT2     -0.347    -0.526    -0.168 < .001
## 7 GRADE       -0.474    -0.602    -0.346 < .001
## 8 ACH1PV      -0.782    -0.869    -0.695 < .001
## 9 GEO:INDIG    0.532    -0.1       1.16  0.096

hypothesis_2_gender

## # A tibble: 9 × 5
##   Parameter       Est `-95% CI` `+95% CI` p     
##   <chr>         <dbl>     <dbl>     <dbl> <chr> 
## 1 (Intercept)   2.77      1.45      4.08  < .001
## 2 GEO          -0.588    -0.752    -0.424 < .001
## 3 GENDER        0.599     0.443     0.755 < .001
## 4 INDIG         0.342    -0.123     0.807 0.145 
## 5 ESCS         -0.348    -0.458    -0.237 < .001
## 6 COHORT2      -0.347    -0.527    -0.168 < .001
## 7 GRADE        -0.475    -0.603    -0.346 < .001
## 8 ACH1PV       -0.783    -0.87     -0.696 < .001
## 9 GENDER:INDIG -0.568    -1.26      0.124 0.104

Hypothesis2_legislation – This gap narrowed as a function of a change in legislation that increased the effective age of first selection to 17 between 2008 and 2010.

## # A tibble: 9 × 5
##   Parameter        Est `-95% CI` `+95% CI` p     
##   <chr>          <dbl>     <dbl>     <dbl> <chr> 
## 1 (Intercept)    2.78      1.46      4.10  < .001
## 2 GEO           -0.588    -0.752    -0.424 < .001
## 3 GENDER         0.579     0.428     0.73  < .001
## 4 COHORT2       -0.346    -0.53     -0.162 < .001
## 5 INDIG          0.098    -0.292     0.488 0.617 
## 6 ESCS          -0.348    -0.458    -0.237 < .001
## 7 GRADE         -0.475    -0.603    -0.347 < .001
## 8 ACH1PV        -0.782    -0.869    -0.694 < .001
## 9 COHORT2:INDIG -0.077    -0.738     0.584 0.817

h2_achievement moderation. Does this gap vary as a function of academic achievement? That is, is the gap between similarly low-achieving Indigenous and non-Indigenous children the same as the gap between similarly high-achieving Indigenous and non-Indigenous children?

## # A tibble: 9 × 5
##   Parameter       Est `-95% CI` `+95% CI` p     
##   <chr>         <dbl>     <dbl>     <dbl> <chr> 
## 1 (Intercept)   2.77      1.46      4.09  < .001
## 2 GEO          -0.588    -0.752    -0.425 < .001
## 3 GENDER        0.58      0.429     0.731 < .001
## 4 COHORT2      -0.35     -0.53     -0.171 < .001
## 5 INDIG         0.451     0.028     0.874 0.036 
## 6 ESCS         -0.346    -0.457    -0.236 < .001
## 7 GRADE        -0.475    -0.603    -0.347 < .001
## 8 ACH1PV       -0.791    -0.879    -0.703 < .001
## 9 INDIG:ACH1PV  0.31      0.042     0.578 0.023

h2_everything in from hyp 1 and 2:

## # A tibble: 13 × 5
##    Parameter        Est `-95% CI` `+95% CI` p     
##    <chr>          <dbl>     <dbl>     <dbl> <chr> 
##  1 (Intercept)    2.76      1.44      4.07  < .001
##  2 GEO           -0.609    -0.772    -0.445 < .001
##  3 INDIG          0.501    -0.073     1.08  0.085 
##  4 GENDER         0.6       0.444     0.756 < .001
##  5 COHORT2       -0.344    -0.529    -0.159 < .001
##  6 ESCS          -0.351    -0.464    -0.238 < .001
##  7 GRADE         -0.473    -0.601    -0.345 < .001
##  8 ACH1PV        -0.791    -0.879    -0.702 < .001
##  9 GEO:INDIG      0.515    -0.06      1.09  0.077 
## 10 INDIG:GENDER  -0.518    -1.16      0.119 0.108 
## 11 INDIG:COHORT2 -0.095    -0.701     0.511 0.755 
## 12 INDIG:ESCS     0.157    -0.214     0.527 0.4   
## 13 INDIG:ACH1PV   0.258    -0.022     0.538 0.07

Figures hypothesis 2


#Figures faceted hypothesis 2: equal achievement

#Graph summarizing hyp1 results 


# ---- Indigenous status by location (Hypothesis 2) ----

# Survey-weighted quasibinomial model of DROPOUT with a GEO x INDIG
# interaction, adjusting for COHORT, GENDER, GRADE, ACH1PV and ESCS.
# INDIG and GEO are listed again after `GEO*INDIG`; R keeps each formula
# term only once, so the repeats do not alter the fitted model.
h2_location_1 <- svyglm(
  DROPOUT ~ GEO*INDIG + INDIG + COHORT + GENDER + GRADE + GEO + ACH1PV + ESCS,
  design = lsay,
  family = quasibinomial()
)

# Model-based predictions for each INDIG x GEO combination.
geo_h2 <- ggeffects::ggpredict(h2_location_1, terms = c("INDIG", "GEO"))

# Plotting frame: predicted probability with its confidence limits.
# The group labels are attached by row position, not read from `geo_h2`.
# NOTE(review): assumes ggpredict() returns rows ordered by INDIG first and
# GEO second, with non-Indigenous / Prov as the first levels -- confirm.
geo_h2_out <- data.frame(
  prob    = geo_h2$predicted,
  ci.low  = geo_h2$conf.low,
  ci.high = geo_h2$conf.high,
  indig   = rep(c("non-Indigenous", "Indigenous"), each = 2),
  geo     = rep(c("   Prov  ", "Urban"), 2)
)

# Point-and-interval plot, one facet per Indigenous-status group.
# coord_flip() puts the location labels on the vertical axis.
geo_plot_2 <- ggplot(
  geo_h2_out,
  aes(x = geo, y = prob, ymin = ci.low, ymax = ci.high)
) +
  geom_pointrange() +
  coord_flip() +
  facet_wrap(~indig) +
  theme_stata() +
  labs(
    x = "",
    y = "Probability of not completing high-school",
    title = "Geography by Indigenous Status"
  )

#geo_plot

# ---- Indigenous status by cohort (Hypothesis 2) ----

# Survey-weighted quasibinomial model of DROPOUT with a COHORT x INDIG
# interaction, adjusting for GENDER, GRADE, GEO, ACH1PV and ESCS.
# INDIG and COHORT are listed again after `COHORT*INDIG`; R keeps each
# formula term only once, so the repeats do not alter the fitted model.
h2_cohort_1 <- svyglm(
  DROPOUT ~ COHORT*INDIG + INDIG + COHORT + GENDER + GRADE + GEO + ACH1PV + ESCS,
  design = lsay,
  family = quasibinomial()
)

# Model-based predictions for each INDIG x COHORT combination.
Cohort_h2_1 <- ggeffects::ggpredict(h2_cohort_1, terms = c("INDIG", "COHORT"))

# Plotting frame: predicted probability with its confidence limits.
# The group labels are attached by row position, not read from the
# prediction object.
# NOTE(review): assumes rows are ordered by INDIG first and COHORT second --
# confirm. Also check the "2008"/"2010" labels: the descriptives table
# earlier in this document names the cohorts 2003 and 2009.
Cohort_h2_1_out <- data.frame(
  prob    = Cohort_h2_1$predicted,
  ci.low  = Cohort_h2_1$conf.low,
  ci.high = Cohort_h2_1$conf.high,
  indig   = rep(c("non-Indigenous", "Indigenous"), each = 2),
  Cohort  = rep(c("  2008", "2010"), 2)
)

# Point-and-interval plot, one facet per Indigenous-status group.
# coord_flip() puts the cohort labels on the vertical axis.
cohort_plot_2 <- ggplot(
  Cohort_h2_1_out,
  aes(x = Cohort, y = prob, ymin = ci.low, ymax = ci.high)
) +
  geom_pointrange() +
  coord_flip() +
  facet_wrap(~indig) +
  theme_stata() +
  labs(
    x = "",
    y = "Probability of not completing high-school",
    title = "Cohort by Indigenous Status"
  )

#two columns: one with separate, one with all

# ---- SES by Indigenous status (Hypothesis 2) ----

# Survey-weighted quasibinomial model of DROPOUT with an ESCS x INDIG
# interaction, adjusting for COHORT, GENDER, GRADE, GEO and ACH1PV.
# INDIG and ESCS are listed again after `ESCS*INDIG`; R keeps each formula
# term only once, so the repeats do not alter the fitted model.
h2_SES_1 <- svyglm(
  DROPOUT ~ ESCS*INDIG + INDIG + COHORT + GENDER + GRADE + GEO + ACH1PV + ESCS,
  design = lsay,
  family = quasibinomial()
)

# Model-based predictions for each INDIG group at ESCS = -2, -1, 0, 1, 2.
ses_h2 <- ggeffects::ggpredict(
  h2_SES_1,
  terms = c("INDIG", "ESCS [-2,-1,0,1,2]")
)

# Plotting frame: predicted probability with its confidence limits.
# The group labels and SES values are attached by row position, not read
# from `ses_h2`.
# NOTE(review): assumes rows are ordered by INDIG first and ESCS second,
# with non-Indigenous as the first level -- confirm.
ses_h1_out_1 <- data.frame(
  prob    = ses_h2$predicted,
  ci.low  = ses_h2$conf.low,
  ci.high = ses_h2$conf.high,
  indig   = rep(c("non-Indigenous", "Indigenous"), each = 5),
  ses     = rep(-2:2, 2)
)

# Point-and-interval plot, one facet per Indigenous-status group.
# coord_flip() puts SES on the vertical axis. The earlier
# xlab("Indigenous") / ylab("Probability") pair was overridden by the final
# labels and has been removed; the title spelling now matches the
# geography and cohort panels ("Indigenous").
ses_plot_2 <- ggplot(
  ses_h1_out_1,
  aes(x = ses, y = prob, ymin = ci.low, ymax = ci.high)
) +
  geom_pointrange() +
  coord_flip() +
  facet_wrap(~indig) +
  theme_stata() +
  labs(
    x = "SES",
    y = "Probability of not completing high-school",
    title = "SES by Indigenous Status"
  )


# ---- Achievement by Indigenous status (Hypothesis 2) ----

# Survey-weighted quasibinomial model of DROPOUT with an ACH1PV x INDIG
# interaction, adjusting for COHORT, GENDER, GRADE, GEO and ESCS.
# INDIG and ACH1PV are listed again after `ACH1PV*INDIG`; R keeps each
# formula term only once, so the repeats do not alter the fitted model.
h2_acheive_1 <- svyglm(
  DROPOUT ~ ACH1PV*INDIG + INDIG + COHORT + GENDER + GRADE + GEO + ACH1PV + ESCS,
  design = lsay,
  family = quasibinomial()
)

# Model-based predictions for each INDIG group at ACH1PV = -2, -1, 0, 1, 2.
Achieve_h2_1 <- ggeffects::ggpredict(
  h2_acheive_1,
  terms = c("INDIG", "ACH1PV [-2,-1,0,1,2]")
)

# Plotting frame: predicted probability with its confidence limits.
# The group labels and achievement values are attached by row position,
# not read from the prediction object.
# NOTE(review): assumes rows are ordered by INDIG first and ACH1PV second,
# with non-Indigenous as the first level -- confirm.
Achieve_h2_out_1 <- data.frame(
  prob    = Achieve_h2_1$predicted,
  ci.low  = Achieve_h2_1$conf.low,
  ci.high = Achieve_h2_1$conf.high,
  indig   = rep(c("non-Indigenous", "Indigenous"), each = 5),
  ACH1PV  = rep(-2:2, 2)
)

# Point-and-interval plot, one facet per Indigenous-status group.
# coord_flip() puts achievement on the vertical axis. The earlier
# xlab("Indigenous") / ylab("Probability") pair was overridden by the final
# labels and has been removed; the title spelling now matches the
# geography and cohort panels ("Indigenous").
acheive_plot_2 <- ggplot(
  Achieve_h2_out_1,
  aes(x = ACH1PV, y = prob, ymin = ci.low, ymax = ci.high)
) +
  geom_pointrange() +
  coord_flip() +
  facet_wrap(~indig) +
  theme_stata() +
  labs(
    x = "Achievement",
    y = "Probability of not completing high-school",
    title = "Achievement by Indigenous Status"
  )



 
  
# ---- Combined figure for Hypothesis 2 ----
# TODO (original note): facet_wrap by cohort and Indigenous group with a
# single facet row (n = 1) so the panels line up with each other.

# Arrange the four interaction plots (cohort, geography, SES, achievement)
# in a two-column grid. The layout is set once; the original applied the
# same plot_layout(ncol = 2) twice.
tmp <- cohort_plot_2 + geo_plot_2 + ses_plot_2 + acheive_plot_2 +
  plot_layout(ncol = 2)

# Write the figure to disk. The plot is passed by name rather than as a
# trailing positional argument.
# NOTE(review): this is an absolute, machine-specific path; it will fail on
# any computer without this directory -- consider a project-relative path.
ggsave(
  "/Users/jociarrochi/Dropbox/to do/lsay/r syntax/hyp2Facet.png",
  plot = tmp,
  width = 10,
  height = 8,
  dpi = 600
)

# Display the combined figure once (the original printed it both before and
# after saving).
tmp