Let’s just look at trying to make some single factor lavaan models work

Start with 2016 only data

 #read in the data for 2016 only
Data <-read.csv('../cleaned_data/2016_data.csv')
str(Data)
## 'data.frame':    7474 obs. of  54 variables:
##  $ postcode       : int  800 800 800 810 810 810 812 812 812 820 ...
##  $ state          : Factor w/ 12 levels "ACT","NSW","NSW/ACT",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ year           : int  2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
##  $ age            : int  1 2 5 5 1 2 1 5 2 2 ...
##  $ pc_immun       : Factor w/ 9 levels "<70.0","70.0-74.9",..: 6 3 6 6 7 6 6 7 5 4 ...
##  $ caution        : int  1 1 1 0 0 0 0 0 0 0 ...
##  $ pc_immun_class : int  6 3 6 6 7 6 6 7 5 4 ...
##  $ PHN_code       : Factor w/ 31 levels "PHN101","PHN102",..: 30 30 30 30 30 30 30 30 30 30 ...
##  $ PHN_number     : int  701 701 701 701 701 701 701 701 701 701 ...
##  $ IEO_MAXS       : int  1163 1163 1163 1181 1181 1181 1061 1061 1061 1214 ...
##  $ IEO_MINS       : int  1023 1023 1023 858 858 858 916 916 916 622 ...
##  $ IEO_RWAD       : int  9 9 9 8 8 8 6 6 6 9 ...
##  $ IEO_RWAP       : int  87 87 87 77 77 77 58 58 58 87 ...
##  $ IEO_RWAR       : int  2287 2287 2287 2001 2001 2001 1520 1520 1520 2263 ...
##  $ IEO_RWSD       : int  10 10 10 9 9 9 6 6 6 9 ...
##  $ IEO_RWSP       : int  92 92 92 84 84 84 56 56 56 89 ...
##  $ IEO_RWSR       : int  33 33 33 30 30 30 20 20 20 32 ...
##  $ IEO_SCORE      : int  1089 1089 1089 1045 1045 1045 997 997 997 1085 ...
##  $ IEO_URP        : int  6464 6464 6464 33302 33302 33302 18873 18873 18873 19447 ...
##  $ IER_MAXS       : int  1019 1019 1019 1162 1162 1162 1122 1122 1122 1159 ...
##  $ IER_MINS       : int  751 751 751 779 779 779 735 735 735 407 ...
##  $ IER_RWAD       : int  2 2 2 6 6 6 6 6 6 6 ...
##  $ IER_RWAP       : int  16 16 16 58 58 58 57 57 57 56 ...
##  $ IER_RWAR       : int  412 412 412 1510 1510 1510 1485 1485 1485 1453 ...
##  $ IER_RWSD       : int  6 6 6 8 8 8 7 7 7 7 ...
##  $ IER_RWSP       : int  51 51 51 76 76 76 70 70 70 67 ...
##  $ IER_RWSR       : int  18 18 18 27 27 27 25 25 25 24 ...
##  $ IER_SCORE      : int  946 946 946 1014 1014 1014 1013 1013 1013 1011 ...
##  $ IER_URP        : int  6464 6464 6464 33302 33302 33302 18873 18873 18873 19447 ...
##  $ IRSAD_MAXS     : int  1167 1167 1167 1168 1168 1168 1130 1130 1130 1211 ...
##  $ IRSAD_MINS     : int  914 914 914 868 868 868 782 782 782 523 ...
##  $ IRSAD_RWAD     : int  10 10 10 9 9 9 7 7 7 10 ...
##  $ IRSAD_RWAP     : int  92 92 92 81 81 81 69 69 69 91 ...
##  $ IRSAD_RWAR     : int  2398 2398 2398 2121 2121 2121 1800 1800 1800 2389 ...
##  $ IRSAD_RWSD     : int  10 10 10 8 8 8 7 7 7 9 ...
##  $ IRSAD_RWSP     : int  92 92 92 73 73 73 62 62 62 89 ...
##  $ IRSAD_RWSR     : int  33 33 33 26 26 26 22 22 22 32 ...
##  $ IRSAD_SCORE    : int  1096 1096 1096 1052 1052 1052 1020 1020 1020 1094 ...
##  $ IRSAD_URP      : int  6464 6464 6464 33302 33302 33302 18873 18873 18873 19447 ...
##  $ IRSD_MAXS      : int  1125 1125 1125 1146 1146 1146 1111 1111 1111 1167 ...
##  $ IRSD_MINS      : int  842 842 842 832 832 832 720 720 720 403 ...
##  $ IRSD_RWAD      : int  9 9 9 8 8 8 6 6 6 9 ...
##  $ IRSD_RWAP      : int  86 86 86 72 72 72 59 59 59 89 ...
##  $ IRSD_RWAR      : int  2243 2243 2243 1883 1883 1883 1535 1535 1535 2323 ...
##  $ IRSD_RWSD      : int  8 8 8 7 7 7 6 6 6 9 ...
##  $ IRSD_RWSP      : int  78 78 78 67 67 67 59 59 59 87 ...
##  $ IRSD_RWSR      : int  28 28 28 24 24 24 21 21 21 31 ...
##  $ IRSD_SCORE     : int  1066 1066 1066 1037 1037 1037 1013 1013 1013 1073 ...
##  $ IRSD_URP       : int  6464 6464 6464 33302 33302 33302 18873 18873 18873 19447 ...
##  $ electorate     : Factor w/ 150 levels "Adelaide","Aston",..: 136 136 136 136 136 136 136 136 136 136 ...
##  $ total_tax      : num  389375576 389375576 389375576 1367379789 1367379789 ...
##  $ num_individuals: int  5464 5464 5464 21128 21128 21128 11509 11509 11509 13252 ...
##  $ mean_tax_000s  : num  71.3 71.3 71.3 64.7 64.7 ...
##  $ political_score: num  4 4 4 4 4 4 4 4 4 4 ...
#Filter and clean data to get only the variables we really want

geo_data <- Data %>%
  select(postcode, state, year, age, pc_immun, caution, pc_immun_class, PHN_number, IEO_SCORE, IER_SCORE, IRSAD_SCORE, IRSD_SCORE, electorate, mean_tax_000s, political_score)
str(geo_data)
## 'data.frame':    7474 obs. of  15 variables:
##  $ postcode       : int  800 800 800 810 810 810 812 812 812 820 ...
##  $ state          : Factor w/ 12 levels "ACT","NSW","NSW/ACT",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ year           : int  2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
##  $ age            : int  1 2 5 5 1 2 1 5 2 2 ...
##  $ pc_immun       : Factor w/ 9 levels "<70.0","70.0-74.9",..: 6 3 6 6 7 6 6 7 5 4 ...
##  $ caution        : int  1 1 1 0 0 0 0 0 0 0 ...
##  $ pc_immun_class : int  6 3 6 6 7 6 6 7 5 4 ...
##  $ PHN_number     : int  701 701 701 701 701 701 701 701 701 701 ...
##  $ IEO_SCORE      : int  1089 1089 1089 1045 1045 1045 997 997 997 1085 ...
##  $ IER_SCORE      : int  946 946 946 1014 1014 1014 1013 1013 1013 1011 ...
##  $ IRSAD_SCORE    : int  1096 1096 1096 1052 1052 1052 1020 1020 1020 1094 ...
##  $ IRSD_SCORE     : int  1066 1066 1066 1037 1037 1037 1013 1013 1013 1073 ...
##  $ electorate     : Factor w/ 150 levels "Adelaide","Aston",..: 136 136 136 136 136 136 136 136 136 136 ...
##  $ mean_tax_000s  : num  71.3 71.3 71.3 64.7 64.7 ...
##  $ political_score: num  4 4 4 4 4 4 4 4 4 4 ...

Remove zeros from pc_immun_class

This seems an impossible task - ask Kirsty or Jarod tonight

#make electorate and state take a numerical class value in some way
#ASK TONIGHT

First 2016 model - not very good

pc.model <- 'pc_immun_class =~ postcode + PHN_number + IRSAD_SCORE'
pc.fit <- cfa(model=pc.model,
                  data = geo_data)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING: some observed variances are larger than 1000000
##   lavaan NOTE: use varTable(fit) to investigate
summary(pc.fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-3 ended normally after 8 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                          6
## 
##                                                   Used       Total
##   Number of observations                          7471        7474
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                    6917.557
##   Degrees of freedom                                 0
##   Minimum Function Value               0.4629606128181
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             6917.558
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -156861.729
##   Loglikelihood unrestricted model (H1)     -153402.950
## 
##   Number of free parameters                          6
##   Akaike (AIC)                              313735.458
##   Bayesian (BIC)                            313776.970
##   Sample-size adjusted Bayesian (BIC)       313757.903
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent Confidence Interval          0.000  0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.324
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                     Estimate     Std.Err    z-value  P(>|z|)   Std.lv   
##   pc_immun_class =~                                                     
##     postcode              1.000                                    0.224
##     PHN_number            0.050 543802.767    0.000    1.000       0.011
##     IRSAD_SCORE          -0.006  58350.984   -0.000    1.000      -0.001
##   Std.all
##          
##     0.000
##     0.000
##    -0.000
## 
## Variances:
##                    Estimate     Std.Err    z-value  P(>|z|)   Std.lv   
##    .postcode       2247616.205 553050.810    4.064    0.000 2247616.205
##    .PHN_number       26460.640   1421.910   18.609    0.000   26460.640
##    .IRSAD_SCORE       5823.976     97.088   59.986    0.000    5823.976
##     pc_immun_class       0.050 551826.810    0.000    1.000       1.000
##   Std.all
##     1.000
##     1.000
##     1.000
##     1.000
lavInspect(pc.fit, what="estimates")
## $lambda
##             pc_mm_
## postcode     1.000
## PHN_number   0.050
## IRSAD_SCORE -0.006
## 
## $theta
##             postcd      PHN_nm      IRSAD_     
## postcode    2247616.205                        
## PHN_number        0.000   26460.640            
## IRSAD_SCORE       0.000       0.000    5823.976
## 
## $psi
##                pc_mm_
## pc_immun_class 0.05
semPaths(pc.fit,  what = "stand", rotation = 4)

You can also embed plots, for example:

semCors(pc.fit)

Try the same single factor model as first model usng 2015 data

 #read in the data for 2015 only
#try this to stop data confusion
char_data <- read.csv('../cleaned_data/2015_data.csv', stringsAsFactors=F)
num_data <- data.frame(data.matrix(char_data))
## Warning in data.matrix(char_data): NAs introduced by coercion

## Warning in data.matrix(char_data): NAs introduced by coercion

## Warning in data.matrix(char_data): NAs introduced by coercion

## Warning in data.matrix(char_data): NAs introduced by coercion
numeric_columns <- sapply(num_data,function(x){mean(as.numeric(is.na(x)))<0.5})
final_data <- data.frame(num_data[,numeric_columns], char_data[,!numeric_columns])
str(final_data)
## 'data.frame':    7260 obs. of  54 variables:
##  $ postcode       : num  800 800 800 810 810 810 812 812 812 820 ...
##  $ year           : num  2015 2015 2015 2015 2015 ...
##  $ age            : num  1 5 2 2 1 5 2 5 1 2 ...
##  $ caution        : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ pc_immun_class : num  5 5 3 6 6 6 6 6 7 6 ...
##  $ PHN_number     : num  701 701 701 701 701 701 701 701 701 701 ...
##  $ IEO_MAXS       : num  1167 1167 1167 1252 1252 ...
##  $ IEO_MINS       : num  1066 1066 1066 872 872 ...
##  $ IEO_RWAD       : num  9 9 9 8 8 8 7 7 7 9 ...
##  $ IEO_RWAP       : num  85 85 85 79 79 79 65 65 65 85 ...
##  $ IEO_RWAR       : num  2089 2089 2089 1958 1958 ...
##  $ IEO_RWSD       : num  10 10 10 10 10 10 8 8 8 10 ...
##  $ IEO_RWSP       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ IEO_RWSR       : num  28 28 28 27 27 27 21 21 21 29 ...
##  $ IEO_SCORE      : num  1077 1077 1077 1051 1051 ...
##  $ IEO_URP        : num  0 0 0 0 0 0 0 0 0 3 ...
##  $ IER_MAXS       : num  986 986 986 1123 1123 ...
##  $ IER_MINS       : num  927 927 927 743 743 743 734 734 734 404 ...
##  $ IER_RWAD       : num  3 3 3 6 6 6 7 7 7 6 ...
##  $ IER_RWAP       : num  21 21 21 56 56 56 61 61 61 57 ...
##  $ IER_RWAR       : num  501 501 501 1388 1388 ...
##  $ IER_RWSD       : num  6 6 6 8 8 8 8 8 8 8 ...
##  $ IER_RWSP       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ IER_RWSR       : num  15 15 15 21 21 21 23 23 23 22 ...
##  $ IER_SCORE      : num  952 952 952 1008 1008 ...
##  $ IER_URP        : num  0 0 0 0 0 0 0 0 0 3 ...
##  $ IRSAD_MAXS     : num  1144 1144 1144 1131 1131 ...
##  $ IRSAD_MINS     : num  1051 1051 1051 803 803 ...
##  $ IRSAD_RWAD     : num  9 9 9 8 8 8 7 7 7 9 ...
##  $ IRSAD_RWAP     : num  87 87 87 75 75 75 67 67 67 88 ...
##  $ IRSAD_RWAR     : num  2136 2136 2136 1840 1840 ...
##  $ IRSAD_RWSD     : num  9 9 9 8 8 8 8 8 8 10 ...
##  $ IRSAD_RWSP     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ IRSAD_RWSR     : num  26 26 26 22 22 22 21 21 21 28 ...
##  $ IRSAD_SCORE    : num  1072 1072 1072 1037 1037 ...
##  $ IRSAD_URP      : num  4564 4564 4564 29725 29725 ...
##  $ IRSD_MAXS      : num  1127 1127 1127 1123 1123 ...
##  $ IRSD_MINS      : num  1032 1032 1032 768 768 ...
##  $ IRSD_RWAD      : num  9 9 9 7 7 7 6 6 6 9 ...
##  $ IRSD_RWAP      : num  83 83 83 67 67 67 60 60 60 86 ...
##  $ IRSD_RWAR      : num  2053 2053 2053 1656 1656 ...
##  $ IRSD_RWSD      : num  9 9 9 8 8 8 8 8 8 9 ...
##  $ IRSD_RWSP      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ IRSD_RWSR      : num  25 25 25 22 22 22 21 21 21 26 ...
##  $ IRSD_SCORE     : num  1060 1060 1060 1027 1027 ...
##  $ IRSD_URP       : num  0 0 0 0 0 0 0 0 0 3 ...
##  $ total_tax      : num  389375576 389375576 389375576 1367379789 1367379789 ...
##  $ num_individuals: num  5464 5464 5464 21128 21128 ...
##  $ mean_tax_000s  : num  71.3 71.3 71.3 64.7 64.7 ...
##  $ political_score: num  6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 ...
##  $ state          : chr  "NT" "NT" "NT" "NT" ...
##  $ pc_immun       : chr  "85.0-89.9" "85.0-89.9" "75.0-79.9" "90.0-92.4" ...
##  $ PHN_code       : chr  "PHN701" "PHN701" "PHN701" "PHN701" ...
##  $ electorate     : chr  "Solomon" "Solomon" "Solomon" "Solomon" ...
#Our geo factor is measured by PHN, IRSAD score and pc immun class
geo_2015.model <- 'pc_immun_class =~ postcode + PHN_number + IRSAD_SCORE'
geo_2015.fit <- cfa(model = geo_2015.model,
                  data = final_data)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING: some observed variances are larger than 1000000
##   lavaan NOTE: use varTable(fit) to investigate
summary(geo_2015.fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-3 ended normally after 8 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                          6
## 
##                                                   Used       Total
##   Number of observations                          7257        7260
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                    6725.242
##   Degrees of freedom                                 0
##   Minimum Function Value               0.4633624320893
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             6725.242
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -152404.198
##   Loglikelihood unrestricted model (H1)     -149041.576
## 
##   Number of free parameters                          6
##   Akaike (AIC)                              304820.395
##   Bayesian (BIC)                            304861.734
##   Sample-size adjusted Bayesian (BIC)       304842.667
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent Confidence Interval          0.000  0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.320
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                     Estimate     Std.Err     z-value  P(>|z|)   Std.lv   
##   pc_immun_class =~                                                      
##     postcode              1.000                                     0.224
##     PHN_number            0.043  938907.382    0.000    1.000       0.010
##     IRSAD_SCORE          -0.003   68164.660   -0.000    1.000      -0.001
##   Std.all
##          
##     0.000
##     0.000
##    -0.000
## 
## Variances:
##                    Estimate     Std.Err     z-value  P(>|z|)   Std.lv   
##    .postcode       2236493.080 1090367.414    2.051    0.040 2236493.080
##    .PHN_number       26388.243    2076.885   12.706    0.000   26388.243
##    .IRSAD_SCORE       5926.914      99.164   59.769    0.000    5926.914
##     pc_immun_class       0.050 1089735.103    0.000    1.000       1.000
##   Std.all
##     1.000
##     1.000
##     1.000
##     1.000
lavInspect(geo_2015.fit, what="estimates")
## $lambda
##             pc_mm_
## postcode     1.000
## PHN_number   0.043
## IRSAD_SCORE -0.003
## 
## $theta
##             postcd      PHN_nm      IRSAD_     
## postcode    2236493.080                        
## PHN_number        0.000   26388.243            
## IRSAD_SCORE       0.000       0.000    5926.914
## 
## $psi
##                pc_mm_
## pc_immun_class 0.05
semPaths(geo_2015.fit,  what = "stand", rotation = 4)

You can also embed plots, for example:

semCors(geo_2015.fit)

#modification indices (rmsea and srmr need to be below 0.10)
#look at loading and variances to check whether they are improbable or just don't relate to the latent variable, if loadings are over point 3 (variance will be large in comparison to the data)
#var(final_data)

Not a great geo model

QUESTIONS: How can I correctly set the weight of these to become properly measured manifest variables?

Third model

#Try a single factor model on the sentiment of trust

trust.model <- 'score =~ IRSAD_SCORE + political_score + mean_tax_000s + postcode'
trust.fit <- cfa(model=trust.model,
                  data = geo_data)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING: some observed variances are larger than 1000000
##   lavaan NOTE: use varTable(fit) to investigate
## Warning in lavaan::lavaan(model = trust.model, data = geo_data, model.type
## = "cfa", : lavaan WARNING: the optimizer warns that a solution has NOT been
## found!
summary(trust.fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-3 did NOT end normally after 10000 iterations
## ** WARNING ** Estimates below are most likely unreliable
## 
##   Optimization method                           NLMINB
##   Number of free parameters                          8
## 
##                                                   Used       Total
##   Number of observations                          7286        7474
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                          NA
##   Degrees of freedom                                NA
##   P-value                                           NA
## Warning in .local(object, ...): lavaan WARNING: fit measures not available if model did not converge
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate     Std.Err  z-value  P(>|z|)   Std.lv   
##   score =~                                                           
##     IRSAD_SCORE          1.000                                  1.906
##     political_scor      -0.003       NA                        -0.006
##     mean_tax_000s      237.702       NA                       453.073
##     postcode             1.152       NA                         2.195
##   Std.all
##          
##     0.025
##    -0.004
##    26.874
##     0.001
## 
## Variances:
##                    Estimate     Std.Err  z-value  P(>|z|)   Std.lv   
##    .IRSAD_SCORE       5747.355       NA                      5747.355
##    .political_scor       2.715       NA                         2.715
##    .mean_tax_000s  -204990.801       NA                   -204990.801
##    .postcode       2227970.825       NA                   2227970.825
##     score                3.633       NA                         1.000
##   Std.all
##     0.999
##     1.000
##  -721.214
##     1.000
##     1.000
semCors(trust.fit)

semPaths(trust.fit,  what = "stand", rotation = 4)

semCors(trust.fit, include="difference")

##This isn’t really working Let’s see if we can replicate the geo modelling at SA3 level ##Fourth model

#try this to stop data confusion
char_data <- read.csv('../raw_data/sa3_immune.csv', stringsAsFactors=F)
num_data <- data.frame(data.matrix(char_data))
## Warning in data.matrix(char_data): NAs introduced by coercion

## Warning in data.matrix(char_data): NAs introduced by coercion

## Warning in data.matrix(char_data): NAs introduced by coercion

## Warning in data.matrix(char_data): NAs introduced by coercion

## Warning in data.matrix(char_data): NAs introduced by coercion

## Warning in data.matrix(char_data): NAs introduced by coercion
numeric_columns <- sapply(num_data,function(x){mean(as.numeric(is.na(x)))<0.5})
final_data <- data.frame(num_data[,numeric_columns], char_data[,!numeric_columns])
str(final_data)
## 'data.frame':    5997 obs. of  10 variables:
##  $ SA3_code               : num  10101 10102 10103 10104 10201 ...
##  $ year_start             : num  2016 2016 2016 2016 2016 ...
##  $ year_close             : num  2017 2017 2017 2017 2017 ...
##  $ age                    : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ num_registered         : num  844 797 204 624 2014 ...
##  $ num_fully_immunised    : num  808 767 193 581 1904 ...
##  $ num_not_fully_immunised: num  36 30 11 43 110 68 24 28 29 34 ...
##  $ percent_fully_immunised: num  95.7 96.2 94.6 93.1 94.5 96.6 95.9 96.2 94.9 96.2 ...
##  $ state                  : chr  "NSW" "NSW" "NSW" "NSW" ...
##  $ SA3_name               : chr  "Goulburn - Yass" "Queanbeyan" "Snowy Mountains" "South Coast" ...
#Try a single factor model
#Our geo factor is measured by PHN, IRSAD score and pc immun class
sa3.model <- 'sa3 =~ SA3_code + year_start + age + num_not_fully_immunised + num_fully_immunised + percent_fully_immunised'
sa3.fit <- cfa(model=sa3.model,
                  data = final_data)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING: some observed variances are larger than 1000000
##   lavaan NOTE: use varTable(fit) to investigate
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
##     Could not compute standard errors! The information matrix could
##     not be inverted. This may be a symptom that the model is not
##     identified.
summary(sa3.fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-3 ended normally after 8 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         12
## 
##                                                   Used       Total
##   Number of observations                          5823        5997
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                   15133.237
##   Degrees of freedom                                 9
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic            15133.237
##   Degrees of freedom                                15
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.000
##   Tucker-Lewis Index (TLI)                      -0.667
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -179463.826
##   Loglikelihood unrestricted model (H1)     -171897.207
## 
##   Number of free parameters                         12
##   Akaike (AIC)                              358951.652
##   Bayesian (BIC)                            359031.687
##   Sample-size adjusted Bayesian (BIC)       358993.554
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.537
##   90 Percent Confidence Interval          0.530  0.544
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.220
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate       Std.Err  z-value  P(>|z|)   Std.lv     
##   sa3 =~                                                                 
##     SA3_code               1.000                                    0.224
##     year_start            -0.000       NA                          -0.000
##     age                   -0.000       NA                          -0.000
##     nm_nt_flly_mmn        -0.009       NA                          -0.002
##     num_flly_mmnsd        -0.115       NA                          -0.026
##     prcnt_flly_mmn        -0.000       NA                          -0.000
##   Std.all
##          
##     0.000
##    -0.000
##    -0.000
##    -0.000
##    -0.000
##    -0.000
## 
## Variances:
##                    Estimate       Std.Err  z-value  P(>|z|)   Std.lv     
##    .SA3_code       301155142.396       NA                   301155142.396
##    .year_start             2.914       NA                           2.914
##    .age                    2.888       NA                           2.888
##    .nm_nt_flly_mmn      3580.560       NA                        3580.560
##    .num_flly_mmnsd    350482.667       NA                      350482.667
##    .prcnt_flly_mmn         7.456       NA                           7.456
##     sa3                    0.050       NA                           1.000
##   Std.all
##     1.000
##     1.000
##     1.000
##     1.000
##     1.000
##     1.000
##     1.000
#plot it
lavInspect(sa3.fit, what="estimates")
## $lambda
##                            sa3
## SA3_code                 1.000
## year_start               0.000
## age                      0.000
## num_not_fully_immunised -0.009
## num_fully_immunised     -0.115
## percent_fully_immunised  0.000
## 
## $theta
##                         SA3_cd        yr_str        age          
## SA3_code                301155142.396                            
## year_start                      0.000         2.914              
## age                             0.000         0.000         2.888
## num_not_fully_immunised         0.000         0.000         0.000
## num_fully_immunised             0.000         0.000         0.000
## percent_fully_immunised         0.000         0.000         0.000
##                         nm_n__        nm_fl_        prcn__       
## SA3_code                                                         
## year_start                                                       
## age                                                              
## num_not_fully_immunised      3580.560                            
## num_fully_immunised             0.000    350482.667              
## percent_fully_immunised         0.000         0.000         7.456
## 
## $psi
##     sa3 
## sa3 0.05
semPaths(sa3.fit,  what = "stand", rotation = 4)

semCors(sa3.fit, include="difference")