Update on qualitative coding

Item #13 - Please explain the one or two technology competencies that you consider as being most important to your individual work as a teacher educator.

Loading, setting up

In this section, we load the packages used in the analysis and make sure CSV data files are ignored by git:

library(tidyverse)
library(ggridges)
library(corrr)
library(clipr)
library(psych)
library(lavaan)
library(broom)

# Add "*.csv" to the project's .gitignore so the survey data files are not committed.
usethis::use_git_ignore("*.csv")
## ✔ Setting active project to '/Users/joshuarosenberg/Documents/tetc-analysis'

Read the data:

# d <- read_csv("tetcs-survey-export.csv", skip = 0)
d <- read_csv("full-tetcs-dataset.csv")
## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.
# Drop rows 1-2 before analysis (presumably the extra header rows of the
# survey export rather than responses — confirm against the raw file).
d <- slice(d, -(1:2))

Check that the data loaded:

d
## # A tibble: 337 x 65
##    StartDate EndDate Status IPAddress Progress `Duration (in s… Finished
##    <chr>     <chr>   <chr>  <chr>     <chr>    <chr>            <chr>   
##  1 2018-09-… 2018-0… IP Ad… 152.33.5… 100      499              True    
##  2 2018-09-… 2018-0… IP Ad… 152.33.6… 100      1569             True    
##  3 2018-09-… 2018-0… IP Ad… 152.33.7… 100      667              True    
##  4 2018-09-… 2018-0… IP Ad… 134.139.… 100      555              True    
##  5 2018-09-… 2018-0… IP Ad… 152.33.1… 100      600              True    
##  6 2018-09-… 2018-0… IP Ad… 49.199.2… 100      1006             True    
##  7 2018-09-… 2018-0… IP Ad… 120.151.… 100      491              True    
##  8 2018-09-… 2018-0… IP Ad… 1.128.10… 100      868              True    
##  9 2018-09-… 2018-0… IP Ad… 121.222.… 100      674              True    
## 10 2018-09-… 2018-0… IP Ad… 144.133.… 100      616              True    
## # … with 327 more rows, and 58 more variables: RecordedDate <chr>,
## #   ResponseId <chr>, RecipientLastName <chr>, RecipientFirstName <chr>,
## #   RecipientEmail <chr>, ExternalReference <chr>, LocationLatitude <chr>,
## #   LocationLongitude <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, Q2 <chr>, Q3 <chr>, Q3_8_TEXT <chr>, Q4 <chr>,
## #   Q5_1 <chr>, Q5_2 <chr>, Q6 <chr>, Q6_6_TEXT <chr>, Q7 <chr>,
## #   Q7_7_TEXT <chr>, Q8 <chr>, Q8_12_TEXT <chr>, Q9_4 <chr>, Q9_5 <chr>,
## #   Q9_6 <chr>, Q9_7 <chr>, `Q10#1_1` <chr>, `Q10#1_2` <chr>,
## #   `Q10#1_3` <chr>, `Q10#1_4` <chr>, `Q10#1_5` <chr>, Q11_1 <chr>,
## #   Q11_2 <chr>, Q11_3 <chr>, Q11_4 <chr>, Q11_5 <chr>, Q11_6 <chr>,
## #   Q11_7 <chr>, Q11_8 <chr>, Q11_9 <chr>, Q11_10 <chr>, Q11_11 <chr>,
## #   Q11_12 <chr>, Q12 <chr>, Q13 <chr>, Q14 <chr>, Q15 <chr>, Q16 <chr>,
## #   Q17 <chr>, Q18_1 <chr>, Q18_2 <chr>, Q18_3 <chr>, Q18_4 <chr>,
## #   Q18_5 <chr>, Q18_6 <chr>, Q18_7 <chr>, Q18_8 <chr>, Q18_9 <chr>

Process the TETCs variables:

# Turn the twelve TETC items (Q11_1-Q11_12) into integer ratings: take the first
# run of digits/commas/periods (with any surrounding parentheses) from each
# response, coerce it to integer, and name the columns v1..v12.
ds <- d %>% 
    select(Q11_1:Q11_12) %>% 
    mutate_all(function(x) as.integer(str_extract(x, "\\(?[0-9,.]+\\)?"))) %>% 
    set_names(str_c("v", 1:12))

# Mean and SD of each TETC item, formatted as "mean (sd)" and ordered TETC1..TETC12.
ds %>% 
    # A named list replaces funs(), which is deprecated since dplyr 0.8.0; the
    # summary columns are still named v1_mean, v1_sd, ...
    summarize_all(list(mean = mean, sd = sd), na.rm = TRUE) %>%
    gather(key, val) %>% 
    separate(key, into = c("var", "stat")) %>% 
    spread(stat, val) %>% 
    # Sort by the numeric item index (as text, v10 would sort before v2).
    mutate(TETC = as.integer(str_sub(var, start = 2))) %>% 
    arrange(TETC) %>% 
    mutate(TETC = str_c("TETC", TETC)) %>% 
    mutate(mean = round(mean, 2),
           sd = round(sd, 2)) %>% 
    # sprintf() keeps trailing zeros, so an SD of 1.2 prints as "1.20".
    mutate(mean_sd = sprintf("%.2f (%.2f)", mean, sd)) %>% 
    select(mean_sd)
## # A tibble: 12 x 1
##    mean_sd    
##    <chr>      
##  1 3.66 (1.11)
##  2 3.79 (1.03)
##  3 3.86 (0.98)
##  4 4.21 (0.89)
##  5 3.54 (1.11)
##  6 3.69 (1.05)
##  7 3.74 (1.15)
##  8 3.06 (1.31)
##  9 3.41 (1.17)
## 10 3.59 (1.20)
## 11 3.22 (1.37)
## 12 3.87 (1.15)
# c <- cbind(ds)
# 
# g <- ds %>% 
#     gather(key, val)
# 
# summary(aov(val ~ key, g))
# Recode the asset/barrier response labels to a 1-5 numeric score
# (1 = major barrier, 3 = neutral, 5 = major asset).
#
# x: vector of response labels.
# Returns a numeric vector the same length as x. Missing input stays NA.
# Labels that are not one of the five known responses also become NA, and a
# warning names them so that export/label changes do not go unnoticed.
rec_f <- function(x) {
    # Explicit lookup table: response label -> score.
    scores <- c(
        "Major asset I experience / have experienced." = 5,
        "Asset I experience / have experienced." = 4,
        "Neutral / neither a barrier nor an asset." = 3,
        "Barrier I experience / have experienced." = 2,
        "Major barrier I experience / have experienced." = 1
    )
    labels <- as.character(x)

    unknown <- setdiff(labels[!is.na(labels)], names(scores))
    if (length(unknown) > 0) {
        warning("Unrecognised response label(s) recoded to NA: ",
                paste(unknown, collapse = "; "),
                call. = FALSE)
    }

    # Indexing by name yields NA for missing or unknown labels.
    unname(scores[labels])
}
    
# Numeric (1-5) versions of the nine asset/barrier items Q18_1-Q18_9.
dd <- mutate_all(select(d, Q18_1:Q18_9), rec_f)

# Per-item mean and SD of the asset/barrier scores, highest mean first.
# Keys stay as the original column names (Q18_1..Q18_9); the plot's x-axis
# limits refer to them by those names.
ddd <- dd %>% 
    gather(key, val) %>% 
    group_by(key) %>% 
    summarize(mean_val = mean(val, na.rm = TRUE),
              sd_val = sd(val, na.rm = TRUE)) %>% 
    arrange(desc(mean_val))

# Jittered raw responses per item (grey) with the item mean (point) and
# mean +/- 1 SD (error bar) from ddd overlaid; the plot is then saved as a PNG.
dd %>% 
    gather(key, val) %>% 
    ggplot() +
    geom_jitter(
        aes(x = key, y = val),
        height = 0.2, alpha = .5, color = "gray"
    ) +
    geom_point(aes(x = key, y = mean_val), data = ddd, size = 2.5) +
    geom_errorbar(
        aes(x = key, y = mean_val,
            ymin = mean_val - sd_val, ymax = mean_val + sd_val),
        data = ddd
    ) +
    # Fixed left-to-right item order on the x-axis.
    scale_x_discrete(
        limits = c("Q18_7", "Q18_9", "Q18_8", "Q18_6", "Q18_1",
                   "Q18_3", "Q18_4", "Q18_5", "Q18_2")
    ) +
    labs(x = NULL, y = "Value") +
    theme_bw() +
    theme(text = element_text(size = 14),
          axis.text.x = element_text(angle = 45, hjust = 1))

ggsave("assets-barriers-plot.png", width = 8, height = 6)

EFA (exploratory factor analysis)

# Scree plot for the nine asset/barrier items, using complete cases only.
scree(dd[complete.cases(dd), ])

# Two-factor exploratory factor analysis on the same complete cases.
f <- fac(dd[complete.cases(dd), ], nfactors = 2)
## Loading required namespace: GPArotation
print(f)
## Factor Analysis using method =  minres
## Call: fac(r = dd[complete.cases(dd), ], nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
##         MR1   MR2   h2   u2 com
## Q18_1  0.22  0.56 0.46 0.54 1.3
## Q18_2  0.28  0.51 0.46 0.54 1.5
## Q18_3  0.16  0.61 0.48 0.52 1.1
## Q18_4 -0.11  0.83 0.62 0.38 1.0
## Q18_5 -0.10  0.71 0.46 0.54 1.0
## Q18_6  0.37  0.37 0.39 0.61 2.0
## Q18_7  0.67  0.11 0.52 0.48 1.1
## Q18_8  0.93 -0.03 0.85 0.15 1.0
## Q18_9  0.89 -0.01 0.79 0.21 1.0
## 
##                        MR1  MR2
## SS loadings           2.58 2.44
## Proportion Var        0.29 0.27
## Cumulative Var        0.29 0.56
## Proportion Explained  0.51 0.49
## Cumulative Proportion 0.51 1.00
## 
##  With factor correlations of 
##      MR1  MR2
## MR1 1.00 0.41
## MR2 0.41 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  36  and the objective function was  4.53 with Chi Square of  1462.71
## The degrees of freedom for the model are 19  and the objective function was  0.44 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.08 
## 
## The harmonic number of observations is  328 with the empirical chi square  75.38  with prob <  1.1e-08 
## The total number of observations was  328  with Likelihood Chi Square =  143.03  with prob <  4.8e-21 
## 
## Tucker Lewis Index of factoring reliability =  0.835
## RMSEA index =  0.143  and the 90 % confidence intervals are  0.12 0.163
## BIC =  32.97
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    MR1  MR2
## Correlation of (regression) scores with factors   0.96 0.91
## Multiple R square of scores with factors          0.92 0.83
## Minimum correlation of possible factor scores     0.83 0.67

CFA (confirmatory factor analysis)

# Three-factor CFA model: access (Q18_1-Q18_3), vision (Q18_4-Q18_6),
# personal (Q18_7-Q18_9).
mod <- 'access  =~ Q18_1 + Q18_2 + Q18_3
             vision =~ Q18_4 + Q18_5 + Q18_6
             personal   =~ Q18_7 + Q18_8 +Q18_9'

# Fit the model and report the estimates together with the fit indices.
c2a <- cfa(mod, data = dd)
summary(c2a, fit.measures = TRUE)
## lavaan 0.6-3 ended normally after 28 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         21
## 
##                                                   Used       Total
##   Number of observations                           328         337
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                     127.682
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             1484.588
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.928
##   Tucker-Lewis Index (TLI)                       0.893
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3813.350
##   Loglikelihood unrestricted model (H1)      -3749.508
## 
##   Number of free parameters                         21
##   Akaike (AIC)                                7668.699
##   Bayesian (BIC)                              7748.352
##   Sample-size adjusted Bayesian (BIC)         7681.741
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.115
##   90 Percent Confidence Interval          0.096  0.135
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.098
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   access =~                                           
##     Q18_1             1.000                           
##     Q18_2             1.010    0.081   12.398    0.000
##     Q18_3             0.886    0.071   12.413    0.000
##   vision =~                                           
##     Q18_4             1.000                           
##     Q18_5             0.872    0.073   11.904    0.000
##     Q18_6             0.561    0.069    8.184    0.000
##   personal =~                                         
##     Q18_7             1.000                           
##     Q18_8             1.347    0.088   15.336    0.000
##     Q18_9             1.308    0.086   15.157    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   access ~~                                           
##     vision            0.608    0.078    7.808    0.000
##     personal          0.385    0.056    6.864    0.000
##   vision ~~                                           
##     personal          0.221    0.046    4.759    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Q18_1             0.655    0.073    9.035    0.000
##    .Q18_2             0.742    0.079    9.414    0.000
##    .Q18_3             0.567    0.060    9.391    0.000
##    .Q18_4             0.336    0.065    5.214    0.000
##    .Q18_5             0.481    0.059    8.137    0.000
##    .Q18_6             0.884    0.074   11.945    0.000
##    .Q18_7             0.515    0.044   11.744    0.000
##    .Q18_8             0.123    0.030    4.111    0.000
##    .Q18_9             0.210    0.031    6.682    0.000
##     access            0.924    0.123    7.486    0.000
##     vision            0.886    0.109    8.122    0.000
##     personal          0.502    0.071    7.101    0.000
# Variant of the three-factor model with Q18_6 removed from the vision factor.
modb <- 'access  =~ Q18_1 + Q18_2 + Q18_3
             vision =~ Q18_4 + Q18_5
             personal   =~ Q18_7 + Q18_8 +Q18_9'

# Fit the reduced model and report the estimates together with the fit indices.
c2b <- cfa(modb, data = dd)
summary(c2b, fit.measures = TRUE)
## lavaan 0.6-3 ended normally after 34 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         19
## 
##                                                   Used       Total
##   Number of observations                           328         337
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                      35.342
##   Degrees of freedom                                17
##   P-value (Chi-square)                           0.006
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             1323.569
##   Degrees of freedom                                28
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.986
##   Tucker-Lewis Index (TLI)                       0.977
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3357.441
##   Loglikelihood unrestricted model (H1)      -3339.770
## 
##   Number of free parameters                         19
##   Akaike (AIC)                                6752.882
##   Bayesian (BIC)                              6824.949
##   Sample-size adjusted Bayesian (BIC)         6764.681
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.057
##   90 Percent Confidence Interval          0.030  0.084
##   P-value RMSEA <= 0.05                          0.296
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.034
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   access =~                                           
##     Q18_1             1.000                           
##     Q18_2             1.010    0.082   12.331    0.000
##     Q18_3             0.891    0.072   12.406    0.000
##   vision =~                                           
##     Q18_4             1.000                           
##     Q18_5             0.790    0.080    9.851    0.000
##   personal =~                                         
##     Q18_7             1.000                           
##     Q18_8             1.351    0.088   15.312    0.000
##     Q18_9             1.308    0.086   15.127    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   access ~~                                           
##     vision            0.601    0.078    7.678    0.000
##     personal          0.383    0.056    6.850    0.000
##   vision ~~                                           
##     personal          0.190    0.047    4.062    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Q18_1             0.659    0.073    9.046    0.000
##    .Q18_2             0.746    0.079    9.419    0.000
##    .Q18_3             0.561    0.060    9.298    0.000
##    .Q18_4             0.217    0.089    2.427    0.015
##    .Q18_5             0.527    0.069    7.686    0.000
##    .Q18_7             0.517    0.044   11.753    0.000
##    .Q18_8             0.121    0.030    4.022    0.000
##    .Q18_9             0.212    0.032    6.730    0.000
##     access            0.920    0.123    7.457    0.000
##     vision            1.005    0.129    7.815    0.000
##     personal          0.501    0.071    7.090    0.000
# Factor scores from the three-factor model without Q18_6 (c2b). The score
# columns keep the latent-variable names (access, vision, personal), which the
# later correlations and ANOVAs use as-is, so no renaming is applied here.
fs <- lavaan::lavPredict(c2b, newdata = dd)

# Two-factor alternative: the access and vision items (Q18_1-Q18_6) load on a
# single factor; personal (Q18_7-Q18_9) is unchanged.
mod1 <- 'access_vision  =~ Q18_1 + Q18_2 + Q18_3 + Q18_4 + Q18_5 + Q18_6
             personal   =~ Q18_7 + Q18_8 +Q18_9'

# Fit the model and report the estimates together with the fit indices.
c3 <- cfa(mod1, data = dd)
summary(c3, fit.measures = TRUE)
## lavaan 0.6-3 ended normally after 25 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         19
## 
##                                                   Used       Total
##   Number of observations                           328         337
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                     206.917
##   Degrees of freedom                                26
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             1484.588
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.875
##   Tucker-Lewis Index (TLI)                       0.827
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3852.967
##   Loglikelihood unrestricted model (H1)      -3749.508
## 
##   Number of free parameters                         19
##   Akaike (AIC)                                7743.933
##   Bayesian (BIC)                              7816.001
##   Sample-size adjusted Bayesian (BIC)         7755.733
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.146
##   90 Percent Confidence Interval          0.128  0.164
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.085
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   access_vision =~                                    
##     Q18_1             1.000                           
##     Q18_2             1.006    0.085   11.831    0.000
##     Q18_3             0.906    0.075   12.134    0.000
##     Q18_4             0.763    0.072   10.591    0.000
##     Q18_5             0.650    0.070    9.302    0.000
##     Q18_6             0.668    0.070    9.518    0.000
##   personal =~                                         
##     Q18_7             1.000                           
##     Q18_8             1.332    0.086   15.435    0.000
##     Q18_9             1.307    0.086   15.284    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   access_vision ~~                                    
##     personal          0.371    0.054    6.857    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Q18_1             0.724    0.073    9.984    0.000
##    .Q18_2             0.818    0.080   10.278    0.000
##    .Q18_3             0.589    0.059    9.956    0.000
##    .Q18_4             0.724    0.065   11.175    0.000
##    .Q18_5             0.793    0.068   11.731    0.000
##    .Q18_6             0.782    0.067   11.655    0.000
##    .Q18_7             0.510    0.044   11.707    0.000
##    .Q18_8             0.134    0.030    4.500    0.000
##    .Q18_9             0.203    0.031    6.484    0.000
##     access_vision     0.855    0.119    7.209    0.000
##     personal          0.508    0.071    7.149    0.000

Items: mean TETC rating per respondent and its correlation with the factor scores

# Mean TETC rating per respondent (rows with complete asset/barrier data only),
# bound column-wise to the factor scores in fs. Rows are matched to fs by
# position: summarize() returns one row per id, in ascending id order.
dx <- ds[complete.cases(dd), ] %>% 
    # seq_len() is safe even when no rows remain (1:0 would yield c(1, 0)).
    mutate(id = seq_len(nrow(.))) %>% 
    gather(key, val, -id) %>% 
    group_by(id) %>% 
    summarize(mean_tetc = mean(val)) %>% 
    cbind(fs) %>% 
    as_tibble()

# Correlations of the three factor scores with the mean TETC rating and with
# each other. id is dropped first, so the factor-score rows are rows 2-4.
dx %>% 
    select(-id) %>% 
    corrr::correlate() %>% 
    slice(2:4)
## 
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
## # A tibble: 3 x 5
##   rowname  mean_tetc access vision personal
##   <chr>        <dbl>  <dbl>  <dbl>    <dbl>
## 1 access       0.429 NA      0.707    0.629
## 2 vision       0.244  0.707 NA        0.299
## 3 personal     0.614  0.629  0.299   NA
# Correlation matrix with pairwise n and p-values for every column of dx.
Hmisc::rcorr(as.matrix(dx))
##              id mean_tetc access vision personal
## id         1.00     -0.02  -0.01  -0.04     0.03
## mean_tetc -0.02      1.00   0.43   0.24     0.61
## access    -0.01      0.43   1.00   0.71     0.63
## vision    -0.04      0.24   0.71   1.00     0.30
## personal   0.03      0.61   0.63   0.30     1.00
## 
## n= 328 
## 
## 
## P
##           id     mean_tetc access vision personal
## id               0.6838    0.8301 0.4769 0.6307  
## mean_tetc 0.6838           0.0000 0.0000 0.0000  
## access    0.8301 0.0000           0.0000 0.0000  
## vision    0.4769 0.0000    0.0000        0.0000  
## personal  0.6307 0.0000    0.0000 0.0000

Demographics - RQ3

Grade level

m <- read_csv("grade-levels-m.csv")
## Parsed with column specification:
## cols(
##   Q7 = col_character(),
##   n = col_double(),
##   X3 = col_character()
## )
# Rename the grade-level lookup columns (read in as Q7, n, X3).
m <- set_names(m, c("Q7", "Q7n", "Q7_code"))

# Attach the grade-level code to each response, keep the respondents with
# complete asset/barrier data, and bind their mean TETC and factor scores (dx).
# NOTE(review): filter() and cbind() match rows by position, so this relies on
# the join returning exactly one row per row of d, in the same order — assumes
# each Q7 value appears once in m; confirm.
dg <- d %>% 
    left_join(m, by = "Q7") %>% 
    filter(complete.cases(dd)) %>% 
    cbind(dx) %>% 
    as_tibble()

# One-way ANOVA of each factor score on grade-level code (access here; vision
# and personal follow).
summary(aov(access ~ Q7_code, data = dg))
##              Df Sum Sq Mean Sq F value Pr(>F)
## Q7_code       3   1.83  0.6116   0.794  0.498
## Residuals   324 249.53  0.7702
summary(aov(vision ~ Q7_code, data = dg))
##              Df Sum Sq Mean Sq F value Pr(>F)
## Q7_code       3   3.03  1.0107   1.164  0.323
## Residuals   324 281.27  0.8681
summary(aov(personal ~ Q7_code, data = dg))
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## Q7_code       3   5.49  1.8309   4.035 0.00772 **
## Residuals   324 147.01  0.4537                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(aov(personal ~ Q7_code, data = dg))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = personal ~ Q7_code, data = dg)
## 
## $Q7_code
##                   diff        lwr         upr     p adj
## Elem-All   -0.28563405 -0.5536261 -0.01764203 0.0315911
## Other-All  -0.08059171 -0.3779460  0.21676256 0.8970633
## Sec-All    -0.26980336 -0.5181486 -0.02145809 0.0271995
## Other-Elem  0.20504234 -0.1427244  0.55280907 0.4251311
## Sec-Elem    0.01583069 -0.2910854  0.32274682 0.9991549
## Sec-Other  -0.18921165 -0.5220740  0.14365071 0.4581361
# Mean factor score by grade-level code, drawn as dodged bars for each factor.
dg %>% 
    select(Q7_code, access, vision, personal) %>% 
    gather(key, val, access, vision, personal) %>% 
    group_by(Q7_code, key) %>% 
    summarize(mean_val = mean(val)) %>% 
    ggplot() +
    geom_col(aes(x = key, y = mean_val, fill = Q7_code), position = "dodge")

Content area/subject

m <- read_csv("subject-m.csv")
## Parsed with column specification:
## cols(
##   Q8 = col_character(),
##   nn = col_double(),
##   code = col_character()
## )
# Rename the subject lookup columns (read in as Q8, nn, code) and relabel the
# "STEM" code as "Science and Math".
m <- m %>% 
    set_names(c("Q8", "Q8n", "Q8_code")) %>% 
    mutate(Q8_code = ifelse(Q8_code == "STEM", "Science and Math", Q8_code))

# Attach the subject-area code to each response, keep the respondents with
# complete asset/barrier data, and bind their mean TETC and factor scores (dx).
# NOTE(review): filter() and cbind() match rows by position, so this relies on
# the join returning exactly one row per row of d, in the same order — assumes
# each Q8 value appears once in m; confirm.
dg <- d %>% 
    left_join(m, by = "Q8") %>% 
    filter(complete.cases(dd)) %>% 
    cbind(dx) %>% 
    as_tibble()

# One-way ANOVA of each factor score on subject-area code (access here; vision
# and personal follow), with Tukey HSD comparisons for access and personal.
summary(aov(access ~ Q8_code, data = dg))
##              Df Sum Sq Mean Sq F value Pr(>F)  
## Q8_code       3   7.25   2.418   3.241 0.0225 *
## Residuals   280 208.89   0.746                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 44 observations deleted due to missingness
TukeyHSD(aov(access ~ Q8_code, data = dg))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = access ~ Q8_code, data = dg)
## 
## $Q8_code
##                                   diff         lwr       upr     p adj
## Other-Non-STEM               0.2663706 -0.07798734 0.6107285 0.1907295
## Science and Math-Non-STEM    0.1639589 -0.18428961 0.5122074 0.6166822
## Technology-Non-STEM          0.4773361  0.05953710 0.8951351 0.0178631
## Science and Math-Other      -0.1024117 -0.46824336 0.2634199 0.8876558
## Technology-Other             0.2109655 -0.22159869 0.6435297 0.5888775
## Technology-Science and Math  0.3133772 -0.12229054 0.7490450 0.2482323
summary(aov(vision ~ Q8_code, data = dg))
##              Df Sum Sq Mean Sq F value Pr(>F)
## Q8_code       3   2.82  0.9389   1.084  0.356
## Residuals   280 242.48  0.8660               
## 44 observations deleted due to missingness
summary(aov(personal ~ Q8_code, data = dg))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Q8_code       3   8.38  2.7939   6.353 0.000352 ***
## Residuals   280 123.14  0.4398                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 44 observations deleted due to missingness
TukeyHSD(aov(personal ~ Q8_code, data = dg))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = personal ~ Q8_code, data = dg)
## 
## $Q8_code
##                                   diff         lwr       upr     p adj
## Other-Non-STEM              0.22940442 -0.03498935 0.4937982 0.1144596
## Science and Math-Non-STEM   0.24597695 -0.02140394 0.5133578 0.0837736
## Technology-Non-STEM         0.52766367  0.20688276 0.8484446 0.0001693
## Science and Math-Other      0.01657253 -0.26430849 0.2974535 0.9987341
## Technology-Other            0.29825925 -0.03385820 0.6303767 0.0957032
## Technology-Science and Math 0.28168672 -0.05281361 0.6161871 0.1323431
# Mean factor score by subject-area code, drawn as dodged bars for each factor.
dg %>% 
    select(Q8_code, access, vision, personal) %>% 
    gather(key, val, access, vision, personal) %>% 
    group_by(Q8_code, key) %>% 
    summarize(mean_val = mean(val)) %>% 
    ggplot() +
    geom_col(aes(x = key, y = mean_val, fill = Q8_code), position = "dodge")