library(dplyr)
library(tidyr)
library(purrr)
library(DT)

# merge docvars and surveydata
surv <- read.csv(file="career_imp.csv", header=T)
surv$Litho <- trimws(surv$Litho, which=c("both"))

  # calculate selected factors
  surv$engpc <- (surv$Q3Eng_m + surv$Q3Eng_n + surv$Q3Eng_k)/3
  surv$engint <- (surv$Q3Eng_i + surv$Q3Eng_h + surv$Q3Eng_j)/3
  surv$engrec <- (surv$Q3Eng_f + surv$Q3Eng_e + surv$Q3Eng_d + surv$Q3Eng_g)/4
  surv$belong1 <- (surv$Q4a + surv$Q4b + surv$Q4c + surv$Q4g + surv$Q4h)/5
  surv$belong2 <- (surv$Q4e + surv$Q4f)/2
  surv$engbel <- (surv$Q5d + surv$Q5h + surv$Q5g + surv$Q5e + surv$Q5b)/5
  surv$engemp <- (surv$Q5a + surv$Q5c + surv$Q5f)/3

  # subset needed columns
  names(surv)
  surv2 <- subset(surv, select=c(1,185:205,221:228,229:237,319:327,328:335,377:383))
  head(surv2)

  # calculate demographics
  surv2$racenum <- as.numeric(surv2$Q30a) + as.numeric(surv2$Q30b) + as.numeric(surv2$Q30c) + as.numeric(surv2$Q30d) + as.numeric(surv2$Q30e) + as.numeric(surv2$Q30f) + as.numeric(surv2$Q30g) + as.numeric(surv2$Q30h)
  surv2$raceeth[surv2$racenum == 0] <- NA
  surv2$raceeth[surv2$racenum == 2] <- "Biracial"
  surv2$raceeth[surv2$racenum > 2] <- "Multiracial"
  surv2$raceeth[surv2$racenum == 1 & surv2$Q30a == 1] <- "Asian"
  surv2$raceeth[surv2$racenum == 1 & surv2$Q30b == 1] <- "Black/AA"
  surv2$raceeth[surv2$racenum == 1 & surv2$Q30c == 1] <- "Latinx"
  surv2$raceeth[surv2$racenum == 1 & surv2$Q30d == 1] <- "MidEast"
  surv2$raceeth[surv2$racenum == 1 & surv2$Q30e == 1] <- "NH/PI"
  surv2$raceeth[surv2$racenum == 1 & surv2$Q30f == 1] <- "NA/AN"
  surv2$raceeth[surv2$racenum == 1 & surv2$Q30g == 1] <- "White"
  surv2$raceeth[surv2$racenum == 1 & surv2$Q30h == 1] <- "WriteIn"
  table(surv2$raceeth)
  
  surv2$gennum <- as.numeric(surv2$Q31a) + as.numeric(surv2$Q31b) + as.numeric(surv2$Q31c) + as.numeric(surv2$Q31d) + as.numeric(surv2$Q31e) + as.numeric(surv2$Q31f) + as.numeric(surv2$Q31g)
  table(surv2$gennum)
  surv2$genid[surv2$gennum == 0] <- NA
  surv2$genid[surv2$gennum > 1] <- "Multiple Options Selected"
  surv2$genid[surv2$gennum == 1 & surv2$Q31a == 1] <- "Female"
  surv2$genid[surv2$gennum == 1 & surv2$Q31b == 1] <- "Male"
  surv2$genid[surv2$gennum == 1 & surv2$Q31c == 1] <- "Agender"
  surv2$genid[surv2$gennum == 1 & surv2$Q31d == 1] <- "Genderqueer"
  surv2$genid[surv2$gennum == 1 & surv2$Q31e == 1] <- "Cisgender"
  surv2$genid[surv2$gennum == 1 & surv2$Q31f == 1] <- "Transgender"
  surv2$genid[surv2$gennum == 1 & surv2$Q31g == 1] <- "Not Listed"
  table(surv2$genid)
  subset(surv2, surv2$gennum > 1, select=c(28:35))

  # career priorities
  colnames(surv2)[2:39] <- c("cmn1","cmn2","cmn3","cmn4","cmn5","cmn6","cmn7","cmn8","cmn9","cmn10","cmn11","cmn12","cmn13","cmn14","cmn15","cmn16","cmn17","cmn18","cmn19","cmn20","cmn21",
                       "career_money",
                       "career_known",
                       "career_helping",
                       "career_supervising",
                       "career_security",
                       "career_people",
                       "career_invent",
                       "career_developing",
                       "field_academia",
                       "field_industry",
                       "field_entre",
                       "field_govt",
                       "field_k12",
                       "field_law",
                       "field_med",
                       "field_nonprofit",
                       "field_other")
  head(surv2)
  names(surv2)
  surv3 <- subset(surv2, select=c(1:39,57:63))
  head(surv3)
  names(surv3)
  
  library(psych)
  table(surv3$cmn3)
  recodeme <- c(1, 1, -1, 1, 1, -1, 1, -1, -1, 1, -1, 1, 1, -1, 1, -1, 1, 1, -1, 1, 1)
  surv4 <- surv3
  surv4[2:22] <- reverse.code(recodeme, surv3[2:22])
  head(surv4)

data <- surv4

rm(list=setdiff(ls(), c("data")))

Factor Analysis

Career Vars only

Field Vars only

CMN Vars only

Final Composites

Career

Q15. How important are the following factors for your future career satisfaction?

  • Finances
    • Q15e Having job security and opportunity (security)
    • Q15a Making money (money)
  • Leadership
    • Q15b Becoming well known (known)
    • Q15d Supervising others (supervising)
    • Q15f Working with people (people)
  • Innovation
    • Q15g Inventing/designing things (invent)
    • Q15h Developing new knowledge and skills (developing)
    • Q15c Helping others (helping)

CMN

  1. Thinking about your own actions, feelings and beliefs, please indicate how much you personally agree or disagree with each statement. There are no correct or wrong answers to the items. You should give the responses that most accurately describe your personal actions, feelings and beliefs. It is best if you respond with your first impression when answering.
  • Stoic
    • cmn6-Q13f = I like to talk about my feelings. *
    • cmn9-Q13i = I tend to share my feelings. *
  • Hetero
    • cmn4-Q13d = It would be awful if someone thought I was gay.
    • cmn7-Q13g = It is important to me that people think I am heterosexual.
  • Risks
    • cmn3-Q13c = In general, I do not like risky situations. *
    • cmn18-Q13r = I enjoy taking risks.
  • Help
    • cmn21-Q12u = It bothers me when I have to ask for help.
    • cmn17-Q13q = I never ask for help.
  • Violence
    • cmn8-Q13h = I believe that violence is never justified. *
    • cmn12-Q13l = Sometimes violent action is necessary.
  • Importance
    • cmn11-Q13k = I would hate to be important. *
    • cmn16-Q13p = I never do things to be an important person. *

In Progress

Several rows with same Litho number but not same values

rm(list=setdiff(ls(), c("data")))

data$car_fin <- (data$career_security + data$career_money)/2
data$car_lead <- (data$career_known  + data$career_supervising  + data$career_people)/3
data$car_inno <- (data$career_invent  + data$career_developing  + data$career_helping)/3
data$cmn_stoic <- (data$cmn6 + data$cmn9)/2
data$cmn_hetero <- (data$cmn4 + data$cmn7)/2
data$cmn_risks <- (data$cmn3 + data$cmn18)/2
data$cmn_help <- (data$cmn21 + data$cmn17)/2
data$cmn_violence <- (data$cmn8 + data$cmn12)/2
data$cmn_importance <- (data$cmn11 + data$cmn16)/2
head(data)
##   Litho cmn1 cmn2 cmn3 cmn4 cmn5 cmn6 cmn7 cmn8 cmn9 cmn10 cmn11 cmn12
## 1  1028    1    3    3    6    4    4    6    4    3     5     1     4
## 2  1030    4    2    5    6    3    1    6    1    1     3     1     5
## 3  1029    3    6    6    6    3    0    6    3    0     6     0     6
## 4  1054   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA
## 5  1027    0    1    4    6    3    4    6    0    2    NA    NA    NA
## 6  1039    1    1    3    5    1    1    5    4    1     3     3     4
##   cmn13 cmn14 cmn15 cmn16 cmn17 cmn18 cmn19 cmn20 cmn21 career_money
## 1     1     4     0     0     1     1     6     0     0            1
## 2     2     1    NA     2     4     4     2     1     4            4
## 3     6     3     0     0     0     3     6     3     0           NA
## 4    NA    NA    NA    NA    NA    NA    NA    NA    NA           NA
## 5    NA    NA    NA    NA    NA    NA    NA    NA    NA           NA
## 6     4     2     0     3     1     4     6     1     2            4
##   career_known career_helping career_supervising career_security
## 1            2              5                  5               5
## 2            4              5                  2               4
## 3           NA             NA                 NA              NA
## 4           NA             NA                 NA              NA
## 5           NA             NA                 NA              NA
## 6            3              5                  3               5
##   career_people career_invent career_developing field_academia
## 1             5             5                 5              5
## 2             4             4                 4              2
## 3            NA            NA                NA             NA
## 4            NA            NA                NA             NA
## 5            NA            NA                NA             NA
## 6             4             5                 5              5
##   field_industry field_entre field_govt field_k12 field_law field_med
## 1              5           0          4         0         3         3
## 2              5           4          4         1         4         4
## 3             NA          NA         NA        NA        NA        NA
## 4             NA          NA         NA        NA        NA        NA
## 5             NA          NA         NA        NA        NA        NA
## 6              5           1          1         1         1         4
##   field_nonprofit field_other    engpc   engint engrec belong1 belong2
## 1               4           5 4.000000 4.666667   5.00     4.4     4.5
## 2               1           5 4.333333 4.333333   4.00     3.6     4.5
## 3              NA          NA 6.000000 6.000000   3.00     5.8     6.0
## 4              NA          NA 4.666667 5.333333   5.00     4.8     5.0
## 5              NA          NA 6.000000 5.333333   3.25     4.8     5.5
## 6               2           3 4.666667 5.333333   3.75     4.6     4.0
##   engbel   engemp car_fin car_lead car_inno cmn_stoic cmn_hetero cmn_risks
## 1    4.6 4.666667     3.0 4.000000 5.000000       3.5          6       2.0
## 2    5.8 5.000000     4.0 3.333333 4.333333       1.0          6       4.5
## 3    6.0 6.000000      NA       NA       NA       0.0          6       4.5
## 4    5.2 5.000000      NA       NA       NA        NA         NA        NA
## 5    4.6 5.333333      NA       NA       NA       3.0          6        NA
## 6    4.6 3.666667     4.5 3.333333 5.000000       1.0          5       3.5
##   cmn_help cmn_violence cmn_importance
## 1      0.5          4.0            0.5
## 2      4.0          3.0            1.5
## 3      0.0          4.5            0.0
## 4       NA           NA             NA
## 5       NA           NA             NA
## 6      1.5          4.0            3.0
## Normality
d <- na.omit(data[40:55])
norm <- describe(d)
datatable(norm) %>%
  formatRound(1:13) %>%
  formatStyle(11:12, color = styleInterval(c(-2, 2), c('red', 'black', 'red')))
kurt <- cbind.data.frame(d$engint, d$car_fin, d$engbel, d$car_inno)
library(Hmisc)
hist.data.frame(kurt)

## Detect Outliers
d1 <- na.omit(data[40:55])
m_dist <- mahalanobis(d1, colMeans(d1), cov(d1))
d1$MD <- round(m_dist, 1)
plot(d1$MD)

d1$outlier <- "No"
d1$outlier[d1$MD > 40] <- "Yes"
table(d1$outlier)
## 
##   No  Yes 
## 2290   86
d$outlier <- d1$outlier
d2 <- subset(d1, outlier == "No", select=c(1:16))
d3 <- as.data.frame(scale(d2, center=TRUE, scale=TRUE))
d4 <- subset(d, outlier == "No", select=c(1:16))

allcor <- corr.test(d4, use = "pairwise", adjust = "holm")
allcor_r <- data.frame(allcor$r)

datatable(allcor_r) %>%
  formatRound(1:16) %>%
  formatStyle(1:16, backgroundColor = styleInterval(.75, c(none, 'yellow')))
library(corrplot)
col4 <- colorRampPalette(c("#7F0000", "red", "#FF7F00", "yellow", "#7FFF7F",
                           "cyan", "#007FFF", "blue", "#00007F"))
corrplot(allcor$r, p.mat = allcor$p, insig = "blank",
         method = "color", order = "hclust", col = col4(10))

library(sjPlot)
library(car)
d <- d4
Y <- d$belong1
fit <- lm(Y ~ d$cmn_help + d$cmn_importance + d$cmn_hetero + d$cmn_risks + d$cmn_violence + d$cmn_stoic, d=d)
summary(fit)
## 
## Call:
## lm(formula = Y ~ d$cmn_help + d$cmn_importance + d$cmn_hetero + 
##     d$cmn_risks + d$cmn_violence + d$cmn_stoic, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7482 -0.6406  0.2038  0.9129  2.1466 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.44377    0.13535  32.832  < 2e-16 ***
## d$cmn_help       -0.10637    0.01883  -5.649 1.81e-08 ***
## d$cmn_importance -0.13266    0.02138  -6.205 6.48e-10 ***
## d$cmn_hetero      0.04629    0.01393   3.322 0.000907 ***
## d$cmn_risks       0.14166    0.03492   4.057 5.14e-05 ***
## d$cmn_violence    0.05018    0.02957   1.697 0.089890 .  
## d$cmn_stoic      -0.01877    0.01548  -1.212 0.225466    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.201 on 2283 degrees of freedom
## Multiple R-squared:  0.04776,    Adjusted R-squared:  0.04526 
## F-statistic: 19.08 on 6 and 2283 DF,  p-value: < 2.2e-16
# diagnostics
  # distribution of studentized residuals
  library(MASS)
  layout(matrix(c(1,2,3,4),2,2))
  plot(fit)

  layout(matrix(c(1), byrow = TRUE))

  # Evaluate Collinearity
  vif(fit) # variance inflation factors
##       d$cmn_help d$cmn_importance     d$cmn_hetero      d$cmn_risks 
##         1.149100         1.144091         1.061365         1.083872 
##   d$cmn_violence      d$cmn_stoic 
##         1.115697         1.054945
  sqrt(vif(fit)) > 2 # problem?
##       d$cmn_help d$cmn_importance     d$cmn_hetero      d$cmn_risks 
##            FALSE            FALSE            FALSE            FALSE 
##   d$cmn_violence      d$cmn_stoic 
##            FALSE            FALSE
summary(fit)
## 
## Call:
## lm(formula = Y ~ d$cmn_help + d$cmn_importance + d$cmn_hetero + 
##     d$cmn_risks + d$cmn_violence + d$cmn_stoic, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7482 -0.6406  0.2038  0.9129  2.1466 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.44377    0.13535  32.832  < 2e-16 ***
## d$cmn_help       -0.10637    0.01883  -5.649 1.81e-08 ***
## d$cmn_importance -0.13266    0.02138  -6.205 6.48e-10 ***
## d$cmn_hetero      0.04629    0.01393   3.322 0.000907 ***
## d$cmn_risks       0.14166    0.03492   4.057 5.14e-05 ***
## d$cmn_violence    0.05018    0.02957   1.697 0.089890 .  
## d$cmn_stoic      -0.01877    0.01548  -1.212 0.225466    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.201 on 2283 degrees of freedom
## Multiple R-squared:  0.04776,    Adjusted R-squared:  0.04526 
## F-statistic: 19.08 on 6 and 2283 DF,  p-value: < 2.2e-16
plot_model(fit, type = "est") #output

##########

corrplot(allcor$r, p.mat = allcor$p, insig = "blank",
         method = "color", order = "hclust", col = col4(10))

Y <- d$belong2
fit <- lm(Y ~ d$cmn_help + d$cmn_importance + d$cmn_hetero + d$cmn_risks + d$cmn_violence + d$cmn_stoic, d=d)

# diagnostics

  layout(matrix(c(1,2,3,4),2,2))
  plot(fit)

  layout(matrix(c(1), byrow = TRUE))

  # Evaluate Collinearity
  vif(fit) # variance inflation factors
##       d$cmn_help d$cmn_importance     d$cmn_hetero      d$cmn_risks 
##         1.149100         1.144091         1.061365         1.083872 
##   d$cmn_violence      d$cmn_stoic 
##         1.115697         1.054945
  sqrt(vif(fit)) > 2 # problem?
##       d$cmn_help d$cmn_importance     d$cmn_hetero      d$cmn_risks 
##            FALSE            FALSE            FALSE            FALSE 
##   d$cmn_violence      d$cmn_stoic 
##            FALSE            FALSE
summary(fit)
## 
## Call:
## lm(formula = Y ~ d$cmn_help + d$cmn_importance + d$cmn_hetero + 
##     d$cmn_risks + d$cmn_violence + d$cmn_stoic, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6895 -0.7104  0.1603  0.9326  2.4558 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.443759   0.133177  33.367  < 2e-16 ***
## d$cmn_help       -0.127450   0.018528  -6.879 7.77e-12 ***
## d$cmn_importance -0.121558   0.021038  -5.778 8.58e-09 ***
## d$cmn_hetero      0.042474   0.013709   3.098  0.00197 ** 
## d$cmn_risks       0.099528   0.034358   2.897  0.00381 ** 
## d$cmn_violence    0.077864   0.029100   2.676  0.00751 ** 
## d$cmn_stoic       0.009415   0.015231   0.618  0.53655    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.182 on 2283 degrees of freedom
## Multiple R-squared:  0.05173,    Adjusted R-squared:  0.04924 
## F-statistic: 20.76 on 6 and 2283 DF,  p-value: < 2.2e-16
plot_model(fit, type = "est") #output

##########

corrplot(allcor$r, p.mat = allcor$p, insig = "blank",
         method = "color", order = "hclust", col = col4(10))

Y <- (d$engint + d$engrec + d$engpc)/3
fit <- lm(Y ~ d$cmn_help + d$cmn_importance + d$cmn_hetero + d$cmn_risks + d$cmn_violence + d$cmn_stoic, d=d)

# diagnostics

  layout(matrix(c(1,2,3,4),2,2))
  plot(fit)

  layout(matrix(c(1), byrow = TRUE))

  # Evaluate Collinearity
  vif(fit) # variance inflation factors
##       d$cmn_help d$cmn_importance     d$cmn_hetero      d$cmn_risks 
##         1.149100         1.144091         1.061365         1.083872 
##   d$cmn_violence      d$cmn_stoic 
##         1.115697         1.054945
  sqrt(vif(fit)) > 2 # problem?
##       d$cmn_help d$cmn_importance     d$cmn_hetero      d$cmn_risks 
##            FALSE            FALSE            FALSE            FALSE 
##   d$cmn_violence      d$cmn_stoic 
##            FALSE            FALSE
summary(fit)
## 
## Call:
## lm(formula = Y ~ d$cmn_help + d$cmn_importance + d$cmn_hetero + 
##     d$cmn_risks + d$cmn_violence + d$cmn_stoic, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3961 -0.5435  0.0882  0.6384  2.0434 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       4.272641   0.101372  42.148  < 2e-16 ***
## d$cmn_help       -0.059823   0.014104  -4.242 2.31e-05 ***
## d$cmn_importance -0.120724   0.016013  -7.539 6.79e-14 ***
## d$cmn_hetero      0.030441   0.010435   2.917  0.00357 ** 
## d$cmn_risks       0.104338   0.026153   3.989 6.83e-05 ***
## d$cmn_violence    0.050284   0.022151   2.270  0.02329 *  
## d$cmn_stoic       0.008556   0.011594   0.738  0.46062    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8994 on 2283 degrees of freedom
## Multiple R-squared:  0.05035,    Adjusted R-squared:  0.04785 
## F-statistic: 20.17 on 6 and 2283 DF,  p-value: < 2.2e-16
plot_model(fit, type = "est") #output

##########

d$id <- (d$engint + d$engrec + d$engpc)/3
Y <- d$id
fit <- lm(Y ~ d$car_fin + d$car_inno + d$car_lead, d=d)

# diagnostics

  layout(matrix(c(1,2,3,4),2,2))
  plot(fit)

  layout(matrix(c(1), byrow = TRUE))

  # Evaluate Collinearity
  vif(fit) # variance inflation factors
##  d$car_fin d$car_inno d$car_lead 
##   1.161333   1.294814   1.340308
  sqrt(vif(fit)) > 2 # problem?
##  d$car_fin d$car_inno d$car_lead 
##      FALSE      FALSE      FALSE
summary(fit)
## 
## Call:
## lm(formula = Y ~ d$car_fin + d$car_inno + d$car_lead, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2239 -0.4911  0.0824  0.5687  2.8504 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.28192    0.11368  20.074   <2e-16 ***
## d$car_fin    0.03049    0.01964   1.552    0.121    
## d$car_inno   0.44670    0.02144  20.839   <2e-16 ***
## d$car_lead  -0.03570    0.01772  -2.015    0.044 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8294 on 2286 degrees of freedom
## Multiple R-squared:  0.1913, Adjusted R-squared:  0.1902 
## F-statistic: 180.2 on 3 and 2286 DF,  p-value: < 2.2e-16
plot_model(fit, type = "est") #output

Y <- d$belong1
fit <- lm(Y ~ d$car_fin + d$car_inno + d$car_lead, d=d)

# diagnostics

  layout(matrix(c(1,2,3,4),2,2))
  plot(fit)

  layout(matrix(c(1), byrow = TRUE))

  # Evaluate Collinearity
  vif(fit) # variance inflation factors
##  d$car_fin d$car_inno d$car_lead 
##   1.161333   1.294814   1.340308
  sqrt(vif(fit)) > 2 # problem?
##  d$car_fin d$car_inno d$car_lead 
##      FALSE      FALSE      FALSE
summary(fit)
## 
## Call:
## lm(formula = Y ~ d$car_fin + d$car_inno + d$car_lead, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7845 -0.5665  0.1917  0.7586  3.2305 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.60296    0.15158  10.575   <2e-16 ***
## d$car_fin    0.07080    0.02618   2.704   0.0069 ** 
## d$car_inno   0.59017    0.02858  20.649   <2e-16 ***
## d$car_lead  -0.06001    0.02362  -2.540   0.0111 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.106 on 2286 degrees of freedom
## Multiple R-squared:  0.1912, Adjusted R-squared:  0.1901 
## F-statistic: 180.1 on 3 and 2286 DF,  p-value: < 2.2e-16
plot_model(fit, type = "est") #output

Y <- d$belong2
fit <- lm(Y ~ d$car_fin + d$car_inno + d$car_lead, d=d)

# diagnostics

  layout(matrix(c(1,2,3,4),2,2))
  plot(fit)

  layout(matrix(c(1), byrow = TRUE))

  # Evaluate Collinearity
  vif(fit) # variance inflation factors
##  d$car_fin d$car_inno d$car_lead 
##   1.161333   1.294814   1.340308
  sqrt(vif(fit)) > 2 # problem?
##  d$car_fin d$car_inno d$car_lead 
##      FALSE      FALSE      FALSE
summary(fit)
## 
## Call:
## lm(formula = Y ~ d$car_fin + d$car_inno + d$car_lead, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1097 -0.6611  0.1593  0.9011  2.7186 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.401712   0.156117  15.384   <2e-16 ***
## d$car_fin    0.003308   0.026968   0.123    0.902    
## d$car_inno   0.464748   0.029438  15.788   <2e-16 ***
## d$car_lead  -0.027380   0.024330  -1.125    0.261    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.139 on 2286 degrees of freedom
## Multiple R-squared:  0.1175, Adjusted R-squared:  0.1164 
## F-statistic: 101.5 on 3 and 2286 DF,  p-value: < 2.2e-16
plot_model(fit, type = "est") #output

###########

head(data)
##   Litho cmn1 cmn2 cmn3 cmn4 cmn5 cmn6 cmn7 cmn8 cmn9 cmn10 cmn11 cmn12
## 1  1028    1    3    3    6    4    4    6    4    3     5     1     4
## 2  1030    4    2    5    6    3    1    6    1    1     3     1     5
## 3  1029    3    6    6    6    3    0    6    3    0     6     0     6
## 4  1054   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA
## 5  1027    0    1    4    6    3    4    6    0    2    NA    NA    NA
## 6  1039    1    1    3    5    1    1    5    4    1     3     3     4
##   cmn13 cmn14 cmn15 cmn16 cmn17 cmn18 cmn19 cmn20 cmn21 career_money
## 1     1     4     0     0     1     1     6     0     0            1
## 2     2     1    NA     2     4     4     2     1     4            4
## 3     6     3     0     0     0     3     6     3     0           NA
## 4    NA    NA    NA    NA    NA    NA    NA    NA    NA           NA
## 5    NA    NA    NA    NA    NA    NA    NA    NA    NA           NA
## 6     4     2     0     3     1     4     6     1     2            4
##   career_known career_helping career_supervising career_security
## 1            2              5                  5               5
## 2            4              5                  2               4
## 3           NA             NA                 NA              NA
## 4           NA             NA                 NA              NA
## 5           NA             NA                 NA              NA
## 6            3              5                  3               5
##   career_people career_invent career_developing field_academia
## 1             5             5                 5              5
## 2             4             4                 4              2
## 3            NA            NA                NA             NA
## 4            NA            NA                NA             NA
## 5            NA            NA                NA             NA
## 6             4             5                 5              5
##   field_industry field_entre field_govt field_k12 field_law field_med
## 1              5           0          4         0         3         3
## 2              5           4          4         1         4         4
## 3             NA          NA         NA        NA        NA        NA
## 4             NA          NA         NA        NA        NA        NA
## 5             NA          NA         NA        NA        NA        NA
## 6              5           1          1         1         1         4
##   field_nonprofit field_other    engpc   engint engrec belong1 belong2
## 1               4           5 4.000000 4.666667   5.00     4.4     4.5
## 2               1           5 4.333333 4.333333   4.00     3.6     4.5
## 3              NA          NA 6.000000 6.000000   3.00     5.8     6.0
## 4              NA          NA 4.666667 5.333333   5.00     4.8     5.0
## 5              NA          NA 6.000000 5.333333   3.25     4.8     5.5
## 6               2           3 4.666667 5.333333   3.75     4.6     4.0
##   engbel   engemp car_fin car_lead car_inno cmn_stoic cmn_hetero cmn_risks
## 1    4.6 4.666667     3.0 4.000000 5.000000       3.5          6       2.0
## 2    5.8 5.000000     4.0 3.333333 4.333333       1.0          6       4.5
## 3    6.0 6.000000      NA       NA       NA       0.0          6       4.5
## 4    5.2 5.000000      NA       NA       NA        NA         NA        NA
## 5    4.6 5.333333      NA       NA       NA       3.0          6        NA
## 6    4.6 3.666667     4.5 3.333333 5.000000       1.0          5       3.5
##   cmn_help cmn_violence cmn_importance
## 1      0.5          4.0            0.5
## 2      4.0          3.0            1.5
## 3      0.0          4.5            0.0
## 4       NA           NA             NA
## 5       NA           NA             NA
## 6      1.5          4.0            3.0