Libraries

Here’s an overview of all the packages we will need:

library(tidyverse) 
library(reshape2) #because i don't fully understand tidy syntax still sometimes
library(readxl) #for reading in separate sheets from excel
library(sjPlot) #for quick effect plots and other stuff
library(jtools) #for actually legible model printouts -- where have you been all my life??

Test1

# Load the Test1 sheet: one row per Type, one count column per collocate.
test1 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision & reaction tm1"
)

# Goal: find the collocates attested with both Types (no zero cell).
# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test1 <- test1 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns; their names are copied by hand
# into measure.vars in the reshape step below.
test1 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 5
##   Type                ME `PALM-UP`   YOU  WILL
##   <chr>            <dbl>     <dbl> <dbl> <dbl>
## 1 LOOK-AT.vision      23         9     5     2
## 2 LOOK-AT.reaction    95         5     3     2
# Convert wide to long: one row per Type x collocate pair.
test1_long <- melt(
  test1,
  id.vars = "Type",                                  # kept as-is, not split
  measure.vars = c("ME", "PALM-UP", "YOU", "WILL"),  # source count columns
  variable.name = "collocation",                     # records the source column
  value.name = "count"                               # holds the cell frequency
)
test1_long
##               Type collocation count
## 1   LOOK-AT.vision          ME    23
## 2 LOOK-AT.reaction          ME    95
## 3   LOOK-AT.vision     PALM-UP     9
## 4 LOOK-AT.reaction     PALM-UP     5
## 5   LOOK-AT.vision         YOU     5
## 6 LOOK-AT.reaction         YOU     3
## 7   LOOK-AT.vision        WILL     2
## 8 LOOK-AT.reaction        WILL     2
# Expand every cell into `count` single observations and turn Type into a
# factor so it can be used as the binary response.
test1_binom <- test1_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate (Winter pp. 289-299).
test1_m <- glm(Type ~ collocation, family = "binomial", data = test1_binom)
summ(test1_m) # only interested in intercept
## MODEL INFO:
## Observations: 144
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(3) = 17.43, p = 0.00
## Pseudo-R² (Cragg-Uhler) = 0.17
## Pseudo-R² (McFadden) = 0.10
## AIC = 158.79, BIC = 170.67 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                -1.42   0.23    -6.10   0.00
## collocationPALM-UP          2.01   0.60     3.32   0.00
## collocationYOU              1.93   0.77     2.52   0.01
## collocationWILL             1.42   1.03     1.38   0.17
## -------------------------------------------------------
# Predicted probabilities per collocate from test1_m.
plot_model(
  test1_m,
  type = "pred",
  terms = "collocation",
  title = "Test1: predicted w/ 'LOOK-AT.vision'",
  axis.title = ""
) +
  theme_bw()

Test2

# Load the Test2 sheet: one row per Type, one count column per collocate.
test2 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision & reaction tp1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test2 <- test2 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types; their names are copied by hand into measure.vars below.
test2 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 4
##   Type                ME `PALM-UP`  THAT
##   <chr>            <dbl>     <dbl> <dbl>
## 1 LOOK-AT.vision       6         9     3
## 2 LOOK-AT.reaction    13        10     2
# Wide -> long: one row per Type x collocate, cell frequency in `count`.
test2_long <- melt(
  test2,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP", "THAT"),
  variable.name = "collocation",
  value.name = "count"
)

# One row per observation, with Type as a factor response.
test2_binom <- test2_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate.
test2_m <- glm(Type ~ collocation, family = "binomial", data = test2_binom)
summ(test2_m)
## MODEL INFO:
## Observations: 43
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(2) = 1.75, p = 0.42
## Pseudo-R² (Cragg-Uhler) = 0.05
## Pseudo-R² (McFadden) = 0.03
## AIC = 62.72, BIC = 68.00 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                -0.77   0.49    -1.57   0.12
## collocationPALM-UP          0.67   0.67     0.99   0.32
## collocationTHAT             1.18   1.04     1.14   0.26
## -------------------------------------------------------
# Predicted probabilities per collocate from test2_m.
plot_model(
  test2_m,
  type = "pred",
  terms = "collocation",
  title = "Test2: predicted w/'LOOK-AT.vision'",
  axis.title = ""
) +
  theme_bw()

Test3

# Load the Test3 sheet: one row per Type, one count column per collocate.
test3 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision & SEE tm1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test3 <- test3 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types; their names are copied by hand into measure.vars below.
test3 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 11
##   Type        ME `PALM-UP`   CAN INDEX   YOU NEVER `CAN'T` `LOOK-AT`   NOT   NOW
##   <chr>    <dbl>     <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>     <dbl> <dbl> <dbl>
## 1 LOOK-AT~    23         9     6     6     5     3       2         2     2     2
## 2 SEE         23         5    22     3     2     4       2         6     2     6
# Wide -> long: one row per Type x collocate, cell frequency in `count`.
test3_long <- melt(
  test3,
  id.vars = "Type",
  measure.vars = c(
    "ME", "PALM-UP", "CAN", "INDEX", "YOU",
    "NEVER", "CAN'T", "LOOK-AT", "NOT", "NOW"
  ),
  variable.name = "collocation",
  value.name = "count"
)

# One row per observation, with Type as a factor response.
test3_binom <- test3_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate.
test3_m <- glm(Type ~ collocation, family = "binomial", data = test3_binom)
summ(test3_m)
## MODEL INFO:
## Observations: 135
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(9) = 15.89, p = 0.07
## Pseudo-R² (Cragg-Uhler) = 0.15
## Pseudo-R² (McFadden) = 0.09
## AIC = 189.59, BIC = 218.65 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                 0.00   0.29     0.00   1.00
## collocationPALM-UP         -0.59   0.63    -0.93   0.35
## collocationCAN              1.30   0.55     2.38   0.02
## collocationINDEX           -0.69   0.77    -0.90   0.37
## collocationYOU             -0.92   0.89    -1.03   0.30
## collocationNEVER            0.29   0.82     0.35   0.73
## collocationCAN'T           -0.00   1.04    -0.00   1.00
## collocationLOOK-AT          1.10   0.87     1.27   0.21
## collocationNOT              0.00   1.04     0.00   1.00
## collocationNOW              1.10   0.87     1.27   0.21
## -------------------------------------------------------
# Predicted probabilities per collocate from test3_m.
# glm() models the second level of factor(Type), which here is "SEE"
# (ME intercept 0.00 = log(23/23), CAN 1.30 = log(22/6)), so the title names
# SEE rather than LOOK-AT.vision.
plot_model(
  test3_m,
  type = "pred",
  terms = "collocation",
  title = "Test3: predicted w/'SEE'",
  axis.title = ""
) +
  theme_bw()

Test4

# Load the Test4 sheet: one row per Type, one count column per collocate.
test4 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision & SEE tp1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test4 <- test4 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types; their names are copied by hand into measure.vars below.
test4 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 7
##   Type              ME `PALM-UP` INDEX   YOU `LOOK-AT`  THEY
##   <chr>          <dbl>     <dbl> <dbl> <dbl>     <dbl> <dbl>
## 1 LOOK-AT.vision    23         9     6     5         2     2
## 2 SEE                4         8    10    10         2     3
# Wide -> long: one row per Type x collocate, cell frequency in `count`.
test4_long <- melt(
  test4,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP", "INDEX", "YOU", "LOOK-AT", "THEY"),
  variable.name = "collocation",
  value.name = "count"
)

# One row per observation, with Type as a factor response.
test4_binom <- test4_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate.
test4_m <- glm(Type ~ collocation, family = "binomial", data = test4_binom)
summ(test4_m)
## MODEL INFO:
## Observations: 84
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(5) = 16.55, p = 0.01
## Pseudo-R² (Cragg-Uhler) = 0.24
## Pseudo-R² (McFadden) = 0.14
## AIC = 110.70, BIC = 125.29 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                -1.75   0.54    -3.23   0.00
## collocationPALM-UP          1.63   0.73     2.24   0.02
## collocationINDEX            2.26   0.75     3.02   0.00
## collocationYOU              2.44   0.77     3.17   0.00
## collocationLOOK-AT          1.75   1.14     1.54   0.12
## collocationTHEY             2.15   1.06     2.03   0.04
## -------------------------------------------------------
# Predicted probabilities per collocate from test4_m.
# glm() models the second level of factor(Type), which here is "SEE"
# (ME intercept -1.75 = log(4/23)), so the title names SEE rather than
# LOOK-AT.vision.
plot_model(
  test4_m,
  type = "pred",
  terms = "collocation",
  title = "Test4: predicted w/'SEE'",
  axis.title = ""
) +
  theme_bw()

Test5

# Load the Test5 sheet: one row per Type, one count column per collocate.
test5 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.reaction & SEE tm1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test5 <- test5 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types; their names are copied by hand into measure.vars below.
test5 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 7
##   Type                ME PRO.3 PEOPLE `PALM-UP`   YOU  WILL
##   <chr>            <dbl> <dbl>  <dbl>     <dbl> <dbl> <dbl>
## 1 LOOK-AT.reaction    95    11      8         5     3     2
## 2 SEE                 23     4      2         5     2     9
# Wide -> long: one row per Type x collocate, cell frequency in `count`.
test5_long <- melt(
  test5,
  id.vars = "Type",
  measure.vars = c("ME", "PRO.3", "PEOPLE", "PALM-UP", "YOU", "WILL"),
  variable.name = "collocation",
  value.name = "count"
)

# One row per observation, with Type as a factor response.
test5_binom <- test5_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate.
test5_m <- glm(Type ~ collocation, family = "binomial", data = test5_binom)
summ(test5_m)
## MODEL INFO:
## Observations: 169
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(5) = 21.03, p = 0.00
## Pseudo-R² (Cragg-Uhler) = 0.17
## Pseudo-R² (McFadden) = 0.11
## AIC = 186.84, BIC = 205.62 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                -1.42   0.23    -6.10   0.00
## collocationPRO.3            0.41   0.63     0.65   0.52
## collocationPEOPLE           0.03   0.82     0.04   0.97
## collocationPALM-UP          1.42   0.67     2.11   0.04
## collocationYOU              1.01   0.94     1.08   0.28
## collocationWILL             2.92   0.82     3.58   0.00
## -------------------------------------------------------
# Predicted probabilities per collocate from test5_m.
# glm() models the second level of factor(Type), which here is "SEE"
# (ME intercept -1.42 = log(23/95)), so the title names SEE rather than
# LOOK-AT.reaction.
plot_model(
  test5_m,
  type = "pred",
  terms = "collocation",
  title = "Test5: predicted w/'SEE'",
  axis.title = ""
) +
  theme_bw()

Test6

# Load the Test6 sheet: one row per Type, one count column per collocate.
test6 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.reaction & SEE tp1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test6 <- test6 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types; their names are copied by hand into measure.vars below.
test6 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 5
##   Type                ME `PALM-UP` PRO.3  THAT
##   <chr>            <dbl>     <dbl> <dbl> <dbl>
## 1 LOOK-AT.reaction    13        10     3     2
## 2 SEE                  4         8     3    10
# Wide -> long: one row per Type x collocate, cell frequency in `count`.
test6_long <- melt(
  test6,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP", "PRO.3", "THAT"),
  variable.name = "collocation",
  value.name = "count"
)

# One row per observation, with Type as a factor response.
test6_binom <- test6_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate.
test6_m <- glm(Type ~ collocation, family = "binomial", data = test6_binom)
summ(test6_m)
## MODEL INFO:
## Observations: 53
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(3) = 10.89, p = 0.01
## Pseudo-R² (Cragg-Uhler) = 0.25
## Pseudo-R² (McFadden) = 0.15
## AIC = 70.41, BIC = 78.29 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                -1.18   0.57    -2.06   0.04
## collocationPALM-UP          0.96   0.74     1.29   0.20
## collocationPRO.3            1.18   1.00     1.18   0.24
## collocationTHAT             2.79   0.96     2.90   0.00
## -------------------------------------------------------
# Predicted probabilities per collocate from test6_m.
# glm() models the second level of factor(Type), which here is "SEE"
# (ME intercept -1.18 = log(4/13)), so the title names SEE rather than
# LOOK-AT.reaction.
plot_model(
  test6_m,
  type = "pred",
  terms = "collocation",
  title = "Test6: predicted w/'SEE'",
  axis.title = ""
) +
  theme_bw()

Test7

# Load the Test7 sheet: one row per Type, one count column per collocate.
test7 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "SEE & SEE-SEE tm1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test7 <- test7 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types; their names are copied by hand into measure.vars below.
test7 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 3
##   Type       ME `PALM-UP`
##   <chr>   <dbl>     <dbl>
## 1 SEE        23         5
## 2 SEE-SEE     4         5
# Wide -> long: one row per Type x collocate, cell frequency in `count`.
test7_long <- melt(
  test7,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP"),
  variable.name = "collocation",
  value.name = "count"
)

# One row per observation, with Type as a factor response.
test7_binom <- test7_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate.
test7_m <- glm(Type ~ collocation, family = "binomial", data = test7_binom)
summ(test7_m)
## MODEL INFO:
## Observations: 37
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(1) = 4.54, p = 0.03
## Pseudo-R² (Cragg-Uhler) = 0.17
## Pseudo-R² (McFadden) = 0.11
## AIC = 40.52, BIC = 43.74 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                -1.75   0.54    -3.23   0.00
## collocationPALM-UP          1.75   0.83     2.10   0.04
## -------------------------------------------------------
# Predicted probabilities per collocate from test7_m.
# glm() models the second level of factor(Type), which here is "SEE-SEE"
# (ME intercept -1.75 = log(4/23)), so the title names SEE-SEE rather than SEE.
plot_model(
  test7_m,
  type = "pred",
  terms = "collocation",
  title = "Test7: predicted w/'SEE-SEE'",
  axis.title = ""
) +
  theme_bw()

Test8

# Load the Test8 sheet: one row per Type, one count column per collocate.
test8 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "SEE & SEE-SEE tp1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test8 <- test8 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types.
test8 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 2
##   Type    `PALM-UP`
##   <chr>       <dbl>
## 1 SEE             8
## 2 SEE-SEE         3
#STOP: cannot run test with only one overlapping variable

# test8_long <- melt(test8, id.vars=c("Type"),
#     measure.vars=c("ME", "PALM-UP"),
#     variable.name="collocation", value.name="count")
# test8_binom <- test8_long %>%  uncount(count) 
# test8_binom <- mutate(test8_binom, Type = factor(Type)) 
# test8_m <- glm(Type~collocation, data = test8_binom, family = "binomial")
# summ(test8_m) 
# plot_model(test8_m, type = "pred", terms = c("collocation"), title = "test8: predicted w/'LOOK-AT.reaction'",axis.title = "") + theme_bw()

Test9

# Load the Test9 sheet: one row per Type, one count column per collocate.
# NOTE(review): this is the same sheet Test7 loads ("SEE & SEE-SEE tm1"), so
# Test9 reproduces Test7 exactly. Test10 reads "LOOK-AT.reaction & SEE-SEE tp1",
# so the intended sheet here is presumably "LOOK-AT.reaction & SEE-SEE tm1" --
# TODO confirm against the workbook, then re-run this section.
test9 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "SEE & SEE-SEE tm1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test9 <- test9 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types; their names are copied by hand into measure.vars below.
test9 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 3
##   Type       ME `PALM-UP`
##   <chr>   <dbl>     <dbl>
## 1 SEE        23         5
## 2 SEE-SEE     4         5
# Wide -> long: one row per Type x collocate, cell frequency in `count`.
test9_long <- melt(
  test9,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP"),
  variable.name = "collocation",
  value.name = "count"
)

# One row per observation, with Type as a factor response.
test9_binom <- test9_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate.
test9_m <- glm(Type ~ collocation, family = "binomial", data = test9_binom)
summ(test9_m)
## MODEL INFO:
## Observations: 37
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(1) = 4.54, p = 0.03
## Pseudo-R² (Cragg-Uhler) = 0.17
## Pseudo-R² (McFadden) = 0.11
## AIC = 40.52, BIC = 43.74 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                -1.75   0.54    -3.23   0.00
## collocationPALM-UP          1.75   0.83     2.10   0.04
## -------------------------------------------------------
# Predicted probabilities per collocate from test9_m.
# glm() models the second level of factor(Type), which here is "SEE-SEE"
# (ME intercept -1.75 = log(4/23)), so the title names SEE-SEE rather than SEE.
plot_model(
  test9_m,
  type = "pred",
  terms = "collocation",
  title = "Test9: predicted w/'SEE-SEE'",
  axis.title = ""
) +
  theme_bw()

Test10

# Load the Test10 sheet: one row per Type, one count column per collocate.
test10 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.reaction & SEE-SEE tp1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test10 <- test10 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types.
test10 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 2
##   Type             `PALM-UP`
##   <chr>                <dbl>
## 1 LOOK-AT.reaction        10
## 2 SEE-SEE                  3
#STOP: cannot run test with only one overlapping variable


# test10_long <- melt(test10, id.vars=c("Type"),
#     measure.vars=c("ME", "PALM-UP"),
#     variable.name="collocation", value.name="count")
# test10_binom <- test10_long %>%  uncount(count) 
# test10_binom <- mutate(test10_binom, Type = factor(Type)) 
# test10_m <- glm(Type~collocation, data = test10_binom, family = "binomial")
# summ(test10_m) 
# plot_model(test10_m, type = "pred", terms = c("collocation"), title = "test10: predicted w/'SEE'",axis.title = "") + theme_bw()

Test11

# Load the Test11 sheet: one row per Type, one count column per collocate.
# The sheet has two columns named YOU, which readxl renames (see the message
# echoed below); neither is used in measure.vars further down.
test11 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision + SEE tm1"
)
## New names:
## * YOU -> YOU...4
## * YOU -> YOU...7

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test11 <- test11 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types; their names are copied by hand into measure.vars below.
test11 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 3
##   Type              ME `PALM-UP`
##   <chr>          <dbl>     <dbl>
## 1 LOOK-AT.vision    23         9
## 2 SEE-SEE            4         5
# Wide -> long: one row per Type x collocate, cell frequency in `count`.
test11_long <- melt(
  test11,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP"),
  variable.name = "collocation",
  value.name = "count"
)

# One row per observation, with Type as a factor response.
test11_binom <- test11_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))

# Logistic regression of Type on collocate.
test11_m <- glm(Type ~ collocation, family = "binomial", data = test11_binom)
summ(test11_m)
## MODEL INFO:
## Observations: 41
## Dependent Variable: Type
## Type: Generalized linear model
##   Family: binomial 
##   Link function: logit 
## 
## MODEL FIT:
## <U+03C7>²(1) = 2.25, p = 0.13
## Pseudo-R² (Cragg-Uhler) = 0.08
## Pseudo-R² (McFadden) = 0.05
## AIC = 44.90, BIC = 48.33 
## 
## Standard errors: MLE
## -------------------------------------------------------
##                             Est.   S.E.   z val.      p
## ------------------------ ------- ------ -------- ------
## (Intercept)                -1.75   0.54    -3.23   0.00
## collocationPALM-UP          1.16   0.78     1.49   0.14
## -------------------------------------------------------
# Predicted probabilities per collocate from test11_m.
# glm() models the second level of factor(Type), which here is "SEE-SEE"
# (ME intercept -1.75 = log(4/23)), so the title names SEE-SEE rather than
# LOOK-AT.vision.
plot_model(
  test11_m,
  type = "pred",
  terms = "collocation",
  title = "Test11: predicted w/'SEE-SEE'",
  axis.title = ""
) +
  theme_bw()

Test12

# Load the Test12 sheet: one row per Type, one count column per collocate.
test12 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision + SEE tp1"
)

# Zeros become NA one numeric column at a time; dplyr >= 1.1.0 rejects
# na_if() on a whole data frame because 0 cannot be cast to a tibble.
test12 <- test12 %>%
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))

# Print (do not store) the NA-free columns, i.e. the collocates shared by
# both Types.
test12 %>%
  select(where(~ !any(is.na(.x))))
## # A tibble: 2 x 2
##   Type           `PALM-UP`
##   <chr>              <dbl>
## 1 LOOK-AT.vision         9
## 2 SEE-SEE                3
#STOP: cannot run test with only one overlapping variable

# test12_long <- melt(test12, id.vars=c("Type"),
#     measure.vars=c("ME", "PALM-UP"),
#     variable.name="collocation", value.name="count")
# test12_binom <- test12_long %>%  uncount(count) 
# test12_binom <- mutate(test12_binom, Type = factor(Type)) 
# test12_m <- glm(Type~collocation, data = test12_binom, family = "binomial")
# summ(test12_m) 
# plot_model(test12_m, type = "pred", terms = c("collocation"), title = "test12: predicted w/'LOOK-AT.vision'",axis.title = "") + theme_bw()