Here’s an overview of all the packages we will need:
library(tidyverse)
library(reshape2) #because i don't fully understand tidy syntax still sometimes
library(readxl) #for reading in separate sheets from excel
library(sjPlot) #for quick effect plots and other stuff
library(jtools) #for actually legible model printouts -- where have you been all my life??
# Load the contingency table for test 1 from its worksheet.
test1 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision & reaction tm1"
)
# Goal: drop the columns that contain a 0 count.
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test1 <- test1 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test1 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 5
## Type ME `PALM-UP` YOU WILL
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 LOOK-AT.vision 23 9 5 2
## 2 LOOK-AT.reaction 95 5 3 2
# Reshape from wide to long: one row per (Type, collocation) pair.
test1_long <- melt(
  test1,
  # identifier column: kept as-is, not split apart
  id.vars = "Type",
  # source columns that get stacked
  measure.vars = c("ME", "PALM-UP", "YOU", "WILL"),
  # new column recording which source column each value came from
  variable.name = "collocation",
  # new column holding the stacked values
  value.name = "count"
)
test1_long
## Type collocation count
## 1 LOOK-AT.vision ME 23
## 2 LOOK-AT.reaction ME 95
## 3 LOOK-AT.vision PALM-UP 9
## 4 LOOK-AT.reaction PALM-UP 5
## 5 LOOK-AT.vision YOU 5
## 6 LOOK-AT.reaction YOU 3
## 7 LOOK-AT.vision WILL 2
## 8 LOOK-AT.reaction WILL 2
# Expand the counts into one row per observation and make Type a factor so
# it can serve as the binary response.
test1_binom <- test1_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Logistic regression of Type on collocation (Winter, pp. 289-299).
test1_m <- glm(Type ~ collocation, family = "binomial", data = test1_binom)
summ(test1_m) # only interested in the intercept
## MODEL INFO:
## Observations: 144
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(3) = 17.43, p = 0.00
## Pseudo-R² (Cragg-Uhler) = 0.17
## Pseudo-R² (McFadden) = 0.10
## AIC = 158.79, BIC = 170.67
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) -1.42 0.23 -6.10 0.00
## collocationPALM-UP 2.01 0.60 3.32 0.00
## collocationYOU 1.93 0.77 2.52 0.01
## collocationWILL 1.42 1.03 1.38 0.17
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
plot_model(
  test1_m,
  type = "pred",
  terms = "collocation",
  title = "Test1: predicted w/ 'LOOK-AT.vision'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 2 from its worksheet.
test2 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision & reaction tp1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test2 <- test2 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test2 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 4
## Type ME `PALM-UP` THAT
## <chr> <dbl> <dbl> <dbl>
## 1 LOOK-AT.vision 6 9 3
## 2 LOOK-AT.reaction 13 10 2
# Reshape to long format: one row per (Type, collocation) with its count.
test2_long <- melt(
  test2,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP", "THAT"),
  variable.name = "collocation",
  value.name = "count"
)
# One row per observation; Type becomes a factor for use as the response.
test2_binom <- test2_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Binomial GLM of Type on collocation, followed by a readable summary.
test2_m <- glm(Type ~ collocation, family = "binomial", data = test2_binom)
summ(test2_m)
## MODEL INFO:
## Observations: 43
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(2) = 1.75, p = 0.42
## Pseudo-R² (Cragg-Uhler) = 0.05
## Pseudo-R² (McFadden) = 0.03
## AIC = 62.72, BIC = 68.00
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) -0.77 0.49 -1.57 0.12
## collocationPALM-UP 0.67 0.67 0.99 0.32
## collocationTHAT 1.18 1.04 1.14 0.26
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
plot_model(
  test2_m,
  type = "pred",
  terms = "collocation",
  title = "Test2: predicted w/'LOOK-AT.vision'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 3 from its worksheet.
test3 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision & SEE tm1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test3 <- test3 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test3 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 11
## Type ME `PALM-UP` CAN INDEX YOU NEVER `CAN'T` `LOOK-AT` NOT NOW
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LOOK-AT~ 23 9 6 6 5 3 2 2 2 2
## 2 SEE 23 5 22 3 2 4 2 6 2 6
# Reshape to long format: one row per (Type, collocation) with its count.
test3_long <- melt(
  test3,
  id.vars = "Type",
  measure.vars = c(
    "ME", "PALM-UP", "CAN", "INDEX", "YOU",
    "NEVER", "CAN'T", "LOOK-AT", "NOT", "NOW"
  ),
  variable.name = "collocation",
  value.name = "count"
)
# One row per observation; Type becomes a factor for use as the response.
test3_binom <- test3_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Binomial GLM of Type on collocation, followed by a readable summary.
test3_m <- glm(Type ~ collocation, family = "binomial", data = test3_binom)
summ(test3_m)
## MODEL INFO:
## Observations: 135
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(9) = 15.89, p = 0.07
## Pseudo-R² (Cragg-Uhler) = 0.15
## Pseudo-R² (McFadden) = 0.09
## AIC = 189.59, BIC = 218.65
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) 0.00 0.29 0.00 1.00
## collocationPALM-UP -0.59 0.63 -0.93 0.35
## collocationCAN 1.30 0.55 2.38 0.02
## collocationINDEX -0.69 0.77 -0.90 0.37
## collocationYOU -0.92 0.89 -1.03 0.30
## collocationNEVER 0.29 0.82 0.35 0.73
## collocationCAN'T -0.00 1.04 -0.00 1.00
## collocationLOOK-AT 1.10 0.87 1.27 0.21
## collocationNOT 0.00 1.04 0.00 1.00
## collocationNOW 1.10 0.87 1.27 0.21
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
# glm() treats the first factor level of Type ("LOOK-AT.vision") as failure,
# so the plotted probability is that of the second level, "SEE". The title
# names that level.
plot_model(
  test3_m,
  type = "pred",
  terms = "collocation",
  title = "Test3: predicted w/'SEE'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 4 from its worksheet.
test4 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision & SEE tp1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test4 <- test4 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test4 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 7
## Type ME `PALM-UP` INDEX YOU `LOOK-AT` THEY
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LOOK-AT.vision 23 9 6 5 2 2
## 2 SEE 4 8 10 10 2 3
# Reshape to long format: one row per (Type, collocation) with its count.
test4_long <- melt(
  test4,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP", "INDEX", "YOU", "LOOK-AT", "THEY"),
  variable.name = "collocation",
  value.name = "count"
)
# One row per observation; Type becomes a factor for use as the response.
test4_binom <- test4_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Binomial GLM of Type on collocation, followed by a readable summary.
test4_m <- glm(Type ~ collocation, family = "binomial", data = test4_binom)
summ(test4_m)
## MODEL INFO:
## Observations: 84
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(5) = 16.55, p = 0.01
## Pseudo-R² (Cragg-Uhler) = 0.24
## Pseudo-R² (McFadden) = 0.14
## AIC = 110.70, BIC = 125.29
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) -1.75 0.54 -3.23 0.00
## collocationPALM-UP 1.63 0.73 2.24 0.02
## collocationINDEX 2.26 0.75 3.02 0.00
## collocationYOU 2.44 0.77 3.17 0.00
## collocationLOOK-AT 1.75 1.14 1.54 0.12
## collocationTHEY 2.15 1.06 2.03 0.04
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
# glm() treats the first factor level of Type ("LOOK-AT.vision") as failure,
# so the plotted probability is that of the second level, "SEE". The title
# names that level.
plot_model(
  test4_m,
  type = "pred",
  terms = "collocation",
  title = "Test4: predicted w/'SEE'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 5 from its worksheet.
test5 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.reaction & SEE tm1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test5 <- test5 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test5 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 7
## Type ME PRO.3 PEOPLE `PALM-UP` YOU WILL
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LOOK-AT.reaction 95 11 8 5 3 2
## 2 SEE 23 4 2 5 2 9
# Reshape to long format: one row per (Type, collocation) with its count.
test5_long <- melt(
  test5,
  id.vars = "Type",
  measure.vars = c("ME", "PRO.3", "PEOPLE", "PALM-UP", "YOU", "WILL"),
  variable.name = "collocation",
  value.name = "count"
)
# One row per observation; Type becomes a factor for use as the response.
test5_binom <- test5_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Binomial GLM of Type on collocation, followed by a readable summary.
test5_m <- glm(Type ~ collocation, family = "binomial", data = test5_binom)
summ(test5_m)
## MODEL INFO:
## Observations: 169
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(5) = 21.03, p = 0.00
## Pseudo-R² (Cragg-Uhler) = 0.17
## Pseudo-R² (McFadden) = 0.11
## AIC = 186.84, BIC = 205.62
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) -1.42 0.23 -6.10 0.00
## collocationPRO.3 0.41 0.63 0.65 0.52
## collocationPEOPLE 0.03 0.82 0.04 0.97
## collocationPALM-UP 1.42 0.67 2.11 0.04
## collocationYOU 1.01 0.94 1.08 0.28
## collocationWILL 2.92 0.82 3.58 0.00
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
# glm() treats the first factor level of Type ("LOOK-AT.reaction") as failure,
# so the plotted probability is that of the second level, "SEE". The title
# names that level.
plot_model(
  test5_m,
  type = "pred",
  terms = "collocation",
  title = "Test5: predicted w/'SEE'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 6 from its worksheet.
test6 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.reaction & SEE tp1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test6 <- test6 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test6 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 5
## Type ME `PALM-UP` PRO.3 THAT
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 LOOK-AT.reaction 13 10 3 2
## 2 SEE 4 8 3 10
# Reshape to long format: one row per (Type, collocation) with its count.
test6_long <- melt(
  test6,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP", "PRO.3", "THAT"),
  variable.name = "collocation",
  value.name = "count"
)
# One row per observation; Type becomes a factor for use as the response.
test6_binom <- test6_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Binomial GLM of Type on collocation, followed by a readable summary.
test6_m <- glm(Type ~ collocation, family = "binomial", data = test6_binom)
summ(test6_m)
## MODEL INFO:
## Observations: 53
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(3) = 10.89, p = 0.01
## Pseudo-R² (Cragg-Uhler) = 0.25
## Pseudo-R² (McFadden) = 0.15
## AIC = 70.41, BIC = 78.29
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) -1.18 0.57 -2.06 0.04
## collocationPALM-UP 0.96 0.74 1.29 0.20
## collocationPRO.3 1.18 1.00 1.18 0.24
## collocationTHAT 2.79 0.96 2.90 0.00
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
# glm() treats the first factor level of Type ("LOOK-AT.reaction") as failure,
# so the plotted probability is that of the second level, "SEE". The title
# names that level.
plot_model(
  test6_m,
  type = "pred",
  terms = "collocation",
  title = "Test6: predicted w/'SEE'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 7 from its worksheet.
test7 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "SEE & SEE-SEE tm1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test7 <- test7 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test7 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 3
## Type ME `PALM-UP`
## <chr> <dbl> <dbl>
## 1 SEE 23 5
## 2 SEE-SEE 4 5
# Reshape to long format: one row per (Type, collocation) with its count.
test7_long <- melt(
  test7,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP"),
  variable.name = "collocation",
  value.name = "count"
)
# One row per observation; Type becomes a factor for use as the response.
test7_binom <- test7_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Binomial GLM of Type on collocation, followed by a readable summary.
test7_m <- glm(Type ~ collocation, family = "binomial", data = test7_binom)
summ(test7_m)
## MODEL INFO:
## Observations: 37
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(1) = 4.54, p = 0.03
## Pseudo-R² (Cragg-Uhler) = 0.17
## Pseudo-R² (McFadden) = 0.11
## AIC = 40.52, BIC = 43.74
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) -1.75 0.54 -3.23 0.00
## collocationPALM-UP 1.75 0.83 2.10 0.04
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
# glm() treats the first factor level of Type ("SEE") as failure, so the
# plotted probability is that of the second level, "SEE-SEE". The title
# names that level.
plot_model(
  test7_m,
  type = "pred",
  terms = "collocation",
  title = "Test7: predicted w/'SEE-SEE'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 8 from its worksheet.
test8 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "SEE & SEE-SEE tp1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test8 <- test8 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test8 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 2
## Type `PALM-UP`
## <chr> <dbl>
## 1 SEE 8
## 2 SEE-SEE 3
#STOP: cannot run test with only one overlapping variable
# test8_long <- melt(test8, id.vars=c("Type"),
# measure.vars=c("ME", "PALM-UP"),
# variable.name="collocation", value.name="count")
# test8_binom <- test8_long %>% uncount(count)
# test8_binom <- mutate(test8_binom, Type = factor(Type))
# test8_m <- glm(Type~collocation, data = test8_binom, family = "binomial")
# summ(test8_m)
# plot_model(test8_m, type = "pred", terms = c("collocation"), title = "test8: predicted w/'LOOK-AT.reaction'",axis.title = "") + theme_bw()
# Load the contingency table for test 9 from its worksheet.
# NOTE(review): this is the same sheet that test7 reads, so test 9 repeats
# test 7 exactly. Looks like a copy-paste slip; presumably the intended sheet
# is "LOOK-AT.reaction & SEE-SEE tm1" -- confirm against the workbook.
test9 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "SEE & SEE-SEE tm1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test9 <- test9 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test9 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 3
## Type ME `PALM-UP`
## <chr> <dbl> <dbl>
## 1 SEE 23 5
## 2 SEE-SEE 4 5
# Reshape to long format: one row per (Type, collocation) with its count.
test9_long <- melt(
  test9,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP"),
  variable.name = "collocation",
  value.name = "count"
)
# One row per observation; Type becomes a factor for use as the response.
test9_binom <- test9_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Binomial GLM of Type on collocation, followed by a readable summary.
test9_m <- glm(Type ~ collocation, family = "binomial", data = test9_binom)
summ(test9_m)
## MODEL INFO:
## Observations: 37
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(1) = 4.54, p = 0.03
## Pseudo-R² (Cragg-Uhler) = 0.17
## Pseudo-R² (McFadden) = 0.11
## AIC = 40.52, BIC = 43.74
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) -1.75 0.54 -3.23 0.00
## collocationPALM-UP 1.75 0.83 2.10 0.04
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
# glm() treats the first factor level of Type ("SEE") as failure, so the
# plotted probability is that of the second level, "SEE-SEE". The title
# names that level.
plot_model(
  test9_m,
  type = "pred",
  terms = "collocation",
  title = "Test9: predicted w/'SEE-SEE'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 10 from its worksheet.
test10 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.reaction & SEE-SEE tp1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test10 <- test10 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test10 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 2
## Type `PALM-UP`
## <chr> <dbl>
## 1 LOOK-AT.reaction 10
## 2 SEE-SEE 3
#STOP: cannot run test with only one overlapping variable
# test10_long <- melt(test10, id.vars=c("Type"),
# measure.vars=c("ME", "PALM-UP"),
# variable.name="collocation", value.name="count")
# test10_binom <- test10_long %>% uncount(count)
# test10_binom <- mutate(test10_binom, Type = factor(Type))
# test10_m <- glm(Type~collocation, data = test10_binom, family = "binomial")
# summ(test10_m)
# plot_model(test10_m, type = "pred", terms = c("collocation"), title = "test10: predicted w/'SEE'",axis.title = "") + theme_bw()
# Load the contingency table for test 11 from its worksheet.
# The sheet has two columns named YOU; readxl renames them on import, as the
# message below shows.
test11 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision + SEE tm1"
)
## New names:
## * YOU -> YOU...4
## * YOU -> YOU...7
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test11 <- test11 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test11 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 3
## Type ME `PALM-UP`
## <chr> <dbl> <dbl>
## 1 LOOK-AT.vision 23 9
## 2 SEE-SEE 4 5
# Reshape to long format: one row per (Type, collocation) with its count.
test11_long <- melt(
  test11,
  id.vars = "Type",
  measure.vars = c("ME", "PALM-UP"),
  variable.name = "collocation",
  value.name = "count"
)
# One row per observation; Type becomes a factor for use as the response.
test11_binom <- test11_long %>%
  uncount(count) %>%
  mutate(Type = factor(Type))
# Binomial GLM of Type on collocation, followed by a readable summary.
test11_m <- glm(Type ~ collocation, family = "binomial", data = test11_binom)
summ(test11_m)
## MODEL INFO:
## Observations: 41
## Dependent Variable: Type
## Type: Generalized linear model
## Family: binomial
## Link function: logit
##
## MODEL FIT:
## <U+03C7>²(1) = 2.25, p = 0.13
## Pseudo-R² (Cragg-Uhler) = 0.08
## Pseudo-R² (McFadden) = 0.05
## AIC = 44.90, BIC = 48.33
##
## Standard errors: MLE
## -------------------------------------------------------
## Est. S.E. z val. p
## ------------------------ ------- ------ -------- ------
## (Intercept) -1.75 0.54 -3.23 0.00
## collocationPALM-UP 1.16 0.78 1.49 0.14
## -------------------------------------------------------
# Effect plot of the fitted probabilities per collocation.
# glm() treats the first factor level of Type ("LOOK-AT.vision") as failure,
# so the plotted probability is that of the second level, "SEE-SEE". The
# title names that level.
plot_model(
  test11_m,
  type = "pred",
  terms = "collocation",
  title = "Test11: predicted w/'SEE-SEE'",
  axis.title = ""
) + theme_bw()
# Load the contingency table for test 12 from its worksheet.
test12 <- read_excel(
  "2021April29-LOOK-AT&SEE_worksheet-chisq.xlsx",
  sheet = "LOOK-AT.vision + SEE tp1"
)
# Recode 0 as NA one numeric column at a time; calling na_if() on a whole
# data frame is an error in dplyr >= 1.1.0.
test12 <- test12 %>% mutate(across(where(is.numeric), ~ na_if(.x, 0)))
# Show only the columns without NAs (printed for inspection, not saved).
test12 %>% select(where(~ !anyNA(.x)))
## # A tibble: 2 x 2
## Type `PALM-UP`
## <chr> <dbl>
## 1 LOOK-AT.vision 9
## 2 SEE-SEE 3
#STOP: cannot run test with only one overlapping variable
# test12_long <- melt(test12, id.vars=c("Type"),
# measure.vars=c("ME", "PALM-UP"),
# variable.name="collocation", value.name="count")
# test12_binom <- test12_long %>% uncount(count)
# test12_binom <- mutate(test12_binom, Type = factor(Type))
# test12_m <- glm(Type~collocation, data = test12_binom, family = "binomial")
# summ(test12_m)
# plot_model(test12_m, type = "pred", terms = c("collocation"), title = "test12: predicted w/'LOOK-AT.vision'",axis.title = "") + theme_bw()