# install.packages("dplyr")
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.2에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# install.packages("ggplot2")
library(ggplot2)
# install.packages("lmerTest")
library(lmerTest)
## Warning: 패키지 'lmerTest'는 R 버전 4.2.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: lme4
## Warning: 패키지 'lme4'는 R 버전 4.2.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: Matrix
## 
## 다음의 패키지를 부착합니다: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
# install.packages("lme4")
library(lme4)
# install.packages("sciplot")
library(sciplot)
# Load the tab-separated data set; the first row holds the column names.
# NOTE(review): this is an absolute, machine-specific path, so the script
# only runs where this exact file exists -- consider a relative path.
data <- read.table(
  "C:\\Users\\csjja\\Desktop\\data_JCL_22-3-11.txt",
  sep = "\t",
  header = TRUE
)
# Open the spreadsheet viewer only in interactive sessions; View() needs a
# GUI and can stop a non-interactive run (Rscript, headless rendering).
if (interactive()) {
  View(data)
}
# Print the column types and a preview of the values.
str(data)
## 'data.frame':    1577 obs. of  18 variables:
##  $ exp            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ register       : chr  "ids" "ids" "ids" "ids" ...
##  $ subject        : chr  "W01" "W01" "W01" "W01" ...
##  $ speaker_gender : chr  "f" "f" "f" "f" ...
##  $ listener_gender: chr  "m" "m" "m" "m" ...
##  $ speaker_age    : chr  "na" "na" "na" "na" ...
##  $ listener_age   : chr  "11;09" "11;09" "11;09" "11;09" ...
##  $ item           : chr  "kkoch" "kkoch" "kkoch" "kkoch" ...
##  $ syllableN      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ frequency      : chr  "high" "high" "high" "high" ...
##  $ coda           : chr  "ch" "ch" "ch" "ch" ...
##  $ pronounced     : chr  "ch" "ch" "ch" "ch" ...
##  $ ur             : int  1 1 1 1 1 1 0 0 0 1 ...
##  $ urPal          : int  1 1 1 1 1 1 0 1 0 1 ...
##  $ suffix         : chr  "i" "e" "ul" "i" ...
##  $ vowel          : chr  "i" "e" "u" "i" ...
##  $ task           : chr  "scripted" "scripted" "scripted" "scripted" ...
##  $ task2          : chr  "reading" "reading" "reading" "reading" ...
# Convert ur and urPal to factors.
factor_cols <- c("ur", "urPal")
data[factor_cols] <- lapply(data[factor_cols], as.factor)
class(data[["ur"]]) # check the class
## [1] "factor"
class(data[["urPal"]]) # check the class
## [1] "factor"
# Rename a column (task2 becomes style).
data <- data %>%
  rename("style" = "task2")
# Keep experiment 1 only and exclude words carrying the suffix "e".
# Comma-separated conditions in filter() are combined with a logical AND.
data_1 <- filter(data, exp == "1", suffix != "e")
str(data_1)
## 'data.frame':    828 obs. of  18 variables:
##  $ exp            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ register       : chr  "ids" "ids" "ids" "ids" ...
##  $ subject        : chr  "W01" "W01" "W01" "W01" ...
##  $ speaker_gender : chr  "f" "f" "f" "f" ...
##  $ listener_gender: chr  "m" "m" "m" "m" ...
##  $ speaker_age    : chr  "na" "na" "na" "na" ...
##  $ listener_age   : chr  "11;09" "11;09" "11;09" "11;09" ...
##  $ item           : chr  "kkoch" "kkoch" "kkoch" "path" ...
##  $ syllableN      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ frequency      : chr  "high" "high" "high" "high" ...
##  $ coda           : chr  "ch" "ch" "ch" "th" ...
##  $ pronounced     : chr  "ch" "ch" "ch" "ch" ...
##  $ ur             : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 2 2 2 2 ...
##  $ urPal          : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 2 2 2 ...
##  $ suffix         : chr  "i" "ul" "i" "ul" ...
##  $ vowel          : chr  "i" "u" "i" "u" ...
##  $ task           : chr  "scripted" "scripted" "scripted" "scripted" ...
##  $ style          : chr  "reading" "reading" "reading" "reading" ...
# Mixed-effects logistic regression of ur on register, coda, vowel and style,
# with a random intercept for each subject, fitted on the filtered data.
ur_model <- glmer(
  ur ~ register + coda + vowel + style + (1 | subject),
  data = data_1,
  family = "binomial"
)
summary(ur_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: ur ~ register + coda + vowel + style + (1 | subject)
##    Data: data_1
## 
##      AIC      BIC   logLik deviance df.resid 
##    818.6    856.3   -401.3    802.6      820 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.9882 -0.5467  0.1788  0.5305  4.1888 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  subject (Intercept) 1.851    1.361   
## Number of obs: 828, groups:  subject, 22
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.7033     0.5445   3.128  0.00176 ** 
## registerids       1.6491     0.2004   8.229  < 2e-16 ***
## codaph            0.9052     0.2304   3.929 8.53e-05 ***
## codath           -1.7263     0.2329  -7.414 1.23e-13 ***
## voweli           -2.1974     0.4333  -5.071 3.96e-07 ***
## vowelu           -1.7882     0.4320  -4.139 3.48e-05 ***
## stylespotaneous  -0.3521     0.2069  -1.702  0.08883 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) rgstrd codaph codath voweli vowelu
## registerids -0.203                                   
## codaph      -0.108  0.061                            
## codath      -0.290 -0.143  0.330                     
## voweli      -0.751  0.007 -0.035  0.247              
## vowelu      -0.767  0.114 -0.085  0.153  0.892       
## stylespotns -0.194 -0.083 -0.099 -0.006  0.071  0.160
# summary(glmer(ur ~ register*coda + register*vowel + register*style + (1 + register + coda + vowel + style|subject),  family="binomial",data=data_1))
# Plot ur by register, split by coda: jittered raw points overlaid with
# semi-transparent boxplots, both dodged by the same width so they line up.
# NOTE(review): ur was converted to a factor earlier in this script, so the
# y axis is discrete here -- confirm that a boxplot is what is intended.
ggplot(data_1, aes(x = register, y = ur, color = coda, fill = coda)) +
  geom_point(position = position_jitterdodge(dodge.width = 0.9)) +
  geom_boxplot(
    outlier.colour = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  # show.legend is not a label setting, so it was dropped from labs();
  # legends are drawn by default.
  labs(
    x = NULL, y = "CANONICALITY",
    title = "Canonical output form on Surface in CDS and ADS", tag = ""
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Labels are matched by level name rather than by position. Positional
  # labels follow the alphabetical level order, which put "CDS" on the
  # non-"ids" register. Assumes the other level is coded "ads" -- TODO
  # confirm; an unmatched level simply keeps its raw value as its label.
  scale_x_discrete(labels = c(ids = "CDS", ads = "ADS"))