# install.packages("dplyr")
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.2에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# install.packages(lme4)
library(lme4)
## Warning: 패키지 'lme4'는 R 버전 4.2.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: Matrix
# install.packages("lmerTest")
library(lmerTest)
## Warning: 패키지 'lmerTest'는 R 버전 4.2.2에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
# install.packages("ggplot2")
library(ggplot2)
# install.packages("sciplot")
library(sciplot)
wd <- read.csv("C:\\Users\\csjja\\Desktop\\word_data_20220222.csv", header=T);head(wd)
##   X              subject age        word wordclass left.right location type
## 1 0 11_complete.TEXTGRID  NA          JA         I                         
## 2 1 11_complete.TEXTGRID  NA       YEOGI         N                         
## 3 2 11_complete.TEXTGRID  NA         ANE         N                         
## 4 3 11_complete.TEXTGRID  NA DEUREOGADDA         V                         
## 5 4 11_complete.TEXTGRID  NA          JA         I                         
## 6 5 11_complete.TEXTGRID  NA    JABAYAJI                                   
##   beat word_start word_end prev_word_end touch_end touch_start
## 1   NA    340.037  340.997       292.744        NA          NA
## 2   NA    340.997  342.207       340.997        NA          NA
## 3   NA    342.207  342.447       342.207        NA          NA
## 4   NA    342.447  343.247       342.447        NA          NA
## 5   NA    356.034  356.164       343.247        NA          NA
## 6   NA    358.827  360.407       356.164        NA          NA
##   word_start.touch_start word_end.touch_end align_start align_end
## 1                     NA                 NA           0         0
## 2                     NA                 NA           0         0
## 3                     NA                 NA           0         0
## 4                     NA                 NA           0         0
## 5                     NA                 NA           0         0
## 6                     NA                 NA           0         0
##   align_start_with_touch_end align_end_with_touch_start concur percent_concur
## 1                          0                          0      0              0
## 2                          0                          0      0              0
## 3                          0                          0      0              0
## 4                          0                          0      0              0
## 5                          0                          0      0              0
## 6                          0                          0      0              0
wd$age<-wd$subject
sub("^2_complete.TEXTGRID","A0P01M",wd$age)->wd$age
sub("^3_complete.TEXTGRID","A2P01F",wd$age)->wd$age
sub("^4_complete.TEXTGRID","A1P01M",wd$age)->wd$age
sub("^5_complete.TEXTGRID","A2P02M",wd$age)->wd$age
sub("^6_complete.TEXTGRID","A0P02F",wd$age)->wd$age
sub("^7_complete.TEXTGRID","A0P03F",wd$age)->wd$age
sub("^8_complete.TEXTGRID","A1P02M",wd$age)->wd$age
sub("^9_complete.TEXTGRID","A2P03F",wd$age)->wd$age
sub("11_complete.TEXTGRID","A1P04M",wd$age)->wd$age
sub("12_complete.TEXTGRID","A2P04M",wd$age)->wd$age
sub("13_complete.TextGrid","A0P04M",wd$age)->wd$age
sub("14_complete.TEXTGRID","A2P05M",wd$age)->wd$age
sub("15_complete.TEXTGRID","A1P05F",wd$age)->wd$age
sub("16_complete.TEXTGRID","A1P06F",wd$age)->wd$age
sub("17_complete.TEXTGRID","A2P06M",wd$age)->wd$age
sub("18_complete.TEXTGRID","A2P07M",wd$age)->wd$age
sub("19_complete.TEXTGRID","A2P08M",wd$age)->wd$age
sub("20_complete.TEXTGRID","A2P09F",wd$age)->wd$age
sub("21_complete.TEXTGRID","A0P05M",wd$age)->wd$age
sub("22_complete.TEXTGRID","A2P10F",wd$age)->wd$age
sub("23_complete.TEXTGRID","A1P07F",wd$age)->wd$age
sub("24_complete.TEXTGRID","A1P08M",wd$age)->wd$age
sub("25_complete.TEXTGRID","A1P09M",wd$age)->wd$age
sub("26_complete.TEXTGRID","A2P11M",wd$age)->wd$age
sub("27_complete.TEXTGRID","A0P06M",wd$age)->wd$age
sub("28_complete.TEXTGRID","A0P07M",wd$age)->wd$age
sub("29_complete.TEXTGRID","A0P08M",wd$age)->wd$age
sub("30_complete.TEXTGRID","A0P09F",wd$age)->wd$age
sub("31_complete.TEXTGRID","A0P10F",wd$age)->wd$age
sub("32_complete.TEXTGRID","A1P10F",wd$age)->wd$age
sub("33_complete.TEXTGRID","A2P12M",wd$age)->wd$age
sub("34_complete.TEXTGRID","A1P11F",wd$age)->wd$age
sub("35_complete.TEXTGRID","A0P11M",wd$age)->wd$age
sub("36_complete.TEXTGRID","A0P12M",wd$age)->wd$age
sub("37_complete.TEXTGRID","A1P12F",wd$age)->wd$age
wd$age <- substr(wd$age, 1, 2) #age 추출 
# wd -> wd_saved # data 저장
wd <- wd %>% 
  filter(touch_start != 'NA'& touch_end != 'NA') #touch가 있는 행만 추출

문제: 나이에 따라 touch를 하는 location이 어떤 식으로 차이가 있을까?

wd$location <- as.factor(wd$location) #location을 factor로 변경 
wd$age <- as.factor(wd$age)  #age를 facotor로 변경
# contrasts(wd$location)
levels(wd$location);str(wd$location) # location에 들어있는 종류: "Arm"   "Face"  "Foot"  "Hand"  "Head"  "Leg"   "Toros" "Torso"
## [1] "Arm"   "Face"  "Foot"  "Hand"  "Head"  "Leg"   "Toros" "Torso"
##  Factor w/ 8 levels "Arm","Face","Foot",..: 8 8 8 8 8 8 8 8 4 4 ...
levels(wd$age)
## [1] "A0" "A1" "A2"
a <- nrow(wd[wd$location=="Arm",]) #location Arm인 행의 개수 세기 #234
b <- nrow(wd[wd$location=="Face",])  #location Face인 행의 개수 세기 #115
c <- nrow(wd[wd$location=="Foot",]) #location Foot 인 행의 개수 세기 #29
d <- nrow(wd[wd$location=="Hand",]) #location Hand 인 행의 개수 세기 # 258
e <- nrow(wd[wd$location=="Head",]) #location Head 인 행의 개수 세기 #84
f <- nrow(wd[wd$location=="Leg",]) #location Leg 인 행의 개수 세기 #88
g <- nrow(wd[wd$location=="Toros",]) #location Toros 인 행의 개수 세기 #3
h <- nrow(wd[wd$location=="Torso",]) #location Torso 인 행의 개수 세기 #795
total <-a+b+c+d+e+f+g+h #1606
loc_prop <- ifelse(wd$location =="Arm", a, ifelse(wd$location =="Face",b, ifelse(wd$location =="Foot", c, ifelse(wd$location =="Hand", d, ifelse(wd$location =="Leg", f, ifelse(wd$location =="Toros", g, h))))));head(loc_prop)
## [1] 795 795 795 795 795 795
loc_prop <- loc_prop/total
#cbind
wd <- cbind(wd, loc_prop); View(wd)
summary(lm(loc_prop ~ age, data=wd)) #나이 변화에 따른 touch하는 location변화
## 
## Call:
## lm(formula = loc_prop ~ age, data = wd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3276 -0.1850  0.1493  0.1493  0.2281 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.345692   0.005465  63.254  < 2e-16 ***
## ageA1       -0.078804   0.012342  -6.385 2.24e-10 ***
## ageA2       -0.050801   0.015125  -3.359 0.000801 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1855 on 1603 degrees of freedom
## Multiple R-squared:  0.02812,    Adjusted R-squared:  0.0269 
## F-statistic: 23.19 on 2 and 1603 DF,  p-value: 1.181e-10
summary(lm(loc_prop ~ left.right, data=wd)) #왼손인지 오른손인지에 따른 touch하는 location변화
## 
## Call:
## lm(formula = loc_prop ~ left.right, data = wd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3238 -0.1799  0.1694  0.1694  0.1694 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.34127    0.02077  16.432   <2e-16 ***
## left.rightRhand -0.01563    0.02132  -0.733    0.464    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1881 on 1604 degrees of freedom
## Multiple R-squared:  0.0003351,  Adjusted R-squared:  -0.0002882 
## F-statistic: 0.5376 on 1 and 1604 DF,  p-value: 0.4635
summary(lmer(loc_prop ~ age + (1|subject), data=wd))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loc_prop ~ age + (1 | subject)
##    Data: wd
## 
## REML criterion at convergence: -1163.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3693 -0.9255  0.4620  0.7648  2.0267 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subject  (Intercept) 0.01137  0.1066  
##  Residual             0.02668  0.1633  
## Number of obs: 1606, groups:  subject, 35
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  0.288467   0.031486 26.132063   9.162 1.21e-09 ***
## ageA1       -0.007488   0.046717 28.542180  -0.160    0.874    
## ageA2        0.001047   0.047053 31.202876   0.022    0.982    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) ageA1 
## ageA1 -0.674       
## ageA2 -0.669  0.451
# summary(lm(type ~ age, data=wd)) # error_that's why we should transfer to proprtion!
boxplot(loc_prop ~ age, col=c("green","blue","red"),wd)

lineplot.CI(wd$age, wd$loc_prop)