load the necessary packages

## Warning: 패키지 'dplyr'는 R 버전 4.3.2에서 작성되었습니다

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## Warning: 패키지 'ggplot2'는 R 버전 4.3.2에서 작성되었습니다

shapewords <-  c("기차", "배", "버스", "비행기", "소방차/불자동차", "썰매", "오토바이", "유모차", "자전거", 
            "차/자동차", "택시", "트럭", "헬리콥터", "공", "로봇", "연필", "인형", "종이", "책", 
            "총", "크레용/크레파스", "개", "고양이", "곰", "나비", "다람쥐", "닭", "당나귀", 
            "돼지", "말", "물고기", "뱀", "벌", "부엉이", "비둘기", "사슴", "사자", "새", 
            "소", "송아지", "악어", "양", "여우", "염소", "원숭이", "쥐", "참새", "코끼리", 
            "토끼", "펭귄", "하마", "호랑이", "가방", "구두", "기저귀", "단추", "모자", 
            "목걸이", "바지", "목도리", "신/신발", "양말", "운동화", "잠바", "잠옷", "장갑", 
            "주머니", "치마", "코트/외투", "팬티", "허리띠", "계단", "냉장고", "목욕탕", "문", 
            "베개", "서랍", "세탁기", "소파", "옷장", "의자", "이불", "전자레인지", "지하철", 
            "창문", "침대", "텔레비전", "피아노", "도너츠", "떡", "콩", "고추", "귀", "다리", 
            "똥꼬", "머리", "목", "무릎", "발", "발목", "발톱", "배", "배꼽", "손", "손가락", 
            "얼굴", "입", "코", "턱", "팔", "허리", "가위", "그릇", "라디오", "망치", "못", 
            "병", "비누", "빗", "빗자루", "상자", "숟가락", "시계", "쓰레기통/휴지통", "안경", 
            "열쇠", "옷걸이", "우산", "쟁반", "전화", "접시", "젓가락", "지갑", "청소기", "칫솔", 
            "카메라", "칼", "컵", "포크", "휴지", "그네", "깃발", "미끄럼틀", "사다리", "시소", 
            "지붕")

shapewords -> word

shape$lexicon <- do.call(paste, c(shape[26:49], sep = “,”))

shape$subset <- sapply(strsplit(shape$lexicon, “,”), function(x) { common_words <- intersect(x, word) if (length(common_words) > 0) { paste(common_words, collapse = “,”) } else { “” } })

shape$prop <- sapply(1:nrow(shape), function(i) { lexicon_words <- unlist(strsplit(shape$lexicon[i], “,”)) non_empty_lexicon_words <- lexicon_words[lexicon_words != “”] subset_words <- unlist(strsplit(shape$subset[i], “,”))

# Calculate the proportion of words in subset relative to lexicon prop <- length(subset_words) / length(non_empty_lexicon_words)

# Return the proportion return(prop) })

shape$vsize <- sapply(1:nrow(shape), function(i) { lexicon_words <- unlist(strsplit(shape$lexicon[i], “,”)) non_empty_lexicon_words <- lexicon_words[lexicon_words != “”] subset_words <- unlist(strsplit(shape$subset[i], “,”))

# Calculate the proportion of words in subset relative to lexicon vsize <- length(non_empty_lexicon_words)

# Return the proportion return(vsize) })

momo <- write.csv(shape, file = “【유아용(18~36개월)】(Moma_Kipi) K M-B CDI 맥아더-베이츠 의사소통발달 평가(응답).xlsx”, fileEncoding = “UTF-8”, row.names = FALSE)


Create a vector containing words that at least 85% of the people judged to have shape bias. 


```r
  shapewords <-  c("기차", "배", "버스", "비행기", "소방차/불자동차", "썰매", "오토바이", "유모차", "자전거", 
            "차/자동차", "택시", "트럭", "헬리콥터", "공", "로봇", "연필", "인형", "종이", "책", 
            "총", "크레용/크레파스", "개", "고양이", "곰", "나비", "다람쥐", "닭", "당나귀", 
            "돼지", "말", "물고기", "뱀", "벌", "부엉이", "비둘기", "사슴", "사자", "새", 
            "소", "송아지", "악어", "양", "여우", "염소", "원숭이", "쥐", "참새", "코끼리", 
            "토끼", "펭귄", "하마", "호랑이", "가방", "구두", "기저귀", "단추", "모자", 
            "목걸이", "바지", "목도리", "신/신발", "양말", "운동화", "잠바", "잠옷", "장갑", 
            "주머니", "치마", "코트/외투", "팬티", "허리띠", "계단", "냉장고", "목욕탕", "문", 
            "베개", "서랍", "세탁기", "소파", "옷장", "의자", "이불", "전자레인지", "지하철", 
            "창문", "침대", "텔레비전", "피아노", "도너츠", "떡", "콩", "고추", "귀", "다리", 
            "똥꼬", "머리", "목", "무릎", "발", "발목", "발톱", "배", "배꼽", "손", "손가락", 
            "얼굴", "입", "코", "턱", "팔", "허리", "가위", "그릇", "라디오", "망치", "못", 
            "병", "비누", "빗", "빗자루", "상자", "숟가락", "시계", "쓰레기통/휴지통", "안경", 
            "열쇠", "옷걸이", "우산", "쟁반", "전화", "접시", "젓가락", "지갑", "청소기", "칫솔", 
            "카메라", "칼", "컵", "포크", "휴지", "그네", "깃발", "미끄럼틀", "사다리", "시소", 
            "지붕")

shapewords -> word

Read in the data that contains CDI responses.

library(readxl)

## Warning: 패키지 'readxl'는 R 버전 4.3.2에서 작성되었습니다

setwd("C:/Users/admin/Desktop/R/shape bias/new shape bias/1203")
momo_kipi <- read_excel("【유아용(18~36개월)】(Moma_Kipi) K M-B CDI 맥아더-베이츠 의사소통발달 평가(응답).xlsx")

## New names:
## • `` -> `...57`
## • `` -> `...58`
## • `` -> `...59`
## • `` -> `...60`
## • `` -> `...61`
## • `` -> `...62`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...67`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...72`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...77`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
## • `` -> `...81`
## • `` -> `...82`
## • `` -> `...83`
## • `` -> `...84`
## • `` -> `...85`
## • `` -> `...86`
## • `` -> `...87`
## • `` -> `...88`
## • `` -> `...96`
## • `` -> `...97`
## • `` -> `...98`

seg4 <- read_excel("【유아용(18~36개월)】(Seg4)K M-B CDI 맥아더-베이츠 의사소통발달 평가(응답).xlsx")

## New names:
## • `` -> `...57`
## • `` -> `...58`
## • `` -> `...59`
## • `` -> `...60`
## • `` -> `...61`
## • `` -> `...62`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...67`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...72`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...77`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
## • `` -> `...81`
## • `` -> `...82`
## • `` -> `...83`
## • `` -> `...84`
## • `` -> `...85`
## • `` -> `...86`
## • `` -> `...87`
## • `` -> `...88`

cdi_response <- read_excel("Entire CDI response Compile(21-10-29).xlsx")

## New names:
## • `만약 그렇다면, 구체적으로 적어주세요.` -> `만약 그렇다면, 구체적으로
##   적어주세요....22`
## • `만약 그렇다면, 구체적으로 적어주세요.` -> `만약 그렇다면, 구체적으로
##   적어주세요....24`
## • `만약 그렇다면, 구체적으로 적어주세요.` -> `만약 그렇다면, 구체적으로
##   적어주세요....40`

Examine which columns we need to process.

shape.moma <- read.csv("https://www.dropbox.com/scl/fi/aomwwff838jlc4tguz300/moma.csv?rlkey=hugeugfcahc5h9e4uo0f9no9q&dl=1", header=T)
shape.moma[,8] <- as.Date(shape.moma[,8], format = "%Y. %m. %d")
shape.moma[,9] <- as.Date(shape.moma[,9], format = "%Y. %m. %d")
shape.moma$month <- as.numeric(difftime(shape.moma[,9], shape.moma[,8], units = "days"))
colnames(shape.moma)

##   [1] "타임스탬프"                                                                                                                                                                                     
##   [2] "본.검사지를.복제..송신..출판..배포..방송.기타.방법에.의하여.영리목적으로.이용하거나.제3자에게.이용하게.하여서는.안됩니다."                                                                      
##   [3] "피검자의.개인정보는.검사결과.수집과.통보의.목적으로.활용되며..피검자의.사전동의.없이.개인정보를.외부에.제공하지.않습니다."                                                                      
##   [4] "나는.이.연구에.참여하는.것에.대하여.자발적으로.동의하며..어떠한.강제나.부당한.영향을.받지.않았습니다."                                                                                          
##   [5] "나는.이.연구에서.수집되는.나에.대한.정보는.연구.목적.이외에는.사용되지.않으며.현행.법률과.연구윤리심사위원회.규정이.허용하는.범위.내에서.연구자가.수집하고.처리한다는.것을.이해합니다."         
##   [6] "나는.언제라도.이.연구의.참여를.철회할.수.있고..이러한.결정이.나에게.어떠한.불이익도.되지.않을.것이라는.것을.알고.있습니다."                                                                     
##   [7] "아동.성별"                                                                                                                                                                                      
##   [8] "아동.생일"                                                                                                                                                                                      
##   [9] "검사.날짜"                                                                                                                                                                                      
##  [10] "보호자.연락처"                                                                                                                                                                                  
##  [11] "보호자.이메일"                                                                                                                                                                                  
##  [12] "아버지의.나이"                                                                                                                                                                                  
##  [13] "아버지의.직업"                                                                                                                                                                                  
##  [14] "아버지의.교육수준"                                                                                                                                                                              
##  [15] "아버지의.월별평균소득"                                                                                                                                                                          
##  [16] "어머니의.나이"                                                                                                                                                                                  
##  [17] "어머니의.직업"                                                                                                                                                                                  
##  [18] "어머니의.교육수준"                                                                                                                                                                              
##  [19] "어머니의.월별평균소득"                                                                                                                                                                          
##  [20] "주양육자는.어머니를.비롯하여.아동을.잘.알고.있는.사람이어야.합니다."                                                                                                                            
##  [21] "아동의.능력을.과대평가하지.않도록.합니다."                                                                                                                                                      
##  [22] "표현하지.않는.낱말을.표현했다고.하거나..많이.들려주었거나.아이에게.친숙한.물건이라고.표현했다고.잘못.체크하지.않도록.합니다."                                                                   
##  [23] "아동의.능력을.과소평가하지.않도록.합니다."                                                                                                                                                      
##  [24] "체크해야.할.낱말이.수백.개.이상이므로.뒤로.갈수록.어머니나.보호자가.다소.무성의하게.체크하는.경우가.있을.수.있습니다..그러므로.끝까지.아이를.위해.성심성의껏.체크할.수.있도록.합니다."          
##  [25] "주위가.너무.소란스럽거나.다른.일을.하면서.같이.하면.제대로.표시하기.힘들기.때문에.어머니나.주양육자가.방해받지.않고.조용한.분위기에서.아이의.낱말을.표시할.수.있도록.합니다."                   
##  [26] "X1..소리..11."                                                                                                                                                                                  
##  [27] "X2..탈.것..13."                                                                                                                                                                                 
##  [28] "X3..장난감.및.문구류..14."                                                                                                                                                                      
##  [29] "X4..동물..41."                                                                                                                                                                                  
##  [30] "X5..옷..20."                                                                                                                                                                                    
##  [31] "X6..가구.및.방안..21."                                                                                                                                                                          
##  [32] "X7..음식..58."                                                                                                                                                                                  
##  [33] "X8..신체부위..31."                                                                                                                                                                              
##  [34] "X9..가정용품..36."                                                                                                                                                                              
##  [35] "X10..외부사물..26."                                                                                                                                                                             
##  [36] "X11..일상생활..14."                                                                                                                                                                             
##  [37] "X12..장소..25."                                                                                                                                                                                 
##  [38] "X13..양..정도..14."                                                                                                                                                                             
##  [39] "X14..사람..33."                                                                                                                                                                                 
##  [40] "X15..의문사..11."                                                                                                                                                                               
##  [41] "X16..동사..150."                                                                                                                                                                                
##  [42] "X17..형용사..52."                                                                                                                                                                               
##  [43] "X18..끝맺는.말..15."                                                                                                                                                                            
##  [44] "X19..조사..12."                                                                                                                                                                                 
##  [45] "X20..연결하는.말..6."                                                                                                                                                                           
##  [46] "X21..위치..8."                                                                                                                                                                                  
##  [47] "X22..시간..17."                                                                                                                                                                                 
##  [48] "X23..대명사..7."                                                                                                                                                                                
##  [49] "X24..돕는.말..8."                                                                                                                                                                               
##  [50] "X1..우리.아이가.낱말을.붙여서.말합니까...예..이거.이뻐....물.줘.."                                                                                                                              
##  [51] "X2..1.에서..가끔....종종..중.하나에.표시하셨다면.다음에.답하십시오...1.에서.안함에.표시하셨다면..2.는.안하셔도.됩니다...우리.아이가.최근에.말한.문장표현.중.가장.긴.것.세.가지를.적어.주십시오."
##  [52] "X1..조사...가...는...도.를.명사.뒤에.붙인다..예를.들어..엄마가..아빠는..나도.처럼.조사를.붙여서.말한다."                                                                                        
##  [53] "X2....어...지...다...네.를.사용하여.어미를.변화시킨다..예를.들어..먹어..먹지..먹네....커..크지..크다..크네.처럼.어미변화를.시킨다."                                                             
##  [54] "X3....는...야.와.같은.어미를.낱말과.낱말을.연결하기.위해.적절하게.사용한다..예를.들어..먹는.거..먹어야.돼....가는.사람..가야.돼.에서처럼.표현한다."                                             
##  [55] "X4..사건이.일어나는.때와.연관시켜.어미..ㄴ.은..는..ㄹ.을.를.적절하게.사용한다..예를.들어..먹은.거..먹는.거..먹을.거....본.거..보는.거..볼.거.를.사용한다."                                      
##  [56] "X5...이..히..기..리.와.같은.피동형이나.사동형.접사를.사용한다..예를.들어..먹여..잡혔어..신겨.줘..울리네.와.같은.말을.사용한다."                                                                 
##  [57] "X"                                                                                                                                                                                              
##  [58] "X.1"                                                                                                                                                                                            
##  [59] "X.2"                                                                                                                                                                                            
##  [60] "X.3"                                                                                                                                                                                            
##  [61] "X.4"                                                                                                                                                                                            
##  [62] "X.5"                                                                                                                                                                                            
##  [63] "X.6"                                                                                                                                                                                            
##  [64] "X.7"                                                                                                                                                                                            
##  [65] "X.8"                                                                                                                                                                                            
##  [66] "X.9"                                                                                                                                                                                            
##  [67] "X.10"                                                                                                                                                                                           
##  [68] "X.11"                                                                                                                                                                                           
##  [69] "X.12"                                                                                                                                                                                           
##  [70] "X.13"                                                                                                                                                                                           
##  [71] "X.14"                                                                                                                                                                                           
##  [72] "X.15"                                                                                                                                                                                           
##  [73] "X.16"                                                                                                                                                                                           
##  [74] "X.17"                                                                                                                                                                                           
##  [75] "X.18"                                                                                                                                                                                           
##  [76] "X.19"                                                                                                                                                                                           
##  [77] "X.20"                                                                                                                                                                                           
##  [78] "X.21"                                                                                                                                                                                           
##  [79] "X.22"                                                                                                                                                                                           
##  [80] "X.23"                                                                                                                                                                                           
##  [81] "X.24"                                                                                                                                                                                           
##  [82] "X.25"                                                                                                                                                                                           
##  [83] "X.26"                                                                                                                                                                                           
##  [84] "X.27"                                                                                                                                                                                           
##  [85] "X.28"                                                                                                                                                                                           
##  [86] "X.29"                                                                                                                                                                                           
##  [87] "X.30"                                                                                                                                                                                           
##  [88] "X.31"                                                                                                                                                                                           
##  [89] "피검자는.아동을.잘.알고.있는.사람입니까."                                                                                                                                                       
##  [90] "아동의.능력을.과대평가하지.않았습니까."                                                                                                                                                         
##  [91] "표현하지.않는.낱말을.표현했다고.하거나..많이.들려주었거나.아이에게.친숙한.물건이라고.표현했다고.잘못.체크하지.않았습니까."                                                                      
##  [92] "아동의.능력을.과소평가하지.않았습니까."                                                                                                                                                         
##  [93] "끝까지.아이를.위해.성심성의껏.체크했습니까."                                                                                                                                                    
##  [94] "피검자는.방해받지.않고.조용한.분위기에서.아이의.낱말을.표시했습니까."                                                                                                                           
##  [95] "조선대학교.음성학연구실에서.진행되는.또.다른.연구에.참여할.의사가.있으십니까."                                                                                                                  
##  [96] "X.32"                                                                                                                                                                                           
##  [97] "X.33"                                                                                                                                                                                           
##  [98] "X.34"                                                                                                                                                                                           
##  [99] "아동.이름"                                                                                                                                                                                      
## [100] "month"

shape.moma$month <- shape.moma$month/30.5
shape.moma$month <- round(shape.moma$month)
# 0보다 적은 age 버리기
shape.moma <- shape.moma[shape.moma$month > 0,]

shape.moma <- shape.moma[1:dim(shape.moma)[1], c(99, 7, 8, 9, 10, 11, 12, 26:49, 100)]

colnames(shape.moma)[1] <- "child"

shape.moma -> shape

colnames(shape)

##  [1] "child"                     "아동.성별"                
##  [3] "아동.생일"                 "검사.날짜"                
##  [5] "보호자.연락처"             "보호자.이메일"            
##  [7] "아버지의.나이"             "X1..소리..11."            
##  [9] "X2..탈.것..13."            "X3..장난감.및.문구류..14."
## [11] "X4..동물..41."             "X5..옷..20."              
## [13] "X6..가구.및.방안..21."     "X7..음식..58."            
## [15] "X8..신체부위..31."         "X9..가정용품..36."        
## [17] "X10..외부사물..26."        "X11..일상생활..14."       
## [19] "X12..장소..25."            "X13..양..정도..14."       
## [21] "X14..사람..33."            "X15..의문사..11."         
## [23] "X16..동사..150."           "X17..형용사..52."         
## [25] "X18..끝맺는.말..15."       "X19..조사..12."           
## [27] "X20..연결하는.말..6."      "X21..위치..8."            
## [29] "X22..시간..17."            "X23..대명사..7."          
## [31] "X24..돕는.말..8."          "month"

shape$lexicon <- do.call(paste, c(shape[8:31], sep = ", "))
shape$objs <- do.call(paste, c(shape[9:17], sep = ", "))

shape$subset <- sapply(strsplit(shape$lexicon, ", "), function(x) {
  common_words <- intersect(x, word)
  if (length(common_words) > 0) {
    paste(common_words, collapse = ", ")
  } else {
    ""
  }
})
shape$subset_objs <- sapply(strsplit(shape$objs, ", "), function(x) {
  common_words <- intersect(x, word)
  if (length(common_words) > 0) {
    paste(common_words, collapse = ", ")
  } else {
    ""
  }
})

shape$prop_obj <- sapply(1:nrow(shape), function(i) { objs_words <- unlist(strsplit(shape$objs[i], “,”)) non_empty_objs_words <- objs_words[objs_words != “”] subset_objs_words <- unlist(strsplit(shape$subset_objs[i], “,”))

# Calculate the proportion of words in subset relative to lexicon prop_obj <- length(subset_objs_words) / length(non_empty_objs_words)

# Return the proportion return(prop_obj) })

# Calculate the proportion of words in subset relative to lexicon prop <- length(subset_words) / length(non_empty_lexicon_words)

# Return the proportion return(prop) })

# Apply strsplit function to split words and calculate proportion
shape$prop <- sapply(1:nrow(shape), function(i) {
  lexicon_words <- unlist(strsplit(shape$lexicon[i], ", "))
  non_empty_lexicon_words <- lexicon_words[lexicon_words != ""]
  subset_words <- unlist(strsplit(shape$subset[i], ", "))
  
  # Calculate the proportion of words in subset relative to lexicon
  prop <- length(subset_words) / length(non_empty_lexicon_words)
  
  # Return the proportion
  return(prop)
})

#repeat for object nouns only (9 categories of nouns in CDI)
shape$prop_obj <- sapply(1:nrow(shape), function(i) {
  objs_words <- unlist(strsplit(shape$objs[i], ", "))
  non_empty_objs_words <- objs_words[objs_words != ""]
  subset_objs_words <- unlist(strsplit(shape$subset_objs[i], ", "))
  
  # Calculate the proportion of words in subset relative to lexicon
  prop_obj <- length(subset_objs_words) / length(non_empty_objs_words)
  
  # Return the proportion
  return(prop_obj)
})

colnames(shape)

##  [1] "child"                     "아동.성별"                
##  [3] "아동.생일"                 "검사.날짜"                
##  [5] "보호자.연락처"             "보호자.이메일"            
##  [7] "아버지의.나이"             "X1..소리..11."            
##  [9] "X2..탈.것..13."            "X3..장난감.및.문구류..14."
## [11] "X4..동물..41."             "X5..옷..20."              
## [13] "X6..가구.및.방안..21."     "X7..음식..58."            
## [15] "X8..신체부위..31."         "X9..가정용품..36."        
## [17] "X10..외부사물..26."        "X11..일상생활..14."       
## [19] "X12..장소..25."            "X13..양..정도..14."       
## [21] "X14..사람..33."            "X15..의문사..11."         
## [23] "X16..동사..150."           "X17..형용사..52."         
## [25] "X18..끝맺는.말..15."       "X19..조사..12."           
## [27] "X20..연결하는.말..6."      "X21..위치..8."            
## [29] "X22..시간..17."            "X23..대명사..7."          
## [31] "X24..돕는.말..8."          "month"                    
## [33] "lexicon"                   "objs"                     
## [35] "subset"                    "subset_objs"              
## [37] "prop"                      "prop_obj"

shape$vsize <- sapply(1:nrow(shape), function(i) {
  lexicon_words <- unlist(strsplit(shape$lexicon[i], ", "))
  non_empty_lexicon_words <- lexicon_words[lexicon_words != ""]
  subset_words <- unlist(strsplit(shape$subset[i], ", "))
  
  # Calculate the proportion of words in subset relative to lexicon
  vsize <- length(non_empty_lexicon_words)
  
  # Return the proportion
  return(vsize)
})

colnames(shape)

##  [1] "child"                     "아동.성별"                
##  [3] "아동.생일"                 "검사.날짜"                
##  [5] "보호자.연락처"             "보호자.이메일"            
##  [7] "아버지의.나이"             "X1..소리..11."            
##  [9] "X2..탈.것..13."            "X3..장난감.및.문구류..14."
## [11] "X4..동물..41."             "X5..옷..20."              
## [13] "X6..가구.및.방안..21."     "X7..음식..58."            
## [15] "X8..신체부위..31."         "X9..가정용품..36."        
## [17] "X10..외부사물..26."        "X11..일상생활..14."       
## [19] "X12..장소..25."            "X13..양..정도..14."       
## [21] "X14..사람..33."            "X15..의문사..11."         
## [23] "X16..동사..150."           "X17..형용사..52."         
## [25] "X18..끝맺는.말..15."       "X19..조사..12."           
## [27] "X20..연결하는.말..6."      "X21..위치..8."            
## [29] "X22..시간..17."            "X23..대명사..7."          
## [31] "X24..돕는.말..8."          "month"                    
## [33] "lexicon"                   "objs"                     
## [35] "subset"                    "subset_objs"              
## [37] "prop"                      "prop_obj"                 
## [39] "vsize"

shape -> shape.with.cdi.responses
# write.csv(shape, file = "shape_combined.csv", fileEncoding = "UTF-8", row.names = FALSE)
shape <- shape[,-c(8:31, 33, 34, 35, 36)]

read.csv("https://www.dropbox.com/s/ry39ofiykjhvqw7/female_WG_CDI_comprehension_norms.csv?dl=1",header=T)->norm.wg.f
read.csv("https://www.dropbox.com/s/dqruubaea7wl2d5/female_WS_CDI_comprehension_norms.csv?dl=1",header=T)->norm.ws.f

read.csv("https://www.dropbox.com/s/1e1hd49r6djg3bq/male_WG_CDI_comprehension_norms.csv?dl=1",header=T)->norm.wg.m
read.csv("https://www.dropbox.com/s/wu7w47vvooo3waj/male_WS_CDI_comprehension_norms.csv?dl=1",header=T)->norm.ws.m

norm.f<-rbind(norm.wg.f,norm.ws.f)
norm.m<-rbind(norm.wg.m,norm.ws.m)

#norm.ws.f -> norm.f
#norm.ws.m -> norm.m

norm.m$X

##  [1] "8months"  "9months"  "10months" "11months" "12months" "13months"
##  [7] "14months" "15months" "16months" "17months" "18months" "19months"
## [13] "20months" "21months" "22months" "23months" "24months" "25months"
## [19] "26months" "27months" "28months" "29months" "30months" "31months"
## [25] "32months" "33months" "34months" "35months" "36months"

colnames(shape)[2] <- "gender"
shape$gender <- ifelse(shape$gender == "남", "M","F")

table(is.na(shape$month))

## 
## FALSE 
##    40

shape <- shape %>% filter(!is.na(month))
table(is.na(shape$vsize))

## 
## FALSE 
##    40

shape -> shape.simple
shape <- shape %>% 
  rename(phone = "보호자.연락처",email = "보호자.이메일") 

table(shape$month)

## 
##  3 17 18 19 20 21 22 23 24 25 27 28 29 30 48 
##  1  1  4  1  5  4  2  3  5  1  3  1  2  6  1

shape <- shape %>% filter(month > 7 | month < 37)

#퍼센타일 계산하기
eachPercentile <- NULL
for (baby in 1: length(shape[,1])) { #1부터 끝까지아이들을 하나씩 불러온다.
  age=shape[baby,"month"] # 나이를 불러온다
  gender=shape[baby,"gender"] #성별
  vsize=shape[baby,"vsize"] #vocab size
  if (gender =="M"){ #아이가 남자인경우
    lookup.t = length(norm.m[age-7,])  #상위 7개 데이터 이후 개월에 따른 단어 인지 개수가 기록되어 있음, 길이는 100; #lookup table
    lookup.t.no = norm.m[age-7,2:lookup.t] #개월 수를 나타내는 X를 제거하고 추출  #lookup table number only
  } 
  if (gender =="F"){ #아이가 여자인경우
    lookup.t = length(norm.f[age-7,]) #해당 아이의 개월에 맞는 언어 인지 단어 수가 적혀있는 행의 갯수 (100) #lookup table
    lookup.t.no = norm.f[age-7,2:lookup.t] #개월 수를 나타내는 X를 제거하고 추출 #lookup table number only
    } 
  if (age > 36 || age < 8){ #나이가 8미만 36초과인 경우 NA처리한다.
    rank = NA
  } else {
    for (k in 1:length(lookup.t.no)-1){ #k는 1부터 98 (99-1)
        if(vsize < lookup.t.no[1]){
          #print(paste(baby,vsize,sep=";"))
        rank=0
        break
     # } else if ((vsize >= lookup.t.no[k])==TRUE && (vsize < lookup.t.no[k+1])==TRUE){ #아이가 단어를 이해하는 갯수 (vsize)가 k번째 lookup.t.no과 k+1번째 lookup.t.no사이 값이라면
      } else if (vsize >= lookup.t.no[k] && vsize < lookup.t.no[k+1]){ #아이가 단어를 이해하는 갯수 (vsize)가 k번째 lookup.t.no과 k+1번째 lookup.t.no사이 값이라면
        rank=k    #rank는 k번째가 되고 (퍼센타일이 정해진다.)
        break #for문을 멈춘다. #아이에게 맞는 퍼센타일을 찾음
      } else {
        rank = 99 #아이의 퍼센타일을 찾지 못한 경우, rank는 99가 된다. 99이상의 뛰어난 아이임
      }
    }
  }
  
  col_percentile = rank   #col_percentile은 아이에게 맞는 퍼센타일 값이다.
  eachPercentile <-  c(eachPercentile, col_percentile)   #각 아이별로 추출한 퍼센타일 값을 eachPercentile에 넣어준다.
}

shape <- cbind(shape,eachPercentile)

eachPercentile <- NULL
for (baby in 1: length(shape[,1])) { #1부터 끝까지아이들을 하나씩 불러온다.
  age=shape[baby,"month"] # 나이를 불러온다
  gender=shape[baby,"gender"] #성별
  vsize=shape[baby,"vsize"] #vocab size
  if (gender =="M"){ #아이가 남자인경우
    lookup.t = length(norm.m[age-7,])  #상위 7개 데이터 이후 개월에 따른 단어 인지 개수가 기록되어 있음, 길이는 100; #lookup table
    lookup.t.no = norm.m[age-7,2:lookup.t] #개월 수를 나타내는 X를 제거하고 추출  #lookup table number only
  } 
  if (gender =="F"){ #아이가 여자인경우
    lookup.t = length(norm.f[age-7,]) #해당 아이의 개월에 맞는 언어 인지 단어 수가 적혀있는 행의 갯수 (100) #lookup table
    lookup.t.no = norm.f[age-7,2:lookup.t] #개월 수를 나타내는 X를 제거하고 추출 #lookup table number only
    } 
  if (age > 36 || age < 8){ #나이가 8미만 36초과인 경우 NA처리한다.
    rank = NA
  } else {
    for (k in 1:length(lookup.t.no)-1){ #k는 1부터 98 (99-1)
        if(vsize < lookup.t.no[1]){
          #print(paste(baby,vsize,sep=";"))
        rank=0
        break
     # } else if ((vsize >= lookup.t.no[k])==TRUE && (vsize < lookup.t.no[k+1])==TRUE){ #아이가 단어를 이해하는 갯수 (vsize)가 k번째 lookup.t.no과 k+1번째 lookup.t.no사이 값이라면
      } else if (vsize >= lookup.t.no[k] && vsize < lookup.t.no[k+1]){ #아이가 단어를 이해하는 갯수 (vsize)가 k번째 lookup.t.no과 k+1번째 lookup.t.no사이 값이라면
        rank=k    #rank는 k번째가 되고 (퍼센타일이 정해진다.)
        break #for문을 멈춘다. #아이에게 맞는 퍼센타일을 찾음
      } else {
        rank = 99 #아이의 퍼센타일을 찾지 못한 경우, rank는 99가 된다. 99이상의 뛰어난 아이임
      }
    }
  }
  
  col_percentile = rank   #col_percentile은 아이에게 맞는 퍼센타일 값이다.
  eachPercentile <-  c(eachPercentile, col_percentile)   #각 아이별로 추출한 퍼센타일 값을 eachPercentile에 넣어준다.
}

Part III

Identify unique participants based on their email.

unique <- shape[!duplicated(shape$email), ]

Draw a graph showing the effects of the proportion of object nouns on the percentile scores in each age group. What do you observe? Do you see that the pattern somewhat changes when the children turns 24 months?

colnames(unique)

##  [1] "child"          "gender"         "아동.생일"      "검사.날짜"     
##  [5] "phone"          "email"          "아버지의.나이"  "month"         
##  [9] "prop"           "prop_obj"       "vsize"          "eachPercentile"

library(ggplot2)
library(dplyr)
ggplot(unique, mapping = aes(x=prop_obj, y=eachPercentile, group = gender) ) +
  geom_point(mapping = aes(alpha = 0.8))+
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~month,scales = "free") + # nrow = #
  labs(title = "Proportion Effect on Percentile Scores Across the Age",
       x = "proportion of shape-biased words",
       y = "percentile")+
  xlim (0, 1)+
  ylim(0, 100)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 2 rows containing missing values (`geom_point()`).

Same graph, with ages filtered and standard error of means indicated.

#filter age and remove children with vsize under 5
unique <- unique%>%
  filter(month >17, month <25)%>%
  filter(vsize >5)

ggplot(unique, aes(x = prop_obj, y = eachPercentile)) +
  geom_point(position ="jitter") +
  geom_smooth(method = "lm", formula = y ~ x  , se = TRUE) +
  facet_wrap(~month, scales = "free") +
  labs(title = "Proportion Effect on Percentile Scores Across the Age",
       x = "shape proportion",
       y = "percentile")+
  xlim (0, 1)+
  ylim(0, 100)

## Warning in qt((1 - level)/2, df): NaN이 생성되었습니다

## Warning in qt((1 - level)/2, df): NaN이 생성되었습니다

## Warning in max(ids, na.rm = TRUE): max에 전달되는 인자들 중 누락이 있어 -Inf를
## 반환합니다

## Warning in max(ids, na.rm = TRUE): max에 전달되는 인자들 중 누락이 있어 -Inf를
## 반환합니다

## Warning in max(ids, na.rm = TRUE): max에 전달되는 인자들 중 누락이 있어 -Inf를
## 반환합니다

## Warning in max(ids, na.rm = TRUE): max에 전달되는 인자들 중 누락이 있어 -Inf를
## 반환합니다

## Warning in max(ids, na.rm = TRUE): max에 전달되는 인자들 중 누락이 있어 -Inf를
## 반환합니다

ShapeBias

Eonsuk Ko

2023-11-01

load the necessary packages

Part III