1. 엑셀 파일 불러오기
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
# Read mlu.xlsx (path relative to the working directory) into `mlu`.
mlu <- read_excel(path = "mlu.xlsx")
2. 500문장 이하를 말한 엄마는 몇 명인가?
# Print the rows whose utterance count is at most 500; the row count shown in
# the tibble header is the number of matching rows.
filter(mlu, utterances_mlu <= 500)
## # A tibble: 5 x 8
## File age utterances_mlu words_mlu DurationTime DurationSec
## <chr> <chr> <dbl> <dbl> <dttm> <dbl>
## 1 27_A~ A0 470 813 1899-12-31 00:12:07 727
## 2 28_A~ A0 371 976 1899-12-31 00:11:53 713
## 3 15_A~ A1 381 1046 1899-12-31 00:15:14 974
## 4 12_A~ A2 481 1672 1899-12-31 00:17:01 1021
## 5 18_A~ A2 323 890 1899-12-31 00:08:47 527
## # ... with 2 more variables: Types_freq <dbl>, Token_freq <dbl>
3. DurationTime과 DurationSec는 제외하고 데이터 프레임을 다시 만들어서 새로운 이름으로 저장
# Drop the two duration columns and store the result under a new name.
# Leading `<-` is used instead of a trailing `->` so the assignment target is
# visible at the start of the statement.
mlu_new <- mlu %>%
  select(-DurationTime, -DurationSec)
4. 나이대별 평균 MLU
# Add the derived column `mlu` to mlu_new: words divided by utterances per row.
mlu_new <- mutate(mlu_new, mlu = words_mlu / utterances_mlu)

# Average the per-row MLU within each age group and print the result.
mlu_new %>%
  group_by(age) %>%
  summarise(mean_mlu = mean(mlu))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
## age mean_mlu
## <chr> <dbl>
## 1 A0 2.50
## 2 A1 2.59
## 3 A2 2.99
5. 각 그룹별 token/type 비율의 평균
# Add the derived column `ratio` to mlu_new: token frequency divided by type
# frequency per row.
mlu_new <- mutate(mlu_new, ratio = Token_freq / Types_freq)

# Average the per-row ratio within each age group and print the result.
mlu_new %>%
  group_by(age) %>%
  summarise(mean_ratio = mean(ratio))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
## age mean_ratio
## <chr> <dbl>
## 1 A0 2.41
## 2 A1 2.74
## 3 A2 2.66