mlu 파일 분석

Q1.

library(readxl)
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Q1: read the second sheet of mlu.xls, keep the raw import in `mlu_data`, and
# build a working copy whose two *_mlu columns get shorter names.
# NOTE(review): the path is an absolute, machine-specific Windows path.
mlu_data <- read_excel(
  "C:\\Users\\user\\Documents\\20220124\\mlu.xls",
  sheet = 2
)
mlu_data_new <- mlu_data %>%
  rename(
    utterances = utterances_mlu,
    words = words_mlu
  )

Q2.

# Q2: keep only the rows whose `words` value is at most 500, then report the
# dimensions (rows, columns) of that subset.
test <- filter(mlu_data_new, words <= 500)
dim(test)
## [1] 0 8

Q3.

# Q3: restrict the working table to the six columns listed below.
df_mlu <- select(
  mlu_data_new,
  File, age, utterances, words, Types_freq, Token_freq
)

Q4.

# Q4: add a per-row `mlu` column (words divided by utterances), then print the
# mean of that ratio for each value of `age`.
# dplyr is already attached by the library() call at the top of the file, so it
# is not loaded a second time here.
df_mlu <- df_mlu %>%
  mutate(mlu = words / utterances)

df_mlu %>%
  group_by(age) %>%
  summarise(mean_mlu = mean(mlu))
## # A tibble: 3 × 2
##   age   mean_mlu
##   <chr>    <dbl>
## 1 A0        2.50
## 2 A1        2.59
## 3 A2        2.99

Q5.

# Q5: add an `average` column (Token_freq divided by Types_freq), then print
# the mean of that ratio for each value of `age`.
df_mlu <- df_mlu %>%
  mutate(average = Token_freq / Types_freq)

df_mlu %>%
  group_by(age) %>%
  summarise(mean_average = mean(average))
## # A tibble: 3 × 2
##   age   mean_average
##   <chr>        <dbl>
## 1 A0            2.57
## 2 A1            2.74
## 3 A2            2.66