mlu 파일 분석
Q1.
library(readxl)
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read sheet 2 of the mlu workbook; `mlu_data` keeps the import as read.
# NOTE(review): the path is absolute and machine-specific -- confirm it
# before running on another computer.
mlu_data <- read_excel(
  "C:\\Users\\user\\Documents\\20220124\\mlu.xls",
  sheet = 2
)

# Working copy with shorter column names (new_name = old_name).
mlu_data_new <- mlu_data %>%
  rename(
    utterances = utterances_mlu,
    words = words_mlu
  )
Q2.
# Keep the rows whose `words` value is at most 500, then print the
# dimensions (rows, columns) of that subset.
test <- filter(mlu_data_new, words <= 500)
dim(test)
## [1] 0 8
Q3.
# Build the analysis table from six columns of the renamed data.
df_mlu <- select(
  mlu_data_new,
  File, age, utterances, words, Types_freq, Token_freq
)
Q4.
# Add `mlu` to every row: words divided by utterances.
df_mlu <- df_mlu %>%
  mutate(mlu = words / utterances)

# Mean of `mlu` within each `age` group. dplyr is already attached by the
# library() call at the top of the script, so it is not loaded again here.
df_mlu %>%
  group_by(age) %>%
  summarise(mean_mlu = mean(mlu))
## # A tibble: 3 × 2
## age mean_mlu
## <chr> <dbl>
## 1 A0 2.50
## 2 A1 2.59
## 3 A2 2.99
Q5.
# Add `average` to every row: Token_freq divided by Types_freq.
df_mlu <- df_mlu %>%
  mutate(average = Token_freq / Types_freq)

# Mean of `average` within each `age` group.
df_mlu %>%
  group_by(age) %>%
  summarise(mean_average = mean(average))
## # A tibble: 3 × 2
## age mean_average
## <chr> <dbl>
## 1 A0 2.57
## 2 A1 2.74
## 3 A2 2.66