library(readxl)
# Load the "mlu_timedur_freq_cds" worksheet of mlu.xls and print a per-column
# summary of the imported table.
mlu <- readxl::read_excel(path = "mlu.xls", sheet = "mlu_timedur_freq_cds")
summary(mlu)
## File age utterances_mlu words_mlu
## Length:35 Length:35 Min. :323.0 Min. : 813
## Class :character Class :character 1st Qu.:561.0 1st Qu.:1368
## Mode :character Mode :character Median :621.0 Median :1716
## Mean :631.8 Mean :1710
## 3rd Qu.:716.0 3rd Qu.:2060
## Max. :890.0 Max. :2766
##
## DurationTime DurationSec Types_freq Token_freq
## Length:35 Min. : 527 Min. : 378.0 Min. : 832
## Class :character 1st Qu.: 924 1st Qu.: 567.5 1st Qu.:1446
## Mode :character Median :1060 Median : 694.0 Median :1798
## Mean :1086 Mean : 669.1 Mean :1778
## 3rd Qu.:1246 3rd Qu.: 775.5 3rd Qu.:2134
## Max. :1762 Max. :1014.0 Max. :2827
## NA's :1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows whose utterances_mlu value is at most 500.
mlu_500 <- mlu %>% filter(utterances_mlu <= 500)
# Count the rows that passed the filter. nrow() is required here: length() on
# a data frame returns the number of columns, which is 8 for this table no
# matter how many rows remain.
nrow(mlu_500)
## (output not re-generated; the earlier "[1] 8" was the column count)
데이터 프레임에 length()를 적용하면 행이 아니라 열(변수)의 개수인 8이 반환되므로, 주어진 녹음 시간 동안 500 문장 이하를 말한 엄마의 수는 nrow(mlu_500)의 결과로 확인해야 합니다.
# Drop the two duration columns, store the result as a plain data.frame, and
# print a summary of the remaining columns.
mlu_new <- mlu %>%
  select(-c(DurationTime, DurationSec)) %>%
  as.data.frame()
summary(mlu_new)
## File age utterances_mlu words_mlu
## Length:35 Length:35 Min. :323.0 Min. : 813
## Class :character Class :character 1st Qu.:561.0 1st Qu.:1368
## Mode :character Mode :character Median :621.0 Median :1716
## Mean :631.8 Mean :1710
## 3rd Qu.:716.0 3rd Qu.:2060
## Max. :890.0 Max. :2766
## Types_freq Token_freq
## Min. : 378.0 Min. : 832
## 1st Qu.: 567.5 1st Qu.:1446
## Median : 694.0 Median :1798
## Mean : 669.1 Mean :1778
## 3rd Qu.: 775.5 3rd Qu.:2134
## Max. :1014.0 Max. :2827
library(dplyr)
# Shorten the two count column names, then add MLU as words per utterance.
mlu_new <- mlu_new %>%
  dplyr::rename(utterances = utterances_mlu, words = words_mlu) %>%
  dplyr::mutate(MLU = words / utterances)
summary(mlu_new)
## File age utterances words
## Length:35 Length:35 Min. :323.0 Min. : 813
## Class :character Class :character 1st Qu.:561.0 1st Qu.:1368
## Mode :character Mode :character Median :621.0 Median :1716
## Mean :631.8 Mean :1710
## 3rd Qu.:716.0 3rd Qu.:2060
## Max. :890.0 Max. :2766
## Types_freq Token_freq MLU
## Min. : 378.0 Min. : 832 Min. :1.730
## 1st Qu.: 567.5 1st Qu.:1446 1st Qu.:2.447
## Median : 694.0 Median :1798 Median :2.745
## Mean : 669.1 Mean :1778 Mean :2.696
## 3rd Qu.: 775.5 3rd Qu.:2134 3rd Qu.:2.916
## Max. :1014.0 Max. :2827 Max. :3.476
# Mean MLU within each age group.
summarise(
  group_by(mlu_new, age),
  mean_MLU = mean(MLU)
)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
## age mean_MLU
## <chr> <dbl>
## 1 A0 2.50
## 2 A1 2.59
## 3 A2 2.99
# Mean of the per-row Token_freq / Types_freq ratio within each age group.
summarise(
  group_by(mlu_new, age),
  mean_ratio = mean(Token_freq / Types_freq)
)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
## age mean_ratio
## <chr> <dbl>
## 1 A0 2.57
## 2 A1 2.74
## 3 A2 2.66