library(readxl)
# Load the second worksheet of the mlu.xls workbook into `mlu`.
mlu <- read_excel(
  "/Users/andahui/Library/Containers/com.microsoft.Excel/Data/Downloads/mlu.xls",
  sheet = 2
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows whose utterance count is at most 500, then display them.
test <- filter(mlu, utterances_mlu <= 500)
test
## # A tibble: 5 × 8
## File age utterances_mlu words_mlu DurationTime DurationSec Types_freq
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 27_A0P06M.… A0 470 813 00:12:07 727 378
## 2 28_A0P07M.… A0 371 976 00:11:53 713 419
## 3 15_A1P05F.… A1 381 1046 00:15:14 914 555
## 4 12_A2P04M.… A2 481 1672 00:17:01 1021 921
## 5 18_A2P07M.… A2 323 890 00:08:47 527 476
## # ℹ 1 more variable: Token_freq <dbl>
# Number of rows that passed the utterances_mlu <= 500 filter.
nrow(test)
## [1] 5
# 5명입니다. (Five participants have 500 or fewer utterances.)
# Copy of `mlu` without the two duration columns.
mlu_a <- select(mlu, -DurationTime, -DurationSec)
# Words per utterance for each row: words_mlu divided by utterances_mlu.
mlu <- mutate(mlu, mlu_wu = words_mlu / utterances_mlu)
# Age-grouped view of the data, kept as its own named object.
mlu_age <- group_by(mlu, age)
# Average words-per-utterance within each age group, then display the result.
mlu_sum <- summarize(mlu_age, mean(mlu_wu))
mlu_sum
## # A tibble: 3 × 2
## age `mean(mlu_wu)`
## <chr> <dbl>
## 1 A0 2.50
## 2 A1 2.59
## 3 A2 2.99
# Tokens per type for each row: Token_freq divided by Types_freq.
mlu <- mlu %>%
  mutate(mlu_tt = Token_freq / Types_freq)
# Average tokens-per-type within each age group.
# The grouping is rebuilt from `mlu` because `mlu_age` was created before
# `mlu_tt` existed and therefore has no such column. Using the bare column
# name lets summarize() evaluate the mean once per group; the previous
# `mean(mlu$mlu_tt)` pulled the entire ungrouped column and so reported the
# same overall mean (2.65) for A0, A1 and A2.
mlu %>%
  group_by(age) %>%
  summarize(mean_mlu_tt = mean(mlu_tt))
# NOTE(review): re-run to regenerate the printed table; the earlier output
# showed the ungrouped mean for every age group and is no longer valid.