과제

library(readxl)

# Read the second worksheet of the workbook into `mlu`.
mlu <- read_excel(
  "/Users/andahui/Library/Containers/com.microsoft.Excel/Data/Downloads/mlu.xls",
  sheet = 2
)

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep only the rows whose `utterances_mlu` is at most 500, then display them.
test <- filter(mlu, utterances_mlu <= 500)
test

## # A tibble: 5 × 8
##   File        age   utterances_mlu words_mlu DurationTime DurationSec Types_freq
##   <chr>       <chr>          <dbl>     <dbl> <chr>              <dbl>      <dbl>
## 1 27_A0P06M.… A0               470       813 00:12:07             727        378
## 2 28_A0P07M.… A0               371       976 00:11:53             713        419
## 3 15_A1P05F.… A1               381      1046 00:15:14             914        555
## 4 12_A2P04M.… A2               481      1672 00:17:01            1021        921
## 5 18_A2P07M.… A2               323       890 00:08:47             527        476
## # ℹ 1 more variable: Token_freq <dbl>

# Number of rows that passed the `utterances_mlu <= 500` filter.
nrow(test)

## [1] 5

utterances_mlu 값이 500 이하인 경우는 5명입니다.

# Copy of `mlu` with the two duration columns dropped.
mlu_a <- select(mlu, -DurationTime, -DurationSec)

# Per-row ratio of `words_mlu` to `utterances_mlu`, stored as a new column.
mlu <- mutate(mlu, mlu_wu = words_mlu / utterances_mlu)

# Grouped view of `mlu`, one group per `age` value.
mlu_age <- group_by(mlu, age)

# Mean of `mlu_wu` within each age group; keep the result and display it.
mlu_sum <- summarise(mlu_age, mean(mlu_wu))
mlu_sum

## # A tibble: 3 × 2
##   age   `mean(mlu_wu)`
##   <chr>          <dbl>
## 1 A0              2.50
## 2 A1              2.59
## 3 A2              2.99

# Per-row ratio of `Token_freq` to `Types_freq`, stored as a new column.
mlu$mlu_tt <- mlu$Token_freq / mlu$Types_freq

# Mean of `mlu_tt` within each age group.
# `mlu` is grouped again here because `mlu_age` was created before `mlu_tt`
# was added and therefore does not contain that column. The column is also
# referenced by its bare name: `mean(mlu$mlu_tt)` pulls the whole ungrouped
# vector, so every group received the same overall mean.
# NOTE(review): the output recorded below (2.65 in every row) came from the
# old call and needs to be regenerated.
mlu %>%
  group_by(age) %>%
  summarize(mean(mlu_tt))

## # A tibble: 3 × 2
##   age   `mean(mlu$mlu_tt)`
##   <chr>              <dbl>
## 1 A0                  2.65
## 2 A1                  2.65
## 3 A2                  2.65

과제

2023-10-11