dd.utf8

mlu

1. 엑셀 파일 불러오기

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)

# Load the workbook "mlu.xlsx" into the data frame `mlu`.
mlu <- read_excel(path = "mlu.xlsx")

2. 500문장 이하를 말한 엄마는 몇 명인가

# Keep only the rows whose utterances_mlu is at most 500; the row count shown
# in the printed tibble header is the number of matching rows.
mlu %>%
  filter(utterances_mlu <= 500)

## # A tibble: 5 x 8
##   File  age   utterances_mlu words_mlu DurationTime        DurationSec
##   <chr> <chr>          <dbl>     <dbl> <dttm>                    <dbl>
## 1 27_A~ A0               470       813 1899-12-31 00:12:07         727
## 2 28_A~ A0               371       976 1899-12-31 00:11:53         713
## 3 15_A~ A1               381      1046 1899-12-31 00:15:14         974
## 4 12_A~ A2               481      1672 1899-12-31 00:17:01        1021
## 5 18_A~ A2               323       890 1899-12-31 00:08:47         527
## # ... with 2 more variables: Types_freq <dbl>, Token_freq <dbl>

3. DurationTime과 DurationSec는 제외하고 데이터 프레임을 다시 만들어서 새로운 이름으로 저장

# Drop the two duration columns and store the result under a new name.
mlu_new <- mlu %>%
  select(-c(DurationTime, DurationSec))

4. 나이대별 평균 MLU

# Add the derived variable `mlu` to mlu_new: words_mlu divided by
# utterances_mlu, computed row by row.
mlu_new <- mlu_new %>%
  mutate(mlu = words_mlu / utterances_mlu)

# Average of the derived `mlu` column within each `age` group.
mlu_new %>%
  group_by(age) %>%
  summarise(mean_mlu = mean(mlu))

## `summarise()` ungrouping output (override with `.groups` argument)

## # A tibble: 3 x 2
##   age   mean_mlu
##   <chr>    <dbl>
## 1 A0        2.50
## 2 A1        2.59
## 3 A2        2.99

5. 각 그룹별 token/type 비율의 평균

# Add the derived variable `ratio` to mlu_new: Token_freq divided by
# Types_freq, computed row by row.
mlu_new <- mlu_new %>%
  mutate(ratio = Token_freq / Types_freq)

# Average of the derived `ratio` column within each `age` group.
mlu_new %>%
  group_by(age) %>%
  summarise(mean_ratio = mean(ratio))

## `summarise()` ungrouping output (override with `.groups` argument)

## # A tibble: 3 x 2
##   age   mean_ratio
##   <chr>      <dbl>
## 1 A0          2.41
## 2 A1          2.74
## 3 A2          2.66