data 분석 그래프_0430과제

1. 데이터 파일 불러오기, library 실행

# Load the tab-separated data file; the first row supplies the column names.
d_class <- read.table(
  file = "C:\\R_project\\rproject_data\\all_data.txt",
  header = TRUE,
  sep = "\t"
)
# Keep a second reference to the freshly loaded table under another name.
d_class_agn <- d_class
# Preview the first rows of the loaded table.
head(x = d_class)
##   filename phoneme duration item height  voice position speed subj
## 1   tack_f       T   45.338 tack    low -voice    final  fast   F1
## 2   tack_f     AE1  150.627 tack    low -voice    final  fast   F1
## 3   tack_f       K   88.059 tack    low -voice    final  fast   F1
## 4  tack_f1       T   47.490 tack    low -voice    final  fast   F1
## 5  tack_f1     AE1  148.429 tack    low -voice    final  fast   F1
## 6  tack_f1       K  110.553 tack    low -voice    final  fast   F1
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

2. 결측치 확인 및 제거

# Tabulate is.na() for speed, voice, duration, and position.
# A table that contains only a FALSE count means the column holds no NA.
table(is.na(d_class$speed))
## 
## FALSE 
##  4542
table(is.na(d_class$voice))
## 
## FALSE 
##  4542
table(is.na(d_class$duration))
## 
## FALSE 
##  4542
table(is.na(d_class$position))
## 
## FALSE 
##  4542

-> 위에서 확인한 네 변수(speed, voice, duration, position)에는 결측치가 없으므로, 결측치 제거를 위한 전처리는 하지 않아도 된다.

3. Speaking rate

1) vowel duration_speaking rate

# Mean duration per voice x speed combination, computed only over rows
# whose phoneme is AE1, EH1, or IH1 (the vowel set of this section).
vowel_speaking_rate <- d_class %>%
  filter(phoneme %in% c("AE1", "EH1", "IH1")) %>%
  group_by(voice, speed) %>%
  summarise(vowel_duration = mean(duration))
## `summarise()` has grouped output by 'voice'. You can override using the `.groups` argument.
# Dodged bars: speed on the x axis, one bar per voice value.
ggplot(
  data = vowel_speaking_rate,
  mapping = aes(x = speed, y = vowel_duration, fill = voice)
) +
  geom_col(position = "dodge")

2) coda duration_speaking rate

# Mean duration per voice x speed combination, computed only over rows
# whose phoneme is G, K, or T (the coda set of this section).
# NOTE(review): the preview rows show T as the first segment of "tack";
# confirm that T tokens are meant to be counted as codas here.
coda_speaking_rate <- d_class %>%
  filter(phoneme %in% c("G", "K", "T")) %>%
  group_by(voice, speed) %>%
  summarise(coda_duration = mean(duration))
## `summarise()` has grouped output by 'voice'. You can override using the `.groups` argument.
# Dodged bars: speed on the x axis, one bar per voice value.
ggplot(
  data = coda_speaking_rate,
  mapping = aes(x = speed, y = coda_duration, fill = voice)
) +
  geom_col(position = "dodge")

4. Position

1) vowel duration_position

# Mean duration per voice x position combination, computed only over rows
# whose phoneme is AE1, EH1, or IH1 (the vowel set of this section).
vowel_position <- d_class %>%
  filter(phoneme %in% c("AE1", "EH1", "IH1")) %>%
  group_by(voice, position) %>%
  summarise(vowel_duration = mean(duration))
## `summarise()` has grouped output by 'voice'. You can override using the `.groups` argument.
# Dodged bars: position on the x axis, one bar per voice value.
ggplot(
  data = vowel_position,
  mapping = aes(x = position, y = vowel_duration, fill = voice)
) +
  geom_col(position = "dodge")

2) coda duration_position

# Mean duration per voice x position combination, computed only over rows
# whose phoneme is G, K, or T (the coda set of this section).
# The summary column is named coda_duration (it was vowel_duration, a
# leftover from the vowel block) so the y-axis label describes the plotted
# quantity and agrees with the coda_speaking_rate table.
# NOTE(review): the preview rows show T as the first segment of "tack";
# confirm that T tokens are meant to be counted as codas here.
coda_position <- d_class %>%
  filter(phoneme %in% c("G", "K", "T")) %>%
  group_by(voice, position) %>%
  summarise(coda_duration = mean(duration))
## `summarise()` has grouped output by 'voice'. You can override using the `.groups` argument.
# Dodged bars: position on the x axis, one bar per voice value.
ggplot(
  data = coda_position,
  mapping = aes(x = position, y = coda_duration, fill = voice)
) +
  geom_col(position = "dodge")

5. Vowel height

1) vowel duration_height

# Mean duration per height x voice combination, computed only over rows
# whose phoneme is AE1, EH1, or IH1 (the vowel set of this section).
vowel_height <- d_class %>%
  filter(phoneme %in% c("AE1", "EH1", "IH1")) %>%
  group_by(height, voice) %>%
  summarise(vowel_duration = mean(duration))
## `summarise()` has grouped output by 'height'. You can override using the `.groups` argument.
# Dodged bars: height on the x axis, one bar per voice value.
ggplot(
  data = vowel_height,
  mapping = aes(x = height, y = vowel_duration, fill = voice)
) +
  geom_col(position = "dodge")

2) coda duration_height

# Mean duration per height x voice combination, computed only over rows
# whose phoneme is G, K, or T (the coda set of this section).
# The summary column is named coda_duration (it was vowel_duration, a
# leftover from the vowel block) so the y-axis label describes the plotted
# quantity and agrees with the coda_speaking_rate table.
# NOTE(review): the preview rows show T as the first segment of "tack";
# confirm that T tokens are meant to be counted as codas here.
coda_height <- d_class %>%
  filter(phoneme %in% c("G", "K", "T")) %>%
  group_by(height, voice) %>%
  summarise(coda_duration = mean(duration))
## `summarise()` has grouped output by 'height'. You can override using the `.groups` argument.
# Dodged bars: height on the x axis, one bar per voice value.
ggplot(
  data = coda_height,
  mapping = aes(x = height, y = coda_duration, fill = voice)
) +
  geom_col(position = "dodge")