# Load the data set. stringsAsFactors = TRUE keeps text columns as factors on
# R >= 4.0 (where the default changed to FALSE), so summary() reports level
# counts for them instead of only length/class/mode.
dat <- read.csv("https://bit.ly/2vQXaXL", stringsAsFactors = TRUE)
Is it true that word.dur depends on speaker?
# Print a per-column summary of dat (the echoed output below shows quartiles
# for numeric columns, level counts for factor columns, and NA counts).
summary(dat)
## speaker index word time
## brs02:163 Min. : 1.00 d\303\266gg : 19 Min. : 0.5972
## bte03:157 1st Qu.: 41.00 detta : 15 1st Qu.:154.0102
## jj04 :151 Median : 82.50 dult : 15 Median :285.2568
## shg05:160 Mean : 83.08 feits : 15 Mean :304.2501
## tt01 :175 3rd Qu.:124.75 feitt : 15 3rd Qu.:423.7956
## Max. :181.00 f\303\246ddi: 15 Max. :868.1988
## (Other) :712
## word.dur voicing.dur vowel.dur cluster.dur
## Min. :229.5 Min. : 37.98 Min. : 22.78 Min. : 41.46
## 1st Qu.:398.4 1st Qu.: 90.70 1st Qu.: 71.14 1st Qu.:161.62
## Median :461.9 Median :113.25 Median : 84.77 Median :192.57
## Mean :456.7 Mean :128.50 Mean : 86.68 Mean :190.10
## 3rd Qu.:514.7 3rd Qu.:162.57 3rd Qu.: 99.31 3rd Qu.:221.91
## Max. :728.4 Max. :322.08 Max. :214.48 Max. :334.19
##
## spreading.dur sonorant.dur closure.dur vor
## Min. : 0.00 Min. : 43.74 Min. : 0.00 Min. : 79.44
## 1st Qu.: 5.95 1st Qu.: 99.52 1st Qu.: 77.90 1st Qu.:240.77
## Median : 66.40 Median :118.18 Median : 96.26 Median :276.45
## Mean : 63.11 Mean :121.77 Mean :107.24 Mean :276.73
## 3rd Qu.: 99.79 3rd Qu.:140.33 3rd Qu.:125.52 3rd Qu.:311.11
## Max. :196.83 Max. :242.61 Max. :272.54 Max. :450.57
## NA's :250 NA's :247 NA's :16 NA's :16
## voffr mor cond_no
## Min. : 24.57 Min. :131.2 Min. : 1.0
## 1st Qu.:114.68 1st Qu.:180.9 1st Qu.:15.0
## Median :149.50 Median :204.1 Median :31.5
## Mean :150.73 Mean :207.6 Mean :30.9
## 3rd Qu.:187.24 3rd Qu.:230.9 3rd Qu.:46.0
## Max. :298.98 Max. :334.2 Max. :60.0
## NA's :16 NA's :263
## ipa cons1 vowel height
## t\305\223kk : 19 asp :304 e :167 diph: 99
## celta : 15 fri : 15 a :160 high:217
## cel\314\245ta : 15 nasp:133 o :132 low :160
## cetta : 15 no : 94 i : 97 mid :330
## c\312\260empa : 15 vls :142 \312\217: 97
## c\312\260emt\311\252: 15 voi :118 ou : 40
## (Other) :712 (Other) :113
## anteroposterior roundness consonant manner
## back :172 round :300 \312\260t:127 geminate: 15
## front:634 unrounded:506 tt : 80 lateral : 84
## \312\260p: 72 nasal :189
## m : 59 rhotic : 30
## p : 53 stop :488
## kk : 49
## (Other) :366
## place aspiration syllables syl_structure gloss
## coronal:467 no :401 di :409 cvccv :230 comb : 71
## labial :248 yes:405 mono:397 cvcc :209 fat : 30
## velar : 91 cvncv : 92 thaw : 30
## cvvcc : 57 feed : 29
## cvnc : 55 decoration: 28
## vcc : 36 pound : 27
## (Other):127 (Other) :591
## pos comp contx
## adj:111 c:586 cc:631
## adv: 27 x:220 cx:175
## n :358
## v :310
##
##
##
# Does word duration differ between speakers? Look at the per-speaker
# distributions first, then fit a one-way ANOVA and print its table.
boxplot(word.dur ~ speaker, data = dat)
fit <- aov(word.dur ~ speaker, data = dat)
summary(fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## speaker 4 2116328 529082 140.2 <2e-16 ***
## Residuals 801 3021700 3772
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One-sample t-test of word.dur against the default null mean of 0; the
# printed result also reports the 95 percent confidence interval for the mean.
t.test(dat$word.dur)
##
## One Sample t-test
##
## data: dat$word.dur
## t = 162.3, df = 805, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 451.2018 462.2493
## sample estimates:
## mean of x
## 456.7255
\(CI = [\bar{x} - s,\ \bar{x} + s]\), where \(s = 1.96 \cdot \mathrm{sd}(x) / \sqrt{n}\)
Find the confidence interval (CI) for word.dur.
# Run the one-sample t-test once and take both confidence bounds from the same
# result. The component is spelled out as conf.int rather than relying on
# partial matching of `$conf`.
word_dur_test <- t.test(dat$word.dur, conf.level = 0.95)
left <- word_dur_test$conf.int[1]
right <- word_dur_test$conf.int[2]
# Print the full test result, including the 95 percent confidence interval.
word_dur_test
##
## One Sample t-test
##
## data: dat$word.dur
## t = 162.3, df = 805, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 451.2018 462.2493
## sample estimates:
## mean of x
## 456.7255
# Install tidyverse only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
##
## The downloaded binary packages are in
## /var/folders/79/1y_t9vcx3ws9shyf4nd1vblc0000gn/T//RtmpAKBRgQ/downloaded_packages
library(tidyverse)
## -- Attaching packages -------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Keep only the speaker and time columns (pipe form, then plain call form).
dat %>%
  select(speaker, time)
select(dat, speaker, time)

# Drop the speaker column, store the result, and print it.
dat_without_speaker <- dat %>%
  select(-speaker)
dat_without_speaker

# Keep the contiguous run of columns from index through time.
dat %>%
  select(index:time)

# Rows for the word "kokk" spoken by brs02 whose time exceeds 1.45 times the
# mean time computed over the whole data set.
dat %>%
  filter(
    word == "kokk",
    speaker == "brs02",
    time > mean(time) * 1.45
  )

# Mean and standard deviation of word.dur for each speaker.
dat %>%
  group_by(speaker) %>%
  summarise(
    word.dur.mean = mean(word.dur),
    word.dur.sd = sd(word.dur)
  )
# Switch the session locale to UTF-8.
# NOTE(review): presumably so non-ASCII word forms print as characters rather
# than byte escapes — confirm; accepted locale names are platform-dependent.
Sys.setlocale(locale="UTF-8")
## [1] "C/UTF-8/C/C/C/C"
# Mean word duration for every speaker-by-word combination.
dat %>%
  group_by(speaker, word) %>%
  summarise(wd_mean = mean(word.dur))

# Row counts per speaker (count column named "number"), per aspiration value,
# and per speaker-by-aspiration combination.
dat %>%
  count(speaker, name = "number")
dat %>%
  count(aspiration)
dat %>%
  count(speaker, aspiration)

# Spread the aspiration counts into one column per level, then gather them
# back into long format.
dat %>%
  count(speaker, aspiration) %>%
  pivot_wider(names_from = "aspiration", values_from = "n") %>%
  pivot_longer(yes:no, names_to = "aspiration")

# Sort rows from the longest to the shortest word duration.
dat %>%
  arrange(desc(word.dur))
Find the speaker with the largest average word.dur.