Calculating Cohen's d

  # Restore a saved workspace. Presumably it supplies load.vowels() and the
  # `path` object used on the next line -- neither is defined in this file
  # (TODO confirm).
  load("~/Documents/Classes/FAAV_Data/basic_context.Rdata")
  # NOTE(review): load.vowels() appears to read the tokens for the "uw" and
  # "Tuw" vowel classes found under `path` -- confirm against its definition.
  uw <- load.vowels(c("uw","Tuw"), path = path)
## Loading required package: plyr
  # Keep word-internal tokens, dropping any that are preceded by W/Y/L/R or
  # followed by L/R.
  uw <- subset(
    uw,
    Context == "Internal" &
      !(PreSegTrans %in% c("W", "Y", "L", "R") |
          FolSegTrans %in% c("L", "R"))
  )
  # Relabel every token: those preceded by one of the listed consonants become
  # "Tuw", all others "uw".
  uw$VClass <- ifelse(
    uw$PreSegTrans %in% c("CH", "D", "JH", "SH", "T", "TH", "Z", "ZH", "N"),
    "Tuw",
    "uw"
  )

  # Preview the first rows of uw after filtering and relabelling.
  head(uw)
##                     File   Name Age Sex Eth Edu   Street Year    F1   F2
## 1  PH00-1-1-JStevens.txt John S  21   m i/r  14 Buchanan 2000 420.9 1231
## 2  PH00-1-1-JStevens.txt John S  21   m i/r  14 Buchanan 2000 423.9 1272
## 16 PH00-1-1-JStevens.txt John S  21   m i/r  14 Buchanan 2000 453.3 1052
## 18 PH00-1-1-JStevens.txt John S  21   m i/r  14 Buchanan 2000 438.6 1421
## 19 PH00-1-1-JStevens.txt John S  21   m i/r  14 Buchanan 2000 408.0 1876
## 20 PH00-1-1-JStevens.txt John S  21   m i/r  14 Buchanan 2000 414.0 1869
##      F3 VClass    Manner       Place     Voice       PreSeg       FolSeq
## 1  2470     uw fricative labiodental    voiced nasal labial complex_coda
## 2  2255     uw fricative labiodental    voiced nasal labial complex_coda
## 16 2570     uw fricative labiodental    voiced nasal labial one_fol_syll
## 18 2097     uw fricative labiodental    voiced nasal labial complex_coda
## 19 2396    Tuw      stop      apical voiceless      palatal         <NA>
## 20 2414    Tuw      stop      apical voiceless      palatal one_fol_syll
##    Stress_Dur                  Info     Word    Time  Context PreSegTrans
## 1        1.59      MOVED /5/ 38.713    MOVED   38.71 Internal           M
## 2        1.90      MOVED /5/ 47.713    MOVED   47.71 Internal           M
## 16       1.10    MOVING /5/ 666.871   MOVING  666.87 Internal           M
## 18       1.60    MOVED /5/ 1141.623    MOVED 1141.62 Internal           M
## 19       1.50     SHOOT /6/ 1233.14    SHOOT 1233.14 Internal          SH
## 20       1.60 SHOOTING /5/ 1237.203 SHOOTING 1237.20 Internal          SH
##    FolSegTrans Fole2SegTrans           Trans FolTrans Stress Dur_msec
## 1            V             D       M UW1 V D       sp      1       59
## 2            V             D       M UW1 V D  F ER0 M      1       90
## 16           V           AH0   M UW1 V AH0 N       sp      1      100
## 18           V             D       M UW1 V D    IH1 N      1       60
## 19           T            DH        SH UW1 T   DH AH0      1       50
## 20           T           IH0 SH UW1 T IH0 NG    AH0 T      1       60
##      F1.n    F2.n  DOB DOB0 Decade                   NormFile
## 1  -1.591 -0.8842 1979   91    9.1 PH00-1-1-JStevens-norm.txt
## 2  -1.554 -0.7567 1979   91    9.1 PH00-1-1-JStevens-norm.txt
## 16 -1.186 -1.4471 1979   91    9.1 PH00-1-1-JStevens-norm.txt
## 18 -1.370 -0.2884 1979   91    9.1 PH00-1-1-JStevens-norm.txt
## 19 -1.753  1.1424 1979   91    9.1 PH00-1-1-JStevens-norm.txt
## 20 -1.677  1.1191 1979   91    9.1 PH00-1-1-JStevens-norm.txt

I'm using the dplyr package for this.

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, desc, failwith, id, mutate, summarise
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

The %.% operator strings together dplyr functions. Here I'm creating the values necessary to calculate Cohen's d.

# Per-speaker, per-class summary of normalized F2: mean, standard deviation
# and token count.
uw_means <- uw %.%
  group_by(File, DOB, Age, Sex, Eth, VClass) %.%
  summarise(
    mu = mean(F2.n),
    sd = sd(F2.n),
    n  = n()
  ) %.%
  # Keep classes with more than 3 tokens, from speakers who have both classes.
  filter(n > 3 & length(mu) == 2)
uw_means
## Source: local data frame [335 x 9]
## Groups: File, DOB, Age, Sex, Eth
## 
##                      File  DOB Age Sex   Eth VClass       mu     sd  n
## 1       PH90-2-7-JDoe.txt 1951  39   m   r/i    Tuw -0.38507 0.3032  4
## 2       PH85-3-9-Paul.txt 1946  39   m    NA    Tuw  0.18702 0.5148 14
## 3       PH82-1-1-Rosy.txt 1926  56   f    NA    Tuw  0.71130 0.1954  4
## 4     PH81-3-6-JLewis.txt 1913  68   m i/w/g    Tuw  0.04321 0.2641  5
## 5     PH81-0-5-AJones.txt 1951  30   m     a    Tuw -0.78569 0.1558  5
## 6      PH80-2-7-MRose.txt 1953  27   f     r    Tuw  0.79013 0.6738  6
## 7  PH78-4-3-MrGarbies.txt 1904  74   m     w    Tuw  0.61306 0.3744  6
## 8     PH73-0-5-CCappo.txt 1932  41   f     r    Tuw  0.45999 0.4401  7
## 9     PH77-4-1-TOHara.txt 1957  20   m     r    Tuw -0.04156 0.1920  4
## 10       PH96-3-2-Kay.txt 1941  55   f     p    Tuw -0.09579 1.0551 13
## ..                    ...  ... ... ...   ...    ...      ...    ... ..
library(ggplot2)

# Class means by date of birth, with one smoothed trend per vowel class.
ggplot(uw_means, aes(x = DOB, y = mu, colour = VClass)) +
  geom_point() +
  stat_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-4

Here are the Cohen's d calculations.

# Cohen's d per speaker: the difference between the two class means divided
# by the pooled standard deviation,
#   sqrt(sum((n_i - 1) * sd_i^2) / (n_1 + n_2 - 2)).
# The pooled term must weight the *variances* (sd^2), not the standard
# deviations themselves.
# NOTE: any pooled_sd / d values printed before this fix were computed from
# sd rather than sd^2 and need to be regenerated.
uw_cohen <- uw_means %.%
  filter(length(mu) == 2) %.%  # Only speakers who still have both vowel classes
  arrange(VClass) %.%          # Keep the subtraction order consistent
  summarise(delta = diff(mu),                                      # difference in means
            pooled_sd = sqrt(sum((n - 1) * sd^2) / (sum(n) - 2)),  # pooled sd
            d = delta / pooled_sd) %.%                             # standardized difference
  filter(is.finite(d))         # Drop speakers whose d is NA, NaN or infinite
uw_cohen
## Source: local data frame [114 x 8]
## Groups: File, DOB, Age, Sex
## 
##                           File  DOB Age Sex Eth    delta pooled_sd
## 1          PH06-2-3-Amanda.txt 1965  41   f  NA  0.48532    1.1086
## 2        PH00-1-1-JStevens.txt 1979  21   m i/r -0.53685    0.8446
## 3  PH76-3-2-AB-MrHellerman.txt 1932  44   m   j -0.04796    0.6809
## 4         PH00-1-2-NJulian.txt 1948  52   f o/i -0.05465    0.8227
## 5       PH00-1-3-AB-GSalvi.txt 1982  18   m  NA -1.93697    0.7067
## 6       PH04-3-2-AB-ECrane.txt 1979  25   m w/j -1.02102    0.6414
## 7      PH00-1-5-BagODonuts.txt 1930  70   m  NA -1.15538    0.5888
## 8     PH82-1-3-AB-JOMalley.txt 1913  69   m   r -0.06118    1.1773
## 9         PH82-3-5-RFlores.txt 1970  12   f  NA  0.35060    0.6667
## 10     PH02-2-8-ABC-GJones.txt 1956  46   m   a -1.25060    0.6664
## ..                         ...  ... ... ... ...      ...       ...
## Variables not shown: d (dbl)
# Effect size (Cohen's d) by date of birth, with a smoothed trend.
ggplot(uw_cohen, aes(x = DOB, y = d)) +
  geom_point() +
  stat_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-6