1 Introduction

This is the analysis of the PRICE vowel for Oxbury & McCarthy’s UKLVC12 presentation.

1.1 Inspect the data

Load packages:

library(tidyverse)
library(brms)
library(sjstats)
library(bayesplot)

Load data:

data <- read_csv("data/tidied_data.csv")
Missing column names filled in: 'X1' [1]
Parsed with column specification:
cols(
  .default = col_double(),
  participant = col_character(),
  vowel = col_character(),
  word = col_character(),
  task = col_character(),
  fol.phonOLOG.seg = col_character(),
  fol.PHONETIC.seg = col_character(),
  age = col_character(),
  sex = col_character(),
  rol.var = col_character(),
  face.l = col_character(),
  price.l = col_character(),
  fol_seg = col_character()
)
See spec(...) for full column specifications.
str(data)
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame':    11641 obs. of  63 variables:
 $ X1              : num  1 2 3 4 5 6 7 8 9 10 ...
 $ participant     : chr  "Ali" "Ali" "Ali" "Ali" ...
 $ vowel           : chr  "face" "face" "face" "face" ...
 $ word            : chr  "AGE" "AGES" "AGES" "AWAY" ...
 $ sound_start     : num  1771 1853 288 1998 116 ...
 $ sound_end       : num  1771 1853 288 1998 116 ...
 $ task            : chr  "soc.int" "soc.int" "soc.int" "soc.int" ...
 $ F1_20           : num  563 428 410 451 539 ...
 $ F1_35           : num  472 449 382 405 510 ...
 $ F1_50           : num  454 409 392 402 439 ...
 $ F1_65           : num  471 416 384 457 481 ...
 $ F1_80           : num  439 403 383 422 456 ...
 $ F2_20           : num  2147 2104 2140 1644 1882 ...
 $ F2_35           : num  2083 2178 2188 1678 1932 ...
 $ F2_50           : num  2043 2163 2216 1654 1997 ...
 $ F2_65           : num  2006 2242 2202 1616 1994 ...
 $ F2_80           : num  1990 2220 2255 1589 2023 ...
 $ fol.phonOLOG.seg: chr  "dʒ" "dʒ" "dʒ" "w" ...
 $ fol.PHONETIC.seg: chr  "dʒ" "dʒ" "dʒ" "w" ...
 $ lex.stress      : num  1 1 1 1 1 1 1 1 1 1 ...
 $ age             : chr  "adolescent" "adolescent" "adolescent" "adolescent" ...
 $ sex             : chr  "M" "M" "M" "M" ...
 $ meanF1          : num  480 421 390 427 485 ...
 $ meanF2          : num  2054 2181 2200 1636 1966 ...
 $ meanFleeceF1    : num  375 375 375 375 375 ...
 $ meanFleeceF2    : num  2128 2128 2128 2128 2128 ...
 $ meanTRAPF1      : num  706 706 706 706 706 ...
 $ u_F1            : num  375 375 375 375 375 ...
 $ u_F2            : num  375 375 375 375 375 ...
 $ trapF2          : num  1252 1252 1252 1252 1252 ...
 $ S_F1            : num  485 485 485 485 485 ...
 $ S_F2            : num  1252 1252 1252 1252 1252 ...
 $ normF1_20       : num  1.16 0.881 0.845 0.928 1.111 ...
 $ normF1_35       : num  0.971 0.925 0.787 0.834 1.05 ...
 $ normF1_50       : num  0.935 0.843 0.808 0.828 0.903 ...
 $ normF1_65       : num  0.971 0.856 0.79 0.942 0.991 ...
 $ normF1_80       : num  0.904 0.83 0.789 0.869 0.938 ...
 $ normF2_20       : num  1.72 1.68 1.71 1.31 1.5 ...
 $ normF2_35       : num  1.66 1.74 1.75 1.34 1.54 ...
 $ normF2_50       : num  1.63 1.73 1.77 1.32 1.6 ...
 $ normF2_65       : num  1.6 1.79 1.76 1.29 1.59 ...
 $ normF2_80       : num  1.59 1.77 1.8 1.27 1.62 ...
 $ duration        : num  0.1935 0.1485 0.1199 0.0772 0.1354 ...
 $ changeF1        : num  -124.5 -24.8 -27.3 -28.8 -83.9 ...
 $ normChangeF1    : num  -0.2564 -0.051 -0.0562 -0.0593 -0.1728 ...
 $ changeF2        : num  -156.3 116.4 115.6 -54.3 140.3 ...
 $ normChangeF2    : num  -0.1249 0.093 0.0924 -0.0434 0.1121 ...
 $ VL              : num  199.8 119 118.8 61.4 163.4 ...
 $ normVL          : num  0.2852 0.1061 0.1081 0.0735 0.206 ...
 $ normVSL1        : num  0.1953 0.0736 0.0691 0.098 0.0734 ...
 $ normVSL2        : num  0.0483 0.0828 0.0312 0.0206 0.155 ...
 $ normVSL3        : num  0.0461 0.0646 0.0213 0.1182 0.0877 ...
 $ normVSL4        : num  0.0681 0.0313 0.0428 0.0761 0.0574 ...
 $ VSL1            : num  111.5 76.9 55.4 57.3 58.2 ...
 $ VSL2            : num  43.7 42.4 30.6 24.8 95.8 ...
 $ VSL3            : num  40.7 79.5 16.9 67.2 42.6 ...
 $ VSL4            : num  36.1 25.4 53.5 44.2 38.3 ...
 $ TrajLength      : num  232 224 156 193 235 ...
 $ norm_TL         : num  0.358 0.252 0.164 0.313 0.373 ...
 $ rol.var         : chr  "n.a." "n.a." "n.a." "n.a." ...
 $ face.l          : chr  "fine" "fine" "fine" "fine" ...
 $ price.l         : chr  "n.a." "n.a." "n.a." "n.a." ...
 $ fol_seg         : chr  "fol_voiced_clus" "fol_voiced_clus" "fol_voiced_clus" "fol_approximant" ...
 - attr(*, "spec")= cols(...)
data <- data %>% mutate(
  X1 = NULL,
  participant = factor(participant),
  vowel = factor(vowel),
  word = factor(word),
  task = factor(task),
  fol.phonOLOG.seg = factor(fol.phonOLOG.seg),
  fol.PHONETIC.seg = factor(fol.PHONETIC.seg),
  age = factor(age),
  sex = factor(sex),
  rol.var = factor(rol.var),
  face.l = factor(face.l),
  price.l = factor(price.l),
  fol_seg = factor(fol_seg)
)

Subset the key vowels:

price_trap_fleece <- data %>% filter(vowel %in% c("price", "fleece", "trap"))

1.2 Frequent words

Check for really frequent words:

price_trap_fleece %>% group_by(word) %>% 
  summarise(N = n()) %>% arrange(desc(N))

LIKE and I are extremely frequent, so we will remove them; they could be considered in a separate analysis.

price_no_like_I <- price_trap_fleece %>% filter(!word %in% c("LIKE", "I"))

Check distribution of duration:

ggplot(price_no_like_I, aes(x = duration, fill = age)) + geom_histogram(bins = 100)

Filter out tokens with a duration of 0.75 s or more:

price2 <- price_no_like_I %>% filter(duration < 0.75)
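
A quick check of how many tokens this removes:

nrow(price_no_like_I) - nrow(price2)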

Try plotting again

ggplot(price2, aes(x = duration, fill = age)) + geom_histogram(bins = 100)

Looks a bit better.

1.3 Data transformations

Log-transform duration

price3 <- mutate(price2,
               LogDur = log10(duration))

… and standardize it

price3 <- mutate(price3,
               Log_dur_z = scale(LogDur))
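
Note that scale() returns a one-column matrix, so Log_dur_z is stored as a matrix column. If that causes trouble downstream, it can be flattened to a plain vector (optional):

price3 <- mutate(price3, Log_dur_z = as.numeric(scale(LogDur)))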

Recode the sex variable so that M is the reference level:

price3$sex <- relevel(price3$sex, ref = "M")

Recode following segment so that ‘pause’ is the reference level:

price3$fol_seg <- relevel(price3$fol_seg, ref = "fol_pause")
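
To confirm the releveling, inspect the factor levels – the reference level is listed first:

levels(price3$sex)     # "M" first
levels(price3$fol_seg) # "fol_pause" first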

2 PRICE F2

Create two subsets: one with just PRICE, and one with PRICE and TRAP:

just_price <- price3 %>% filter(vowel == "price")
price_trap <- price3 %>% filter(vowel == "price" | vowel == "trap")

Run a model. We are not specifying priors for now, so brms will use its defaults.

price_F2_m <- brm(normF2_20 ~ Log_dur_z + age*sex + fol_seg + (1|participant) + (1|word), data = just_price)
Compiling the C++ model
recompiling to avoid crashing R session
Start sampling

SAMPLING FOR MODEL '1e2e842938811e2fe81287f02925f257' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.0006 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 6 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 34.3742 seconds (Warm-up)
Chain 1:                18.0267 seconds (Sampling)
Chain 1:                52.4009 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL '1e2e842938811e2fe81287f02925f257' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000517 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 5.17 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 34.2707 seconds (Warm-up)
Chain 2:                17.6604 seconds (Sampling)
Chain 2:                51.9311 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL '1e2e842938811e2fe81287f02925f257' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.000568 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 5.68 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 33.8957 seconds (Warm-up)
Chain 3:                17.6197 seconds (Sampling)
Chain 3:                51.5154 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL '1e2e842938811e2fe81287f02925f257' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.000485 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 4.85 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 34.0228 seconds (Warm-up)
Chain 4:                17.7701 seconds (Sampling)
Chain 4:                51.7928 seconds (Total)
Chain 4: 
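
We did not specify priors above, so brm falls back on its defaults. If we wanted weakly informative priors instead, they could be supplied along these lines (a sketch only: the scales below are illustrative assumptions, not values we have checked against these data):

priors <- c(
  set_prior("normal(0, 0.5)", class = "b"),         # population-level effects
  set_prior("student_t(3, 0, 2.5)", class = "sd")   # group-level standard deviations
)
# price_F2_m_priors <- brm(normF2_20 ~ Log_dur_z + age*sex + fol_seg +
#                            (1|participant) + (1|word),
#                          data = just_price, prior = priors)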

Check the model summary:

tidy_stan(price_F2_m, prob=0.89, type="fixed", digits=4)

# Summary Statistics of Stan-Model

                           estimate std.error          HDI(89%)  ratio   rhat   mcse
 Intercept                   1.1102    0.0226 [ 1.0734  1.1450] 0.3155 1.0000 0.0006
 Log_dur_z                  -0.0206    0.0029 [-0.0255 -0.0162] 1.3128 0.9995 0.0000
 agechild                   -0.0851    0.0328 [-0.1429 -0.0346] 0.3141 1.0023 0.0010
 sexF                       -0.1603    0.0329 [-0.2143 -0.1085] 0.3341 1.0000 0.0009
 fol_segfol_approximant     -0.0178    0.0163 [-0.0434  0.0072] 0.6619 1.0013 0.0003
 fol_segfol_nasal           -0.0285    0.0166 [-0.0532 -0.0006] 0.5835 1.0010 0.0003
 fol_segfol_voiced_C        -0.0171    0.0145 [-0.0399  0.0078] 0.6040 1.0004 0.0003
 fol_segfol_voiced_clus     -0.0333    0.0230 [-0.0676  0.0049] 0.8671 1.0000 0.0004
 fol_segfol_voiceless_C     -0.0263    0.0153 [-0.0510 -0.0020] 0.6000 1.0010 0.0003
 fol_segfol_voiceless_clus   0.0061    0.0203 [-0.0250  0.0384] 0.7592 0.9996 0.0004
 fol_segother               -0.0033    0.0145 [-0.0266  0.0195] 0.5296 1.0017 0.0003
 agechild.sexF               0.1842    0.0506 [ 0.1026  0.2651] 0.3429 1.0000 0.0014

Equivalence test:

equivalence_test(price_F2_m, ci=0.89)
Possible multicollinearity between b_fol_segother and b_fol_segfol_approximant (r = 0.71), b_fol_segfol_voiced_C and b_fol_segfol_nasal (r = 0.75), b_fol_segfol_voiceless_C and b_fol_segfol_nasal (r = 0.73), b_fol_segother and b_fol_segfol_nasal (r = 0.77), b_fol_segfol_voiceless_C and b_fol_segfol_voiced_C (r = 0.77), b_fol_segother and b_fol_segfol_voiced_C (r = 0.82), b_fol_segother and b_fol_segfol_voiceless_C (r = 0.8). This might lead to inappropriate results. See 'Details' in '?equivalence_test'.
# Test for Practical Equivalence

  ROPE: [-0.02 0.02]

                 Parameter        H0 inside ROPE       89% HDI
                 Intercept  Rejected      0.00 % [ 1.07  1.15]
                 Log_dur_z  Rejected      0.00 % [-0.03 -0.02]
                  agechild  Rejected      0.00 % [-0.14 -0.03]
                      sexF  Rejected      0.00 % [-0.21 -0.11]
    fol_segfol_approximant Undecided     44.96 % [-0.04  0.01]
          fol_segfol_nasal Undecided     20.44 % [-0.05 -0.00]
       fol_segfol_voiced_C Undecided     45.91 % [-0.04  0.01]
    fol_segfol_voiced_clus Undecided     19.97 % [-0.07  0.00]
    fol_segfol_voiceless_C Undecided     20.39 % [-0.05 -0.00]
 fol_segfol_voiceless_clus Undecided     60.21 % [-0.02  0.04]
              fol_segother Undecided     80.03 % [-0.03  0.02]
             agechild.sexF  Rejected      0.00 % [ 0.10  0.27]
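
The ROPE of [-0.02, 0.02] looks like the common ±0.1·SD(y) convention for linear models (Kruschke's rule of thumb); under that assumption, the bound can be reproduced by hand:

0.1 * sd(just_price$normF2_20)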

Visualise the posteriors:

price_F2_pos <- as.matrix(price_F2_m)
#dimnames(price_F2_pos)
#color_scheme_set("brightblue")
mcmc_intervals(price_F2_pos,
           pars = c("b_Log_dur_z",
                    "b_agechild", 
                    "b_sexF",
                    "b_fol_segfol_nasal",
                    "b_fol_segfol_approximant",
                    "b_fol_segfol_voiced_C",
                    "b_fol_segfol_voiced_clus",
                    "b_fol_segfol_voiceless_C",
                    "b_fol_segfol_voiceless_clus",
                    "b_fol_segother",
                    "b_agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

mcmc_areas(price_F2_pos,
           pars = c("b_Log_dur_z",
                    "b_agechild", 
                    "b_sexF",
                    "b_fol_segfol_nasal",
                    "b_fol_segfol_approximant",
                    "b_fol_segfol_voiced_C",
                    "b_fol_segfol_voiced_clus",
                    "b_fol_segfol_voiceless_C",
                    "b_fol_segfol_voiceless_clus",
                    "b_fol_segother",
                    "b_agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

2.1 Summary of model

So it looks like:

  • none of the following segments has much of an effect on F2 at onset: the posterior medians are all pretty close to zero, and in most cases the 89% credible interval contains 0.

  • Duration has a small but pretty definitive effect on F2 at onset: a longer duration predicts a lower F2, i.e. a more back nucleus for PRICE.

  • Being a child as opposed to an adolescent, and a female adolescent as opposed to a male adolescent, predicts a lower F2 at onset. Female adolescents are predicted to have an F2 at 20% that is around 0.15 lower than the boys’ (remember, we are working with normalized formants).

  • There is a positive age-sex interaction: female children are expected to have a higher F2 than male children. This is the opposite of the sex effect among adolescents; the sketch below combines the relevant draws to show the net effect.
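
A minimal sketch of that combination, using the draws matrix defined above (the 89% interval here is equal-tailed, not an HDI, so it will differ slightly from tidy_stan's):

# Sex effect among children = sex effect among adolescents + interaction
sexF_children <- price_F2_pos[, "b_sexF"] + price_F2_pos[, "b_agechild:sexF"]
median(sexF_children)  # expected to be positive, per the bullet above
quantile(sexF_children, probs = c(0.055, 0.945))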

2.2 F2: plot

Establish palette

vowel_cols <- c("#0072B2", "#99CCFF")
pd <- position_dodge(0.9)
x <- ggplot(just_price, aes(x = sex, y = normF2_20, fill=sex)) + facet_grid(cols = vars(age)) +
  geom_boxplot(width = 0.3, position = position_dodge(0.9)) + 
  theme_minimal() +
  scale_fill_manual(values = vowel_cols) +
   theme(legend.title = element_blank(),
        strip.text = element_text(size = rel(2)),
        legend.text = element_blank(),
        axis.title = element_text(size = rel(2)), 
        axis.text = element_text(size = rel(2)), 
        plot.title = element_text(size = rel(2.5))) +
  labs(x = "Age", y = "Normalized F2", title = "PRICE: F2 at 20%")
x
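
If the figure is needed for the slides, it could be exported with ggsave (the filename here is just a placeholder):

ggsave("price_F2_at_20.png", x, width = 8, height = 6)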

3 F2: PRICE and TRAP

Here, we compare the F2 of PRICE at its onset to the F2 of TRAP. The logic is that in Cockney or RP, TRAP is central/front while PRICE has a back onset, whereas in MLE (Multicultural London English) the onsets of PRICE and TRAP are similar and fairly central.

Set the reference level of vowel to price:

price_trap$vowel <- relevel(price_trap$vowel, ref = "price")

Run the model:

price_trap_m <- brm(normF2_20 ~ Log_dur_z + vowel*age*sex + fol_seg + (1+vowel|participant) + (1|word), data = price_trap)
Compiling the C++ model
Start sampling

SAMPLING FOR MODEL '97662c0437317ce5ce93b02827263cfe' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.001139 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 11.39 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 58.0205 seconds (Warm-up)
Chain 1:                32.814 seconds (Sampling)
Chain 1:                90.8345 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL '97662c0437317ce5ce93b02827263cfe' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000874 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 8.74 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 71.9545 seconds (Warm-up)
Chain 2:                30.8979 seconds (Sampling)
Chain 2:                102.852 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL '97662c0437317ce5ce93b02827263cfe' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.000678 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 6.78 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 68.7824 seconds (Warm-up)
Chain 3:                31.1332 seconds (Sampling)
Chain 3:                99.9157 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL '97662c0437317ce5ce93b02827263cfe' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.000788 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 7.88 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 62.676 seconds (Warm-up)
Chain 4:                29.3766 seconds (Sampling)
Chain 4:                92.0526 seconds (Total)
Chain 4: 

Check the model summary:

tidy_stan(price_trap_m, prob=0.89, type="fixed", digits=4)

# Summary Statistics of Stan-Model

                           estimate std.error          HDI(89%)  ratio   rhat   mcse
 Intercept                   1.1107    0.0233 [ 1.0728  1.1450] 0.1923 1.0041 0.0008
 Log_dur_z                  -0.0190    0.0027 [-0.0231 -0.0145] 1.2914 0.9993 0.0000
 voweltrap                  -0.0182    0.0193 [-0.0521  0.0117] 0.5536 0.9997 0.0004
 agechild                   -0.0894    0.0339 [-0.1466 -0.0373] 0.1937 1.0020 0.0012
 sexF                       -0.1631    0.0334 [-0.2170 -0.1054] 0.1897 1.0062 0.0012
 fol_segfol_approximant     -0.0167    0.0154 [-0.0393  0.0098] 0.4119 1.0002 0.0004
 fol_segfol_nasal           -0.0263    0.0157 [-0.0507 -0.0003] 0.3352 1.0003 0.0004
 fol_segfol_voiced_C        -0.0164    0.0143 [-0.0386  0.0074] 0.3712 1.0002 0.0004
 fol_segfol_voiced_clus     -0.0312    0.0222 [-0.0679  0.0042] 0.5691 1.0002 0.0005
 fol_segfol_voiceless_C     -0.0242    0.0146 [-0.0478  0.0000] 0.3849 0.9996 0.0004
 fol_segfol_voiceless_clus   0.0086    0.0198 [-0.0238  0.0392] 0.5419 1.0003 0.0004
 fol_segother               -0.0016    0.0144 [-0.0232  0.0217] 0.3358 1.0005 0.0004
 voweltrap.agechild          0.0356    0.0267 [-0.0095  0.0783] 0.5515 1.0006 0.0006
 voweltrap.sexF              0.0788    0.0289 [ 0.0289  0.1225] 0.6586 1.0001 0.0006
 agechild.sexF               0.1888    0.0538 [ 0.1061  0.2737] 0.2289 1.0035 0.0018
 voweltrap.agechild.sexF    -0.0741    0.0419 [-0.1470 -0.0096] 0.5786 1.0004 0.0009

Visualise the posteriors:

price_trap_pos <- as.matrix(price_trap_m)
#dimnames(price_trap_pos)
mcmc_intervals(price_trap_pos,
           pars = c("b_Log_dur_z",
                    "b_voweltrap",
                    "b_agechild", 
                    "b_sexF",
                    "b_voweltrap:agechild",
                    "b_voweltrap:sexF",
                    "b_agechild:sexF",
                    "b_voweltrap:agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

mcmc_areas(price_trap_pos,
           pars = c("b_Log_dur_z",
                    "b_voweltrap",
                    "b_agechild", 
                    "b_sexF",
                    "b_voweltrap:agechild",
                    "b_voweltrap:sexF",
                    "b_agechild:sexF",
                    "b_voweltrap:agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

3.1 Summary of model

From the model comparing PRICE and TRAP, it seems:

  • The vowel TRAP has a median F2 actually slightly lower than PRICE – whereas in Cockney or RP, we would expect TRAP to have a higher F2. This is a very small effect and the credible interval contains 0, so in frequentist terms there isn’t a significant difference between PRICE and TRAP. In fact, if there is an effect (we think there isn’t), it’s in the opposite direction to the one we would have predicted!

  • Looking at the voweltrap:agechild interaction: -0.02 + 0.04 = 0.02; male children seem to have a TRAP vowel that is slightly more front than PRICE, based on the posterior medians. This effect isn’t very robust though, as the 89% credible interval for the interaction contains 0.

  • Looking at the voweltrap:sexF interaction: -0.02 + 0.08 = 0.06; female adolescents seem to have a TRAP vowel that is more front than PRICE. You can see from the graphs that the 89% credible interval for the interaction does not contain 0; however, the full posterior distribution in the mcmc_areas graph does cross the 0 line. When we plot PRICE and TRAP in a boxplot in a moment, you’ll see that female adolescents do indeed show a separation between PRICE and TRAP.

  • Now the voweltrap.agechild.sexF interaction: summing all the vowel terms for female children gives -0.02 + 0.04 + 0.08 - 0.07 = 0.02 (note that the voweltrap:sexF term applies here too), so female children’s TRAP is barely distinguishable from their PRICE, and if anything very slightly more front. This effect isn’t very robust though: the three-way posterior is fairly wide, and its 89% credible interval only just excludes 0 (see the sketch below).
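
As a quick check on this arithmetic, the conditional effects can be computed directly from the posterior draws (a minimal sketch; the 89% intervals are equal-tailed rather than HDIs, so they will differ slightly from tidy_stan's):

# TRAP-PRICE difference for female adolescents: voweltrap + voweltrap:sexF
trap_F_adol <- price_trap_pos[, "b_voweltrap"] + price_trap_pos[, "b_voweltrap:sexF"]
# ... and for female children: all four vowel terms
trap_F_child <- trap_F_adol +
  price_trap_pos[, "b_voweltrap:agechild"] +
  price_trap_pos[, "b_voweltrap:agechild:sexF"]
median(trap_F_adol); quantile(trap_F_adol, probs = c(0.055, 0.945))
median(trap_F_child); quantile(trap_F_child, probs = c(0.055, 0.945))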

3.2 Random slopes

We can also have a look at the random slopes for this model.

#dimnames(price_trap_pos)
mcmc_intervals(price_trap_pos,
           pars = c("r_participant[Ali,voweltrap]",
                    "r_participant[Amanda,voweltrap]",
                    "r_participant[CB,voweltrap]",
                    "r_participant[Chantelle,voweltrap]",
                    "r_participant[ChrisB,voweltrap]",
                    "r_participant[Daniel,voweltrap]",
                    "r_participant[Denzel,voweltrap]",
                    "r_participant[F1,voweltrap]",
                    "r_participant[F10,voweltrap]",
                    "r_participant[F3,voweltrap]",
                    "r_participant[F4,voweltrap]",
                    "r_participant[F7,voweltrap]",
                    "r_participant[F8,voweltrap]",
                    "r_participant[F9,voweltrap]",
                    "r_participant[GW,voweltrap]",
                    "r_participant[Ibrahim,voweltrap]",
                    "r_participant[Jessica,voweltrap]",
                    "r_participant[Joe,voweltrap]",
                    "r_participant[Kai,voweltrap]",
                    "r_participant[Khadir,voweltrap]",
                    "r_participant[Lola,voweltrap]",
                    "r_participant[Lucy,voweltrap]",
                    "r_participant[M1,voweltrap]",
                    "r_participant[M3,voweltrap]",
                    "r_participant[M4,voweltrap]",
                    "r_participant[M5,voweltrap]",
                    "r_participant[M6,voweltrap]",
                    "r_participant[M7,voweltrap]",
                    "r_participant[M8,voweltrap]",
                    "r_participant[Matisse,voweltrap]",
                    "r_participant[Moses,voweltrap]",
                    "r_participant[Omar,voweltrap]",
                    "r_participant[Sami,voweltrap]",
                    "r_participant[SD,voweltrap]",
                    "r_participant[Shantel,voweltrap]",
                    "r_participant[Tariq,voweltrap]",
                    "r_participant[Tony,voweltrap]",
                    "r_participant[ZR,voweltrap]"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

mcmc_areas(price_trap_pos,
           pars = c("r_participant[Ali,voweltrap]",
                    "r_participant[Amanda,voweltrap]",
                    "r_participant[CB,voweltrap]",
                    "r_participant[Chantelle,voweltrap]",
                    "r_participant[ChrisB,voweltrap]",
                    "r_participant[Daniel,voweltrap]",
                    "r_participant[Denzel,voweltrap]",
                    "r_participant[F1,voweltrap]",
                    "r_participant[F10,voweltrap]",
                    "r_participant[F3,voweltrap]",
                    "r_participant[F4,voweltrap]",
                    "r_participant[F7,voweltrap]",
                    "r_participant[F8,voweltrap]",
                    "r_participant[F9,voweltrap]",
                    "r_participant[GW,voweltrap]",
                    "r_participant[Ibrahim,voweltrap]",
                    "r_participant[Jessica,voweltrap]",
                    "r_participant[Joe,voweltrap]",
                    "r_participant[Kai,voweltrap]",
                    "r_participant[Khadir,voweltrap]",
                    "r_participant[Lola,voweltrap]",
                    "r_participant[Lucy,voweltrap]",
                    "r_participant[M1,voweltrap]",
                    "r_participant[M3,voweltrap]",
                    "r_participant[M4,voweltrap]",
                    "r_participant[M5,voweltrap]",
                    "r_participant[M6,voweltrap]",
                    "r_participant[M7,voweltrap]",
                    "r_participant[M8,voweltrap]",
                    "r_participant[Matisse,voweltrap]",
                    "r_participant[Moses,voweltrap]",
                    "r_participant[Omar,voweltrap]",
                    "r_participant[Sami,voweltrap]",
                    "r_participant[SD,voweltrap]",
                    "r_participant[Shantel,voweltrap]",
                    "r_participant[Tariq,voweltrap]",
                    "r_participant[Tony,voweltrap]",
                    "r_participant[ZR,voweltrap]"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)
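
Typing out all 38 random-slope names by hand is error-prone; the same list could be built from the column names of the draws matrix (a sketch using the naming scheme visible above):

trap_slopes <- grep("^r_participant\\[.*,voweltrap\\]$",
                    colnames(price_trap_pos), value = TRUE)
mcmc_intervals(price_trap_pos, pars = trap_slopes, prob = 0.89) +
  ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept = 0)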

3.3 PRICE and TRAP boxplot

Establish palette:

vowel_cols <- c("#0072B2", "#D55E00")
pd <- position_dodge(0.9)
x2 <- ggplot(price_trap,
             aes(x = sex,
                 y = normF2_20,
                 fill = vowel)) + 
  facet_grid(cols = vars(age)) +
  geom_boxplot(width = 0.3, position = pd) + 
  theme_minimal() + 
  scale_fill_manual(values = vowel_cols) + 
  theme(legend.title = element_text(size = rel(1.5)),
        strip.text = element_text(size = rel(1.5)),
        legend.text = element_text(size = rel(1.5)),
        axis.title = element_text(size = rel(1.5)), 
        axis.text = element_text(size = rel(1.5)), 
        plot.title = element_text(size = rel(2))) +
  labs(x = "Age", y = "Normalized F2", title = "PRICE: F2 at 20%")
x2

This boxplot shows that adolescent females tend to have a more front TRAP than PRICE.

4 PRICE Trajectory Length

We’re going to look at Trajectory Length now, so we’ll subset out PRICE and FLEECE – the logic being that we want to compare the Trajectory Length of PRICE, which is supposed to be a diphthong, with FLEECE, which is supposed to be a monophthong.

price_fleece <- price3 %>% filter(vowel %in% c("price", "fleece"))

Check the distribution of Trajectory Length:

ggplot(price_fleece, aes(x = norm_TL, fill=vowel)) + geom_histogram(bins = 100)

Get rid of outliers

price_fleece2 <- price_fleece %>% filter(norm_TL < 2.1)

Set vowel reference level to price:

price_fleece2$vowel <- relevel(price_fleece2$vowel, ref = "price")

Build the model:

price_traj_m <- brm(norm_TL ~ Log_dur_z + vowel*age*sex + fol_seg + (1+vowel|participant) + (1|word), data = price_fleece2)
Compiling the C++ model
recompiling to avoid crashing R session
Start sampling

SAMPLING FOR MODEL 'd93beac45d3bb02918b6ac08195de742' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.001181 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 11.81 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 47.9536 seconds (Warm-up)
Chain 1:                14.7824 seconds (Sampling)
Chain 1:                62.736 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'd93beac45d3bb02918b6ac08195de742' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000986 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 9.86 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 44.8161 seconds (Warm-up)
Chain 2:                17.332 seconds (Sampling)
Chain 2:                62.1481 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'd93beac45d3bb02918b6ac08195de742' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.000956 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 9.56 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 51.8732 seconds (Warm-up)
Chain 3:                19.2022 seconds (Sampling)
Chain 3:                71.0754 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'd93beac45d3bb02918b6ac08195de742' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.000905 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 9.05 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 54.3576 seconds (Warm-up)
Chain 4:                16.6103 seconds (Sampling)
Chain 4:                70.9679 seconds (Total)
Chain 4: 

Model summary:

tidy_stan(price_traj_m, prob=0.89, type="fixed", digits=4)

# Summary Statistics of Stan-Model

                           estimate std.error          HDI(89%)  ratio   rhat   mcse
 Intercept                   0.4097    0.0325 [ 0.3566  0.4599] 0.2131 1.0019 0.0011
 Log_dur_z                   0.1262    0.0052 [ 0.1176  0.1347] 1.2704 0.9998 0.0001
 vowelfleece                -0.0762    0.0343 [-0.1285 -0.0179] 0.4352 1.0022 0.0008
 agechild                    0.2718    0.0385 [ 0.2072  0.3318] 0.2251 1.0022 0.0013
 sexF                        0.0837    0.0357 [ 0.0253  0.1409] 0.2112 1.0051 0.0013
 fol_segfol_approximant      0.0303    0.0306 [-0.0201  0.0796] 0.4187 1.0003 0.0008
 fol_segfol_nasal            0.0464    0.0309 [-0.0022  0.0936] 0.3207 1.0016 0.0008
 fol_segfol_voiced_C         0.0705    0.0276 [ 0.0256  0.1150] 0.3063 1.0018 0.0008
 fol_segfol_voiced_clus      0.0579    0.0431 [-0.0043  0.1341] 0.4907 1.0011 0.0010
 fol_segfol_voiceless_C      0.0407    0.0288 [-0.0070  0.0846] 0.3213 1.0014 0.0008
 fol_segfol_voiceless_clus   0.0228    0.0374 [-0.0365  0.0847] 0.3956 1.0029 0.0010
 fol_segother                0.0124    0.0277 [-0.0308  0.0559] 0.3217 1.0016 0.0008
 vowelfleece.agechild       -0.2743    0.0500 [-0.3558 -0.1927] 0.3942 1.0012 0.0013
 vowelfleece.sexF           -0.0258    0.0513 [-0.1088  0.0537] 0.4687 1.0018 0.0012
 agechild.sexF              -0.1142    0.0548 [-0.2064 -0.0224] 0.2209 1.0019 0.0020
 vowelfleece.agechild.sexF   0.0316    0.0740 [-0.0869  0.1530] 0.4348 1.0005 0.0018

Visualise these effects:

price_traj_pos <- as.matrix(price_traj_m)
#dimnames(price_traj_pos)
mcmc_areas(price_traj_pos,
           pars = c("b_Log_dur_z",
                    "b_vowelfleece",
                    "b_agechild", 
                    "b_sexF",
                    "b_fol_segfol_approximant",
                    "b_fol_segfol_nasal",
                    "b_fol_segfol_voiced_C",
                    "b_fol_segfol_voiced_clus",
                    "b_fol_segfol_voiceless_C",
                    "b_fol_segfol_voiceless_clus",
                    "b_fol_segother",
                    "b_vowelfleece:agechild",
                    "b_vowelfleece:sexF",
                    "b_agechild:sexF",
                    "b_vowelfleece:agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

mcmc_intervals(price_traj_pos,
           pars = c("b_Log_dur_z",
                    "b_vowelfleece",
                    "b_agechild", 
                    "b_sexF",
                    "b_fol_segfol_approximant",
                    "b_fol_segfol_nasal",
                    "b_fol_segfol_voiced_C",
                    "b_fol_segfol_voiced_clus",
                    "b_fol_segfol_voiceless_C",
                    "b_fol_segfol_voiceless_clus",
                    "b_fol_segother",
                    "b_vowelfleece:agechild",
                    "b_vowelfleece:sexF",
                    "b_agechild:sexF",
                    "b_vowelfleece:agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

4.1 Summary of model

What this tells us is that:

  • An increase in duration predicts an increase in Trajectory Length (unsurprisingly)

  • FLEECE tends to have a shorter TL than PRICE, by about 0.08. The 89% credible interval does not contain 0 (though the full posterior distribution does cross it): this is important because it means that even adolescent males, who tend to lead in monophthongization, robustly have a PRICE vowel that is more diphthongal than their FLEECE. By contrast, we found for FACE and GOAT that the adolescent boys’ FACE and GOAT vowels don’t seem to be any more diphthongal than their FLEECE.

  • Being a child as opposed to an adolescent predicts a greater TL, by about 0.27 – quite a big effect, and a robust one: the posterior does not contain 0. So children are much more diphthongal than adolescents (take a look at the boxplot below!).

  • Being a female adolescent as opposed to a male adolescent predicts a greater TL, by about 0.08. The 89% credible interval does not contain 0, though the full posterior distribution does.

  • Age-sex interaction: the coefficient is about -0.11, so the child-vs-adolescent increase in TL is about 0.27 - 0.11 = 0.16 for girls, compared with 0.27 for boys; equivalently, among children the female-vs-male difference is about 0.08 - 0.11 = -0.03, the reverse of the (positive) sex effect among adolescents. However you want to put it! The 89% credible interval on this interaction term does not contain 0, but notice that the full posterior distribution does.

  • Most following segments predict a greater TL compared to a following pause – so a following pause predicts a more monophthongal realization? These effects are pretty small – less than 0.1 – and for all except a following voiced consonant, the 89% credible interval contains 0.

  • There is a negative vowelfleece:agechild interaction: -0.08 - 0.27 = -0.35: for children, the TL of FLEECE tends to be about 0.35 less than the TL of PRICE (see the sketch after this list).

  • vowelfleece.sexF interaction: -0.08 - 0.03 = -0.11: for adolescent girls, FLEECE has a shorter TL than PRICE by about 0.11. This effect isn’t robust: the 89% credible interval for the interaction contains 0, so the FLEECE–PRICE difference for adolescent girls may be no different from what it is for adolescent boys. Taking the other end of the interval (-0.08 + 0.05 = -0.03), the FLEECE–PRICE difference for adolescent girls could even be smaller than it is for the boys.

  • vowelfleece.agechild.sexF: this posterior is also very wide, with the 89% credible interval containing 0, so we wouldn’t want to conclude that the FLEECE–PRICE difference for female children is any different from what it is for male children.
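
Again, these conditional effects can be read straight off the posterior draws (a minimal sketch; equal-tailed 89% interval, so slightly different from the HDIs above):

# FLEECE-PRICE TL difference for (male) children: vowelfleece + vowelfleece:agechild
fleece_child <- price_traj_pos[, "b_vowelfleece"] +
  price_traj_pos[, "b_vowelfleece:agechild"]
median(fleece_child)
quantile(fleece_child, probs = c(0.055, 0.945))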

4.2 Boxplot of PRICE Traj

price_fleece_cols <- c("#0072B2", "#E69F00")
z <- ggplot(price_fleece2, aes(x = sex, y = norm_TL, fill = vowel)) + facet_grid(cols = vars(age)) +
  geom_boxplot(width = 0.3, position = position_dodge(0.9)) + 
  theme_minimal() + 
  scale_fill_manual(values = price_fleece_cols) + 
   theme(legend.title = element_text(size = rel(2)),
        strip.text = element_text(size = rel(2)),
        legend.text = element_text(size = rel(2)),
        axis.title = element_text(size = rel(2)), 
        axis.text = element_text(size = rel(2)), 
        plot.title = element_text(size = rel(2.5))) +
  labs(x = "Age", y = "Trajectory Length", title = "PRICE: Trajectory Length") +
  ylim(0, 2)
z

price_fleece_cols <- c("#0072B2", "#E69F00")
transparency <- c(1, 0.2)
faded <- c("#000000", "#CCCCCC")
z2 <- ggplot(price_fleece2, aes(x = sex, y = norm_TL, fill = vowel, alpha=vowel)) + facet_grid(cols = vars(age)) +
  geom_boxplot(aes(colour=vowel), width = 0.3, position = position_dodge(0.9)) + 
  theme_minimal() + 
   scale_colour_manual(values = faded) +
  scale_fill_manual(values = price_fleece_cols) + 
  scale_alpha_manual(values = transparency) + 
   theme(legend.title = element_text(size = rel(2)),
        strip.text = element_text(size = rel(2)),
        legend.text = element_text(size = rel(2)),
        axis.title = element_text(size = rel(2)), 
        axis.text = element_text(size = rel(2)), 
        plot.title = element_text(size = rel(2.5))) +
  labs(x = "Age", y = "Trajectory Length", title = "PRICE: Trajectory Length") +
  ylim(0, 2)
z2
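
To put numbers on the boxplots, the group medians could be tabulated (a quick sketch):

price_fleece2 %>%
  group_by(age, sex, vowel) %>%
  summarise(median_TL = median(norm_TL))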

---
title: "UKLVC analysis: PRICE"
author: "Rosie Oxbury"
date: "September 2019"
output:
  html_notebook:
    toc: true
    toc_float: true
    number_sections: true
    toc_depth: 3
---

# Introduction

This is the analysis of the PRICE vowel for Oxbury & McCarthy's UKLVC12 presentation.

## Inspect the data
Load packages:
```{r}
library(tidyverse)
library(brms)
library(sjstats)
library(bayesplot)
```

Load data:
```{r}
data <- read_csv("data/tidied_data.csv")

str(data)

data <- data %>% mutate(
  X1 = NULL,
  participant = factor(participant),
  vowel = factor(vowel),
  word = factor(word),
  task = factor(task),
  fol.phonOLOG.seg = factor(fol.phonOLOG.seg),
  fol.PHONETIC.seg = factor(fol.PHONETIC.seg),
  age = factor(age),
  sex = factor(sex),
  rol.var = factor(rol.var),
  face.l = factor(face.l),
  price.l = factor(price.l),
  fol_seg = factor(fol_seg)
)
```

Subset the key vowels:
```{r}
price_trap_fleece <- data %>% filter(vowel == "price" | vowel == "fleece" | vowel=="trap")
```

## Frequent words
Check for really frequent words:
```{r}
price_trap_fleece %>% group_by(word) %>% 
  summarise(N = n()) %>% arrange(desc(N))
```
LIKE and I are extremely frequent, so we will remove these. These could be considered for a separate analysis.

```{r}
price_no_like_I <- price_trap_fleece %>% filter(word != "LIKE",
                                                word != "I")
```

Check distribution of duration:
```{r}
ggplot(price_no_like_I, aes(x = duration, fill = age)) + geom_histogram(bins = 100)
```
Filter so that duration is not more than 0.75.
```{r}
price2 <- price_no_like_I %>% filter(duration < 0.75)
```
Try plotting again
```{r}
ggplot(price2, aes(x = duration, fill = age)) + geom_histogram(bins = 100)
```

Looks a bit better.

## Data transformations
Log-transform duration
```{r}
price3 <- mutate(price2,
               LogDur = log10(duration))
```

... and standardize it
```{r}
price3 <- mutate(price3,
               Log_dur_z = scale(LogDur))
```

Recode sex var so that M is the default
```{r}
price3$sex <- relevel(price3$sex, ref = "M")
```

Recode following segment so that 'pause' is the reference level:
```{r}
price3$fol_seg <- relevel(price3$fol_seg, ref = "fol_pause")
```


# PRICE F2
Select just PRICE and TRAP:
```{r}
just_price <- price3 %>% filter(vowel=="price")
price_trap <- price3 %>% filter(vowel=="price" | vowel=="trap")
```


Run a model. Not specifying priors for now
```{r}
price_F2_m <- brm(normF2_20 ~ Log_dur_z + age*sex + fol_seg + (1|participant) + (1|word), data = just_price)
```

Check the model summary:
```{r}
tidy_stan(price_F2_m, prob=0.89, type="fixed", digits=4)
```

Equivalence test:
```{r}
equivalence_test(price_F2_m, ci=0.89)
```

Visualise the posteriors:
```{r fig.width=6, fig.height=8}
price_F2_pos <- as.matrix(price_F2_m)

#dimnames(face_posterior1)

#color_scheme_set("brightblue")
mcmc_intervals(price_F2_pos,
           pars = c("b_Log_dur_z",
                    "b_agechild", 
                    "b_sexF",
                    "b_fol_segfol_nasal",
                    "b_fol_segfol_approximant",
                    "b_fol_segfol_voiced_C",
                    "b_fol_segfol_voiced_clus",
                    "b_fol_segfol_voiceless_C",
                    "b_fol_segfol_voiceless_clus",
                    "b_fol_segother",
                    "b_agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

mcmc_areas(price_F2_pos,
           pars = c("b_Log_dur_z",
                    "b_agechild", 
                    "b_sexF",
                    "b_fol_segfol_nasal",
                    "b_fol_segfol_approximant",
                    "b_fol_segfol_voiced_C",
                    "b_fol_segfol_voiced_clus",
                    "b_fol_segfol_voiceless_C",
                    "b_fol_segfol_voiceless_clus",
                    "b_fol_segother",
                    "b_agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)
```

## Summary of model

So it looks like:

- none of the following segments have much of an effect on F2 at onset. The posterior medians are all pretty close to zero, but also in most cases, the 89% credible interval contains 0.

- Duration has a small but pretty definitive effect on F2 at onset: a longer duration predicts a lower F2, i.e. more back nucleus to PRICE.

- Being a child as opposed to adolescent, and female adolescent as opposed to male adolescent, predicts a lower F2 at onset. Female adolescents are predicted to have an F2 at 20% than is around 0.15 (remember, working with normalized formants) lower than the boys.

- There is a positive age-sex interaction: female children are expected to have a *higher* F2 than male children. This is the opposite of the sex effect among adolescents.

## F2: plot
Establish palette
```{r}
vowel_cols <- c("#0072B2", "#99CCFF")
pd <- position_dodge(0.9)
```


```{r fig.width=8, fig.height=6}
x <- ggplot(just_price, aes(x = sex, y = normF2_20, fill=sex)) + facet_grid(cols = vars(age)) +
  geom_boxplot(width = 0.3, position = position_dodge(0.9)) + 
  theme_minimal() +
  scale_fill_manual(values = vowel_cols) +
   theme(legend.title = element_blank(),
        strip.text = element_text(size = rel(2)),
        legend.text = element_blank(),
        axis.title = element_text(size = rel(2)), 
        axis.text = element_text(size = rel(2)), 
        plot.title = element_text(size = rel(2.5))) +
  labs(x = "Age", y = "Normalized F2", title = "PRICE: F2 at 20%")
x
```

# F2: PRICE and TRAP

Here, we compare the F2 of PRICE at its onset to the F2 of TRAP. The logic is that in Cockney or RP, TRAP is central/front, while PRICE has a back onset; but then in MLE, the onsets of PRICE and TRAP are pretty similar and pretty central. 

Check that the reference vowel is price:
```{r}
price_trap$vowel <- relevel(price_trap$vowel, ref = "price")
```

Run the model:
```{r}
price_trap_m <- brm(normF2_20 ~ Log_dur_z + vowel*age*sex + fol_seg + (1+vowel|participant) + (1|word), data = price_trap)
```

Check the model summary:
```{r}
tidy_stan(price_trap_m, prob=0.89, type="fixed", digits=4)
```

Visualise the posteriors:
```{r fig.width=6, fig.height=8}
price_trap_pos <- as.matrix(price_trap_m)

#dimnames(price_trap_pos)
mcmc_intervals(price_trap_pos,
           pars = c("b_Log_dur_z",
                    "b_voweltrap",
                    "b_agechild", 
                    "b_sexF",
                    "b_agechild:sexF",
                    "b_voweltrap:agechild",
                    "b_voweltrap:sexF",
                    "b_agechild:sexF",
                    "b_voweltrap:agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

mcmc_areas(price_trap_pos,
           pars = c("b_Log_dur_z",
                    "b_voweltrap",
                    "b_agechild", 
                    "b_sexF",
                    "b_agechild:sexF",
                    "b_voweltrap:agechild",
                    "b_voweltrap:sexF",
                    "b_agechild:sexF",
                    "b_voweltrap:agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)
```

## Summary of model

From the model comparing PRICE and TRAP, it seems:

- The vowel TRAP has a median F2 actually slightly *lower* than PRICE -- whereas in Cockney or RP, we would expect TRAP to have a *higher* F2. This is a very small effect and the credible interval contains 0, so basically in frequentist terms, there isn't a significant difference between PRICE and TRAP. In fact, if there is an effect (we think there isn't), it's in the opposite direction to what we would have predicted!

- Looking at the voweltrap:agechild interaction: -0.02 + 0.04 = 0.02; male children seem to have a TRAP vowel that is more front than PRICE, based on the posterior median. This effect isn't very robust though, as the 89% credible interval contains 0.

- Looking at the voweltrap:sexF interaction: -0.02 + 0.08 = 0.06; female adolescents seem to have a TRAP vowel that is more front than PRICE. You can see from the graphs that the 89% credible interval does not contain 0; however, when you look at the posterior distribution in the mcmc_areas graph, the distribution does cross the 0 line. But when we plot PRICE and TRAP in a boxplot in a moment, you'll see that female adolescents do indeed seem to have a separation between PRICE and TRAP.

- Now the voweltrap.agechild.sexF interaction: -0.02 + 0.04 - 0.07 = -0.05; female children seem to have a TRAP vowel that is more back than PRICE -- by a very tiny amount. This effect isn't very robust though: the posterior is fairly wide and contains 0.

## Random slopes

We can also have a look at the random slopes for this model.

```{r fig.width=6, fig.height=8}
#dimnames(price_trap_pos)

# Select every per-participant voweltrap slope with a regex instead of
# listing all 38 parameter names by hand; the set selected is identical.
mcmc_intervals(price_trap_pos,
           regex_pars = "^r_participant\\[.*,voweltrap\\]$",
           prob = 0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept = 0)

mcmc_areas(price_trap_pos,
           regex_pars = "^r_participant\\[.*,voweltrap\\]$",
           prob = 0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept = 0)
```
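
The same slopes can also be pulled out as a summary table with brms's `ranef()`. A sketch, assuming the fitted model object from earlier is called `price_trap_m`:

```{r}
# Sketch: by-participant voweltrap slopes as a matrix
# (rows = participants; columns = Estimate, Est.Error and interval bounds)
trap_slopes <- ranef(price_trap_m)$participant[, , "voweltrap"]

# Order by posterior mean to see who separates TRAP and PRICE most
trap_slopes[order(trap_slopes[, "Estimate"]), ]
```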

## PRICE and TRAP boxplot

Establish palette:
```{r}
vowel_cols <- c("#0072B2", "#D55E00")
pd <- position_dodge(0.9)
```


```{r fig.width=8, fig.height=6}
x2 <- ggplot(price_trap,
             aes(x = sex,
                 y = normF2_20,
                 fill = vowel)) + 
  facet_grid(cols = vars(age)) +
  geom_boxplot(width = 0.3, position = pd) + 
  theme_minimal() + 
  scale_fill_manual(values = vowel_cols) + 
  theme(legend.title = element_text(size = rel(1.5)),
        strip.text = element_text(size = rel(1.5)),
        legend.text = element_text(size = rel(1.5)),
        axis.title = element_text(size = rel(1.5)), 
        axis.text = element_text(size = rel(1.5)), 
        plot.title = element_text(size = rel(2))) +
  labs(x = "Age", y = "Normalized F2", title = "PRICE: F2 at 20%")
x2
```

This boxplot shows that adolescent females tend to have a more front TRAP than PRICE.

# PRICE Trajectory Length

We're going to look at Trajectory Length now, so we'll subset out PRICE and FLEECE -- the logic being that we want to compare the Trajectory Length of PRICE, which is supposed to be a diphthong, with FLEECE, which is supposed to be a monophthong.

```{r}
price_fleece <- price3 %>% filter(vowel %in% c("price", "fleece"))
```

Check the distribution of normalized Trajectory Length:
```{r}
ggplot(price_fleece, aes(x = norm_TL, fill=vowel)) + geom_histogram(bins = 100)
```

Remove the outliers (tokens with a normalized TL of 2.1 or above):
```{r}
price_fleece2 <- price_fleece %>% filter(norm_TL <2.1)
```
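
A quick sanity check (a sketch) on how many tokens that cutoff discards:

```{r}
# Number of tokens removed by the norm_TL < 2.1 filter
nrow(price_fleece) - nrow(price_fleece2)
```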

Set vowel reference level to price:
```{r}
# relevel() requires a factor; vowel was read in as character, so coerce
# it first (this is harmless if it is already a factor)
price_fleece2$vowel <- relevel(factor(price_fleece2$vowel), ref = "price")
```

Build the model:
```{r}
price_traj_m <- brm(norm_TL ~ Log_dur_z + vowel*age*sex + fol_seg + (1+vowel|participant) + (1|word), data = price_fleece2)
```
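
The call above uses brms defaults (gaussian family, four chains). Before reading the coefficients, a quick posterior predictive check is cheap reassurance; a sketch, not part of the original analysis:

```{r}
# Sketch: overlay densities of replicated norm_TL data on the observed data
pp_check(price_traj_m)
```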

Model summary:
```{r}
tidy_stan(price_traj_m, prob=0.89, type="fixed", digits=4)
```
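
Note that `tidy_stan()` has since been deprecated in sjstats; if it is unavailable, the fixed effects can be summarised with brms directly:

```{r}
# Alternative: fixed-effect summaries straight from brms, with 89% intervals
fixef(price_traj_m, probs = c(0.055, 0.945))
```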

Visualise these effects:
```{r fig.width=6, fig.height=8}
price_traj_pos <- as.matrix(price_traj_m)

#dimnames(price_traj_pos)
mcmc_areas(price_traj_pos,
           pars = c("b_Log_dur_z",
                    "b_vowelfleece",
                    "b_agechild", 
                    "b_sexF",
                    "b_fol_segfol_approximant",
                    "b_fol_segfol_nasal",
                    "b_fol_segfol_voiced_C",
                    "b_fol_segfol_voiced_clus",
                    "b_fol_segfol_voiceless_C",
                    "b_fol_segfol_voiceless_clus",
                    "b_fol_segother",
                    "b_vowelfleece:agechild",
                    "b_vowelfleece:sexF",
                    "b_agechild:sexF",
                    "b_vowelfleece:agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)

mcmc_intervals(price_traj_pos,
           pars = c("b_Log_dur_z",
                    "b_vowelfleece",
                    "b_agechild", 
                    "b_sexF",
                    "b_fol_segfol_approximant",
                    "b_fol_segfol_nasal",
                    "b_fol_segfol_voiced_C",
                    "b_fol_segfol_voiced_clus",
                    "b_fol_segfol_voiceless_C",
                    "b_fol_segfol_voiceless_clus",
                    "b_fol_segother",
                    "b_vowelfleece:agechild",
                    "b_vowelfleece:sexF",
                    "b_agechild:sexF",
                    "b_vowelfleece:agechild:sexF"),
           prob=0.89) + ggplot2::theme_minimal() + ggplot2::geom_vline(xintercept=0)
```

## Summary of model

What this tells us is that:

- An increase in duration predicts an increase in Trajectory Length (unsurprisingly)

- FLEECE tends to have a shorter TL than PRICE, by about 0.08. The 89% credible interval does not contain 0, though the full posterior distribution does. This is important because it means that even adolescent males, who tend to lead in monophthongization, robustly have a PRICE vowel that is more diphthongal than their FLEECE. By contrast, we found that the adolescent boys' FACE and GOAT vowels don't seem to be any more diphthongal than their FLEECE.

- Being a child as opposed to an adolescent predicts a greater TL, by about 0.27 -- quite a big effect, and a robust one: the posterior does not contain 0. So children are much more diphthongal than adolescents (see the boxplot below).

- Being a female adolescent as opposed to a male adolescent predicts a greater TL, by about 0.08. The 89% credible interval does not contain 0, though the full posterior does.

- Age-sex interaction: the agechild:sexF coefficient is about -0.1142, i.e. the child-vs-adolescent increase in TL is about 0.11 smaller for girls than for boys. For girls, being a child rather than an adolescent adds about 0.27 - 0.1142 = 0.1558 to TL, versus roughly 0.27 for boys. The 89% credible interval on this interaction term does not contain 0, but note that the full posterior does.

- Most following segments predict a greater TL than a following pause does -- that is, pre-pausal tokens are somewhat more monophthongal. These effects are small (less than 0.01), and for all except following voiced consonant the 89% credible interval contains 0.

- There is a negative vowelfleece:agechild interaction: -0.08 - 0.27 = -0.35, so for children the TL of FLEECE tends to be about 0.35 less than the TL of PRICE: children separate the two vowels even more sharply than adolescents do.

- vowelfleece:sexF interaction: -0.08 - 0.03 = -0.11: for adolescent girls, FLEECE has a shorter TL than PRICE by about 0.11. This effect isn't robust, though: the 89% credible interval on the interaction contains 0, so the FLEECE-PRICE difference for adolescent girls may be no different from what it is for adolescent boys. The interaction could even be positive (e.g. -0.08 + 0.0537 = -0.03), in which case the FLEECE-PRICE gap would be smaller for girls than for boys.

- vowelfleece:agechild:sexF: this posterior is also very wide, with the 89% credible interval containing 0, so we wouldn't want to conclude that the FLEECE-PRICE difference for female children is any different from what it is for male children. (The sketch below recomputes one of these combined effects directly from the posterior draws.)
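
As with the PRICE-TRAP model, combined effects like these are better computed from the posterior draws than by adding point estimates. A minimal sketch for the children's FLEECE-PRICE difference, using the `price_traj_pos` matrix created above (coefficient names as in the mcmc plots):

```{r}
# Sketch: FLEECE-PRICE difference in TL for children, computed per draw
# (vowelfleece main effect + vowelfleece:agechild interaction)
fleece_child <- price_traj_pos[, "b_vowelfleece"] +
  price_traj_pos[, "b_vowelfleece:agechild"]

quantile(fleece_child, probs = c(0.055, 0.945))  # 89% credible interval
mean(fleece_child < 0)  # posterior probability FLEECE is more monophthongal
```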

## Boxplot of PRICE Trajectory Length
```{r fig.width=8, fig.height=6}
price_fleece_cols <- c("#0072B2", "#E69F00")


z <- ggplot(price_fleece2, aes(x = sex, y = norm_TL, fill = vowel)) + facet_grid(cols = vars(age)) +
  geom_boxplot(width = 0.3, position = position_dodge(0.9)) + 
  theme_minimal() + 
  scale_fill_manual(values = price_fleece_cols) + 
   theme(legend.title = element_text(size = rel(2)),
        strip.text = element_text(size = rel(2)),
        legend.text = element_text(size = rel(2)),
        axis.title = element_text(size = rel(2)), 
        axis.text = element_text(size = rel(2)), 
        plot.title = element_text(size = rel(2.5))) +
  labs(x = "Age", y = "Trajectory Length", title = "PRICE: Trajectory Length") +
  ylim(0, 2)
z
```

```{r fig.width=8, fig.height=6}
price_fleece_cols <- c("#0072B2", "#E69F00")

transparency <- c(1, 0.2)
faded <- c("#000000", "#CCCCCC")

z2 <- ggplot(price_fleece2, aes(x = sex, y = norm_TL, fill = vowel, alpha=vowel)) + facet_grid(cols = vars(age)) +
  geom_boxplot(aes(colour=vowel), width = 0.3, position = position_dodge(0.9)) + 
  theme_minimal() + 
   scale_colour_manual(values = faded) +
  scale_fill_manual(values = price_fleece_cols) + 
  scale_alpha_manual(values = transparency) + 
   theme(legend.title = element_text(size = rel(2)),
        strip.text = element_text(size = rel(2)),
        legend.text = element_text(size = rel(2)),
        axis.title = element_text(size = rel(2)), 
        axis.text = element_text(size = rel(2)), 
        plot.title = element_text(size = rel(2.5))) +
  labs(x = "Age", y = "Trajectory Length", title = "PRICE: Trajectory Length") +
  ylim(0, 2)
z2
```
