library(tidyverse)
library(here)
# Load the two aligned inputs from data/aligning/ under the project root
# (paths are resolved with here()).

# Prosodic feature frames.
prosodic_features <-
  here("data", "aligning", "joined-data-prosodic-uncertainty-aligned.csv") %>%
  read_csv()

# Merged content log.
content_log <-
  here("data", "aligning", "LogClass_IG__T103 14-04-22 Content Log Merged-aligned.csv") %>%
  read_csv()
Note that the values in frame_time_minutes_aligned (e.g., approximately 2.96)
are aligned to (that is, expressed in the units of) the UT. Correspondingly, a frame_time_minutes
value of 0.059 corresponds to a UT time of approximately 2.96. Thus, the frame_time_minutes_aligned
variable represents the time stamp in terms of the UT, and it can be used to join this data with other data on the UT.
Here’s a brief look at the two variables:
# Print the raw and the aligned time columns side by side for comparison.
select(
  prosodic_features,
  frame_time_minutes,
  frame_time_minutes_aligned
)
## # A tibble: 2,913 × 2
## frame_time_minutes frame_time_minutes_aligned
## <dbl> <dbl>
## 1 0.0593 2.96
## 2 0.251 3.15
## 3 0.394 3.29
## 4 0.410 3.31
## 5 0.426 3.33
## 6 0.469 3.37
## 7 0.475 3.38
## 8 0.492 3.39
## 9 0.579 3.48
## 10 0.634 3.53
## # … with 2,903 more rows
Next, we can filter the prosodic features data to include only the segment with the small group we are focused on:
4 minutes, 50 seconds = 4.8333333 minutes; 25 minutes, 50 seconds = 25.8333333 minutes.
# Keep only the frames that fall inside the focal small-group segment of the
# aligned timeline: strictly after 4 min 50 s (4 + 50/60, about 4.83 minutes)
# and strictly before 25 min 50 s (25 + 50/60, about 25.83 minutes).
# Both bounds are exclusive. Conditions passed to filter() as separate
# arguments are combined with &.
prosodic_features_filtered <- prosodic_features %>%
  filter(
    frame_time_minutes_aligned > (4 + 50/60),
    frame_time_minutes_aligned < (25 + 50/60)
  )
Let’s look just at the key variables:
# Reduce the filtered frames to the variables of interest: the aligned time,
# every column from F0_SMA through pcm_loudness_sma, and the raw frame time.
# The F0_SMA:pcm_loudness_sma range is positional, so it relies on the column
# order of the input (the summary printed below reports 4 columns in total,
# i.e. the range covers just those two columns).
prosodic_features_filtered_key_vars <- select(
  prosodic_features_filtered,
  frame_time_minutes_aligned,
  F0_SMA:pcm_loudness_sma,
  frame_time_minutes
)

# Summary statistics for each retained column. The data is deliberately passed
# to skim() through the pipe, matching the "Piped data" label in the printed
# summary.
prosodic_features_filtered_key_vars %>%
  skimr::skim()
Name | Piped data |
Number of rows | 550 |
Number of columns | 4 |
_______________________ | |
Column type frequency: | |
numeric | 4 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
frame_time_minutes_aligned | 0 | 1 | 15.15 | 6.27 | 4.84 | 9.52 | 15.08 | 20.79 | 25.78 | ▇▆▇▆▇ |
F0_SMA | 0 | 1 | 74.92 | 54.37 | 0.00 | 40.68 | 65.08 | 97.60 | 359.13 | ▇▅▁▁▁ |
pcm_loudness_sma | 0 | 1 | 0.16 | 0.06 | 0.06 | 0.12 | 0.15 | 0.19 | 0.42 | ▆▇▃▁▁ |
frame_time_minutes | 0 | 1 | 12.25 | 6.27 | 1.94 | 6.62 | 12.18 | 17.89 | 22.88 | ▇▆▇▆▇ |
Let’s write this file so we can use it in other analyses.
# Save the key-variable table as CSV. The file name is a bare relative path, so
# it is resolved against the current working directory (unlike the inputs above,
# which are located with here()).
write_csv(prosodic_features_filtered_key_vars, "prosodic-features-filtered-key-vars.csv")
Let’s visualize these variables for the roughly 20 minute segment:
# Scatter plot of F0_SMA against the aligned time stamp, then converted to an
# interactive plotly widget.
p <- ggplot(
  prosodic_features_filtered_key_vars,
  aes(x = frame_time_minutes_aligned, y = F0_SMA)
) +
  geom_point()

plotly::ggplotly(p)
# Scatter plot of pcm_loudness_sma against the aligned time stamp, then
# converted to an interactive plotly widget. Reuses (overwrites) `p`.
p <- ggplot(
  prosodic_features_filtered_key_vars,
  aes(x = frame_time_minutes_aligned, y = pcm_loudness_sma)
) +
  geom_point()

plotly::ggplotly(p)
needs help
4 minutes, 50 seconds = 4.8333333 minutes; 25 minutes, 50 seconds = 25.8333333 minutes.
# Restrict the content log to the code and start-time columns, then keep the
# entries that start strictly between 4 min 50 s and 25 min 50 s (both bounds
# exclusive). Separate filter() arguments are combined with &.
content_log_segmented <- content_log %>%
  select(code01, starttime_minutes) %>%
  filter(
    starttime_minutes > (4 + 50/60),
    starttime_minutes < (25 + 50/60)
  )
# Strip plot of the content-log entries: one point per entry at a constant
# height (y = 1), coloured by code01, with the x axis limited to the segment
# window.
ggplot(
  content_log_segmented,
  aes(x = starttime_minutes, y = 1, color = code01)
) +
  xlim(4 + 50/60, 25 + 50/60) +
  geom_point()
A first attempt at visualizing the content log together with the prosodic features (pcm_loudness_sma):
# pcm_loudness_sma over aligned time, with the content-log entries overlaid as
# crosses (shape 3) along y = 0 and coloured by code01.
ggplot(
  prosodic_features_filtered_key_vars,
  aes(x = frame_time_minutes_aligned, y = pcm_loudness_sma)
) +
  geom_point() +
  geom_point(
    data = content_log_segmented,
    mapping = aes(x = starttime_minutes, y = 0, color = code01),
    shape = 3,
    size = 4
  )
A first attempt at visualizing the content log together with the prosodic features (F0_SMA):
# F0_SMA over aligned time, with the content-log entries overlaid as crosses
# (shape 3) along y = 0 and coloured by code01.
ggplot(
  prosodic_features_filtered_key_vars,
  aes(x = frame_time_minutes_aligned, y = F0_SMA)
) +
  geom_point() +
  geom_point(
    data = content_log_segmented,
    mapping = aes(x = starttime_minutes, y = 0, color = code01),
    shape = 3,
    size = 4
  )