First, check the working directory with getwd().
If it is not set to the project folder yet,
set it with setwd().
# Print the current working directory.
getwd()
## [1] "/Users/saurabhkumarnath/Desktop/Transcription/FastTrack"
# Switch the session's working directory to the project folder.
# NOTE(review): every file path in the visible code is absolute, so nothing
# shown here depends on this setting - confirm before relying on it.
setwd("/Users/saurabhkumarnath/Desktop/Transcription/FastTrack")
Now, load all the libraries needed for the project.
library(dplyr)
library(data.table)
library(ggplot2)
library(vowels)
library(ggpubr)
library(phonR)
library(phonTools)
Let’s import the required files from FastTrack. There are two ways: either use file.choose() or paste the file path. FastTrack output files are usually broken, so correct them manually before importing. Things to check: file, output file, & vowel A.
# Interactive alternative: choose each file in a dialog instead of
# hard-coding its path.
# import1 <- read.csv(file.choose())
# import2 <- read.csv(file.choose())

# Hard-coded locations of the two input tables.
aggregated_path <- "/Users/saurabhkumarnath/Desktop/kamrup/kusum/processed_data/aggregated_data.csv"
segmentation_path <- "/Users/saurabhkumarnath/Desktop/kamrup/kusum/KamrupHajoKusumchoudhuryF01H6_segmentation_info.csv"

import1 <- read.csv(aggregated_path)
import2 <- read.csv(segmentation_path)
Run the following to see the files before proceeding to merge.
#View(import1)
#View(import2)
Now, merge the files. Check that the two column names, file and outputfile, are already present in the imported files.
# Join the two tables by matching import1$file against import2$outputfile.
# all.y = TRUE keeps every row of import2, including rows that have no
# matching row in import1.
import <- merge(
  x = import1,
  y = import2,
  by.x = "file",
  by.y = "outputfile",
  all.y = TRUE
)
Add a syllable column. This will give us the syllable structure of the words in terms of c and v.
# Build a c/v skeleton of each word in three ordered passes:
#   1. consonants -> "c" (digraphs are listed before single letters, so
#      e.g. "ph" collapses to one "c" rather than two);
#   2. vowels -> "v";
#   3. drop any whitespace sitting between a "c" and the "v" that follows it.
# NOTE(review): letters that appear in neither pattern (e.g. "w") are left
# untouched, which is where skeletons such as "cvwvc" in the frequency table
# come from - confirm whether they should be classified.
# NOTE(review): "t" is listed twice in the consonant pattern; the repeat is
# harmless.
import$syllable <- import$word %>%
  gsub("ph|th|kh|bh|dh|gh|ng|p|t|k|b|d|g|t|r|l|h|x|n|s|z|m", "c", .) %>%
  gsub("a|e|i|o|u|y", "v", .) %>%
  gsub("c\\s*v", "cv", ., ignore.case = TRUE, perl = TRUE)
To view the output, use View(import). The following code
will give us the position of the target vowel.
# Count the "v" characters in each syllable skeleton.
# vapply() pins the result to exactly one integer per row; sapply() would
# silently return a list for zero-row input.
import$vowelnumbers <- vapply(
  strsplit(import$syllable, ""),
  function(chars) sum(chars == "v"),
  integer(1)
)

# Rank the rows inside each (vowelnumbers, word_start) group by their start
# value. arrange() sorts the whole table; row_number() then counts within
# each group.
# ungroup() is applied afterwards so the grouping does not leak into every
# data frame derived from this one.
# NOTE: this reuses the names import1 / import2, replacing the raw imports.
import1 <- import %>%
  group_by(vowelnumbers, word_start) %>%
  arrange(start) %>%
  mutate(v_rank = row_number()) %>%
  ungroup()

# Translate the rank into a position label. Words with one vowel get "only";
# words with two or three vowels get "first" / "second" / "third" from the
# rank modulo the vowel count; everything else is "later".
import2 <- import1 %>%
  mutate(
    vposition = case_when(
      vowelnumbers == 1 ~ "only",
      vowelnumbers == 2 & v_rank %% 2 == 1 ~ "first",
      vowelnumbers == 2 & v_rank %% 2 == 0 ~ "second",
      vowelnumbers == 3 & (v_rank %% 3 == 1) ~ "first",
      vowelnumbers == 3 & (v_rank %% 3 == 2) ~ "second",
      vowelnumbers == 3 & (v_rank %% 3 == 0) ~ "third",
      TRUE ~ "later"
    )
  )
To see the result, use View(import2). Check the overall
frequency of words by syllable structure.
# Tally how often each syllable skeleton occurs.
syllable_frequency <- table(import2[["syllable"]])
# Order the tally from most to least frequent and show it.
syllable_frequency_sorted <- sort(
  syllable_frequency,
  decreasing = TRUE
)
print(syllable_frequency_sorted)
##
## cvcv cvv cvcvc cvcvv vcv cvcvcv cvc
## 330 140 114 114 112 105 77
## cvcvvv cv vcvc cvccv cvcvcvcv vcvcv cvcvcvc
## 76 51 46 42 28 24 21
## cvvcvcv vcvcvcv cvvcv cvcvcvv vccv vcvv cvccvcv
## 20 20 18 16 16 15 12
## cvccvcvc cvcvcvcvc cvvc cvvcvc cvvv vcvcvc vcvvv
## 12 12 12 12 12 12 12
## vvcv cvvvcvcv v vcvvcv cvccvc vccvcv vvccv
## 12 10 9 8 6 6 6
## cvcvvcvcv cvvcvcvcv cvvcvcvcvc vcvcvvvc vvcvcvcvc cvcvccvv cvcvvvc
## 5 5 5 5 5 4 4
## cvvcvcvc cvvcvv cvvvcv cvwvc vccvccvv vvcvcv vvcvcvc
## 4 4 4 4 4 4 4
## vvcvv vvvcv ccvvcv cvccvv cvcvvc cvvccv cvvvc
## 4 4 3 3 3 3 3
## cvwvcv vccvcvc ccvccvc ccvv vc vv ccvc
## 3 3 2 2 2 2 1
## cvcc cvw
## 1 1
Remove unnecessary tokens from the present data; only keep cvc, cvcvc, cvccv, cvcv, cvccvc and their first vowels.
# Keep every row of a "cvc" word, plus the rows labelled as the first vowel
# of "cvcv", "cvcvc", "cvccv" and "cvccvc" words.
import3 <- import2 %>%
  filter(
    syllable == "cvc" |
      (syllable %in% c("cvcv", "cvcvc", "cvccv", "cvccvc") &
        vposition == "first")
  )
Check selected words’ frequency
# Cross-tabulate syllable skeleton (rows) against vowel label (columns)
# and show the result.
frequency_table <- table(
  import3[["syllable"]],
  import3[["vowel"]]
)
print(frequency_table)
##
## a A e i u
## cvc 43 26 1 4 3
## cvccv 11 5 1 1 3
## cvccvc 2 0 0 0 1
## cvcv 55 47 14 30 19
## cvcvc 24 19 7 4 3
# Keep only rows whose vowel label is one of the six listed labels.
import4 <- import3 %>%
  filter(vowel %in% c("a", "A", "e", "o", "u", "i"))
Add the height of following vowels
# For the first vowel of a two-vowel word, record the following vowel.
# The greedy ".*" makes the capture group land on the LAST letter of the word
# that matches [aeiouAEIOU]; all other rows get the placeholder "nil".
# NOTE(review): "y" is counted as a vowel when the syllable skeleton is built
# but is not in this character class - confirm that is intended.
import5 <- import4 %>%
  mutate(
    fvowel = ifelse(
      vowelnumbers == 2 & vposition == "first",
      sub(".*([aeiouAEIOU]).*", "\\1", word),
      "nil"
    )
  )

# Classify the following vowel: "nil" stays "nil", "i"/"u" are "high",
# anything else is "nonhigh".
import6 <- import5 %>%
  mutate(
    fvowelheight = ifelse(
      fvowel == "nil",
      "nil",
      ifelse(fvowel %in% c("i", "u"), "high", "nonhigh")
    )
  )
Add syllabletype of the target vowel
# First pass: classify from the skeleton alone.
#   one vowel  -> "closed" if the skeleton ends in "c", "open" if it ends in "v"
#   two vowels -> "closed" if the vowels are separated by "cc", "open" if by
#                 a single "c"
#   otherwise  -> "nil"
import7 <- import6 %>%
  mutate(
    syllabletype = case_when(
      vowelnumbers == 1 & endsWith(syllable, "c") ~ "closed",
      vowelnumbers == 1 & endsWith(syllable, "v") ~ "open",
      vowelnumbers == 2 & grepl("[v]cc[v]", syllable) ~ "closed",
      vowelnumbers == 2 & grepl("[v]c[v]", syllable) ~ "open",
      TRUE ~ "nil"
    )
  )

# Second pass: for the second vowel of a two-vowel word the type is decided
# by the final character of the skeleton instead; every other row keeps its
# first-pass value.
import8 <- import7 %>%
  mutate(
    syllabletype = case_when(
      vowelnumbers == 2 & vposition == "second" & endsWith(syllable, "c") ~ "closed",
      vowelnumbers == 2 & vposition == "second" & endsWith(syllable, "v") ~ "open",
      TRUE ~ syllabletype
    )
  )
# Drop the columns that are not used from here on.
import9 <- import8 %>%
  select(
    -c(
      duration.x, v_rank, fvowel, label, group,
      color, number, interval, omit, word_interval
    )
  )
Add following consonant of the target vowel
# Take a raw two-character slice that contains the consonant following the
# target vowel:
#   closed syllable, vowel is "second" or "only" -> last two characters of
#                                                   the word
#   closed syllable, vowel is "first"            -> the two characters right
#                                                   after the first vowel
#                                                   letter in the word
#   everything else (including open syllables)   -> placeholder "q"
vowelalldata11 <- import9 %>%
  mutate(
    fconsonant = case_when(
      syllabletype == "closed" & vposition %in% c("second", "only") ~
        substr(word, nchar(word) - 1, nchar(word)),
      syllabletype == "closed" & vposition == "first" ~ {
        v1_at <- regexpr("[aeiouAEIOU]", word)
        substr(word, v1_at + 1, v1_at + 2)
      },
      TRUE ~ "q"
    )
  )
# Clean the raw slice in two nested steps:
#   inner gsub - strip any vowel letters;
#   outer gsub - keep a listed digraph (ph, th, kh, bh, dh, gh, ng) whole;
#                for any other two-letter run keep only the first letter.
vowelalldata12 <- vowelalldata11 %>%
  mutate(
    fconsonant = gsub(
      "(ph|th|kh|bh|dh|gh|ng)|([a-zA-Z])([a-zA-Z])",
      "\\1\\2",
      gsub("[aeiouAEIOU]", "", fconsonant)
    )
  )

# For the first vowel of an open syllable, replace the placeholder with the
# letters found between two vowel letters of the word; all other rows keep
# the value computed above.
vowelalldata12$fconsonant <- with(
  vowelalldata12,
  ifelse(
    syllabletype == "open" & vposition == "first",
    gsub(".*[aeiouAEIOU]([a-zA-Z]+)[aeiouAEIOU].*", "\\1", word),
    fconsonant
  )
)
# Map each following consonant to its manner class through a lookup table.
# Values that are not in the table (including NA) come out as NA; the
# placeholder "q" is passed through unchanged.
vowelalldata12$maner <- local({
  manner_of <- c(
    p = "stops", t = "stops", k = "stops",
    b = "stops", d = "stops", g = "stops",
    ph = "aspirated", th = "aspirated", kh = "aspirated",
    bh = "aspirated", dh = "aspirated", gh = "aspirated",
    s = "fricative", z = "fricative", x = "fricative", h = "fricative",
    r = "nonobstruents", l = "nonobstruents",
    m = "nasal", n = "nasal", ng = "nasal",
    q = "q"
  )
  unname(manner_of[vowelalldata12$fconsonant])
})
Identify E harmony vowels, basically CVCV
# Start svowel as a copy of the original vowel labels.
vowelalldata12$svowel <- vowelalldata12$vowel

# Step 1: tag "e" in an open first syllable that is followed by a high vowel
# as "E".
vowelalldata12$svowel[with(
  vowelalldata12,
  svowel == "e" & vposition == "first" & syllabletype == "open" &
    fvowelheight == "high"
)] <- "E"

# Step 2: swap the two labels everywhere. The tokens tagged in step 1 end up
# as "e", and every other "e" ends up as "E" (a pre-existing "E" also
# becomes "e").
vowelalldata12$svowel <- chartr(
  old = "eE",
  new = "Ee",
  x = vowelalldata12$svowel
)
Similarly, identify A harmony vowels
# Relabel "A" as "M" when it is in an open first syllable that is followed
# by a high vowel.
vowelalldata12$svowel[with(
  vowelalldata12,
  svowel == "A" & vposition == "first" & syllabletype == "open" &
    fvowelheight == "high"
)] <- "M"

# Drop the placeholder rows whose file name is "--".
# %in% never returns NA, so a row with a missing file name is kept intact;
# with `file != "--"` such a row would be selected through an NA index.
vowelalldata12 <- vowelalldata12[!(vowelalldata12$file %in% "--"), ]
#View(vowelalldata12)
Check selected vowels’ distribution
# Three-way tally: syllable skeleton x recoded vowel label, with one slice
# per height class of the following vowel.
frequency_table <- table(
  vowelalldata12[["syllable"]],
  vowelalldata12[["svowel"]],
  vowelalldata12[["fvowelheight"]]
)
print(frequency_table)
## , , = high
##
##
## a A e E i M u
## cvc 0 0 0 0 0 0 0
## cvccv 3 2 0 1 0 0 0
## cvccvc 0 0 0 0 0 0 0
## cvcv 28 0 10 0 17 24 9
## cvcvc 9 0 3 0 0 13 0
##
## , , = nil
##
##
## a A e E i M u
## cvc 41 22 0 1 4 0 3
## cvccv 0 0 0 0 0 0 0
## cvccvc 0 0 0 0 0 0 0
## cvcv 0 0 0 0 0 0 0
## cvcvc 0 0 0 0 0 0 0
##
## , , = nonhigh
##
##
## a A e E i M u
## cvc 0 0 0 0 0 0 0
## cvccv 8 3 0 0 1 0 3
## cvccvc 2 0 0 0 0 0 1
## cvcv 26 14 0 4 7 0 6
## cvcvc 14 4 0 4 3 0 3
#View(vowelalldata12)
Save file
#output_file <- "/Users/saurabhkumarnath/Desktop/Transcription/ranju/ranjufinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/Transcription/sunita/sunitafinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/Transcription/Anima/animafinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/Transcription/Girija/girijafinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/Transcription/parme/parmefinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/Transcription/gayari/gayarifinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/Transcription/pratibha/pratibhafinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/bongaigaon/ashokfinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/bongaigaon/dilipfinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/bongaigaon/nimalfinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/bongaigaon/raheswarfinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/kamrup/birendrabaihatafinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/kamrup/gitikachaigaonfinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/kamrup/jogeshfinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/kamrup/binafinal.csv"
#output_file <- "/Users/saurabhkumarnath/Desktop/kamrup/kusumfinal.csv"
#write.csv(vowelalldata12, file = output_file, row.names = FALSE)