# Attach stringr explicitly: every str_*() call in this script comes from it,
# and nothing else visible in the script loads the package.
library(stringr)

# Read the 2024 US Olympic data set and strip trailing whitespace from the
# first names, so that end-anchored patterns such as "[aeiouAEIOU]$" test the
# last letter of the name rather than a padding space.
data <- read.csv("/cloud/project/2024_US_Olympic.csv")
data$First_Name <- str_trim(data$First_Name, side = "right")

# Preview the first five rows.
head(data, n = 5)
## First_Name Last_Name Sport Hometown_City Hometown_State
## 1 BRADY ELLISON Archery Billings Montana
## 2 CATALINA GNORIEGA Archery San Diego California
## 3 CASEY KAUFHOLD Archery Lancaster Pennsylvania
## 4 JENNIFER MUCINO-FERNANDEZ Archery Chula Vista California
## 5 ANITA ALVAREZ Artistic Swimming Buffalo New York
## Gender Event
## 1 Male Men's Individual
## 2 Female Women's Individual, Women's Team
## 3 Female Women's Individual, Women's Team
## 4 Female Women's Individual, Women's Team
## 5 Female Team
# Q1: 100 first names start with a vowel; women are more likely to.
# Build the "starts with a vowel" mask once and reuse it for both genders.
starts_with_vowel <- str_detect(data$First_Name, "^[aeiouAEIOU]")
countmale <- sum(starts_with_vowel & data$Gender == "Male")
countfemale <- sum(starts_with_vowel & data$Gender == "Female")
cat("Men: ", countmale)
## Men: 29
cat("Female: ", countfemale)
## Female: 71
cat("All together: ", countfemale + countmale)
## All together: 100
# Q2: 211 first names end with a vowel; women are more likely to.
# Build the "ends with a vowel" mask once and reuse it for both genders.
ends_with_vowel <- str_detect(data$First_Name, "[aeiouAEIOU]$")
countmale <- sum(ends_with_vowel & data$Gender == "Male")
countfemale <- sum(ends_with_vowel & data$Gender == "Female")
cat("Men: ", countmale)
## Men: 37
cat("Female: ", countfemale)
## Female: 174
cat("All together: ", countfemale + countmale)
## All together: 211
# Q3: 31 first names contain at least one character that is not an ASCII
# letter (anything outside A-Z / a-z, e.g. a space or a hyphen).
first_has_non_letter <- str_detect(data$First_Name, "[^A-Za-z]")
countname <- sum(first_has_non_letter)
cat("Atypical First names: ", countname)
## Atypical First names: 31
# Q4: a) ZACHERY
#     b) must be re-run: the earlier answer came from a pattern that matched
#        runs of non-vowels instead of runs of vowels
#     c) expected to be empty (see the note under c)

# a) Starts with "Z" or "ZH" and ends with "Y". The "ZH" alternative is
#    already covered by "Z"; it is kept to mirror the wording of the question.
name1 <- str_subset(data$First_Name, "^(Z|ZH).*Y$")
cat("Starts with “Z” or “ZH” and ends with “Y”: ", name1)
## Starts with “Z” or “ZH” and ends with “Y”: ZACHERY

# b) "More than 3 vowels in a row" is a run of at least 4 vowels. The previous
#    pattern, "[^aeiouAEOU]{6,}", was a negated class (and was missing "I"), so
#    it selected names with six consecutive NON-vowel characters instead.
name2 <- str_subset(data$First_Name, "[aeiouAEIOU]{4,}")
cat("Has more than 3 vowels in a row in it: ", name2)
## (recorded output dropped: it was produced by the incorrect pattern above;
##  re-run this statement to regenerate it)

# c) "More than 5 consonants in a row" is a run of at least 6 consonant
#    letters. An explicit consonant class is used because the negated class
#    "[^aeiouAEIOU]" also counts spaces and hyphens as consonants. That is the
#    only reason CONNER LYNN ("R LYNN") and BRYNN TAYLOR ("BRYNN T") were
#    reported before; neither contains six consecutive consonant letters.
name3 <- str_subset(data$First_Name, "[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z]{6,}")
cat("Has more than 5 consonants in a row in it #: ", name3)
## (expected to print only the label for the recorded data, because the two
##  earlier matches relied on the space; re-run to confirm)
# Q5: 18 last names contain at least one character that is not an ASCII
# letter (anything outside A-Z / a-z).
last_has_non_letter <- str_detect(data$Last_Name, "[^A-Za-z]")
countname <- sum(last_has_non_letter)
cat("Atypical Last names: ", countname)
## Atypical Last names: 18
# Q6: a) VAN LITH KLOTH LEIBFARTH HOLLINGSWORTH SMITH BOOTH SMITH SMITH SMITH SMITH
#     b) ZHANG ZHANG ZHANG
last_names <- data$Last_Name

# a) Last names whose final two letters are "TH".
name1 <- str_subset(last_names, "TH$")
cat("Ends with “TH”: ", name1)
## Ends with “TH”: VAN LITH KLOTH LEIBFARTH HOLLINGSWORTH SMITH BOOTH SMITH SMITH SMITH SMITH

# b) Last names whose first two letters are "ZH".
name2 <- str_subset(last_names, "^ZH")
cat("Starts with “ZH”: ", name2)
## Starts with “ZH”: ZHANG ZHANG ZHANG
# Q7: see the output below.
# Convert a character vector to title case by lower-casing it first and then
# capitalising the first letter of each word.
to_name_case <- function(x) {
  lowered <- str_to_lower(x)
  str_to_title(lowered)
}

fName <- to_name_case(data$First_Name)
LName <- to_name_case(data$Last_Name)

# Print the first five converted names; each cat() call writes
# "<first> <last>" with no trailing newline.
for (i in seq_len(5)) {
  cat(fName[i], LName[i])
}
## Brady Ellison
## Catalina Gnoriega
## Casey Kaufhold
## Jennifer Mucino-Fernandez
## Anita Alvarez
# Q8: 2 hometown cities have names longer than 20 characters.
city_name_length <- str_length(data$Hometown_City)
count <- sum(city_name_length > 20)
cat("Greater than 20 characters: ", count)
## Greater than 20 characters: 2
# Q9: 11 hometown cities consist of three or more words.
# Words are counted as (number of single spaces + 1), so "three or more words"
# is the same as "two or more spaces".
# NOTE(review): consecutive or trailing spaces would inflate this count;
# confirm the column contains single separators only.
space_count <- str_count(data$Hometown_City, " ")
count <- sum(space_count >= 2)
cat("Greater than 3 or more words: ", count)
## Greater than 3 or more words: 11
# Q10: see the output below.
# Keep only the sports whose name contains an opening parenthesis, then
# tabulate them.
ftable <- data$Sport[str_detect(data$Sport, "\\(")]
table(ftable)
## ftable
## Basketball (3x3) Basketball (5x5) BMX (Freestyle)
## 8 24 4
## BMX (Racing) Cycling (Mountain Bike)
## 5 4

# Move the parenthesised qualifier in front of the sport name, e.g.
# "Basketball (3x3)" -> "(3x3) Basketball".
# The first group is lazy ("(.*?)") so that the following "\\s*" consumes the
# space before the parenthesis. With a greedy "(.*)" that space stayed inside
# group 1 and every rewritten label ended with a trailing blank.
sparanthesis <- str_replace(ftable, "^(.*?)\\s*(\\(.*\\))$", "\\2 \\1")
print(table(sparanthesis))
## sparanthesis
## (3x3) Basketball (5x5) Basketball (Freestyle) BMX
## 8 24 4
## (Mountain Bike) Cycling (Racing) BMX
## 4 5
# Q11: 208 participants take part in a team event.
# A participant counts when the event description mentions "Team" or when the
# sport is one of the listed team sports.
event_text <- data$Event
sport_text <- data$Sport
checkTeam <- str_detect(event_text, "Team")
checkEvent <- str_detect(sport_text, "Basketball|Field Hockey|Rugby|Soccer")
is_team_participant <- checkTeam | checkEvent
finalcheck <- sum(is_team_participant)
cat("Number of participants in team events: ", finalcheck)
## Number of participants in team events: 208