# stringr provides every str_* helper used in this script; attach it up front
# so the script runs in a fresh session.
library(stringr)

# Load the roster from an absolute path; adjust it when running elsewhere.
data <- read.csv("/cloud/project/2024_US_Olympic.csv")

# Remove trailing whitespace from first names so that end-anchored patterns
# (such as "[aeiouAEIOU]$") test the last letter rather than padding.
data$First_Name <- str_trim(data$First_Name, side = "right")

# Preview the first five rows.
head(data, n = 5)
##   First_Name        Last_Name             Sport Hometown_City Hometown_State
## 1      BRADY          ELLISON           Archery      Billings        Montana
## 2   CATALINA         GNORIEGA           Archery     San Diego     California
## 3      CASEY         KAUFHOLD           Archery     Lancaster   Pennsylvania
## 4   JENNIFER MUCINO-FERNANDEZ           Archery   Chula Vista     California
## 5      ANITA          ALVAREZ Artistic Swimming       Buffalo       New York
##   Gender                            Event
## 1   Male                 Men's Individual
## 2 Female Women's Individual, Women's Team
## 3 Female Women's Individual, Women's Team
## 4 Female Women's Individual, Women's Team
## 5 Female                             Team

# Q1: 100 first names start with a vowel; women are more likely to.

# TRUE for every first name whose first character is a vowel (either case).
starts_with_vowel <- str_detect(data[["First_Name"]], "^[aeiouAEIOU]")

# Split the matches by the Gender column.
countmale <- sum(starts_with_vowel & data[["Gender"]] == "Male")
countfemale <- sum(starts_with_vowel & data[["Gender"]] == "Female")

cat("Men: ", countmale)
## Men:  29
cat("Female: ", countfemale)
## Female:  71
cat("All together: ", countfemale + countmale)
## All together:  100

# Q2: 211 first names end with a vowel; women are more likely to.

# TRUE for every first name whose last character is a vowel (either case).
ends_with_vowel <- str_detect(data[["First_Name"]], "[aeiouAEIOU]$")

# Split the matches by the Gender column.
countmale <- sum(ends_with_vowel & data[["Gender"]] == "Male")
countfemale <- sum(ends_with_vowel & data[["Gender"]] == "Female")

cat("Men: ", countmale)
## Men:  37
cat("Female: ", countfemale)
## Female:  174
cat("All together: ", countfemale + countmale)
## All together:  211

# Q3: 31 first names contain at least one character outside A-Z / a-z.

# Flag names holding any non-letter character (space, hyphen, period, ...).
has_non_letter <- str_detect(data[["First_Name"]], "[^A-Za-z]")
countname <- sum(has_non_letter)
cat("Atypical First names: ", countname)
## Atypical First names:  31

# Q4: a) ZACHERY
#     b) and c): re-run this section to obtain the answers. The patterns for
#     both parts were corrected (see the notes below), so the lists that were
#     recorded here earlier no longer apply.

# a) Starts with "Z" or "ZH" and ends with "Y". Any name starting with "ZH"
#    also starts with "Z", so a single leading "Z" covers both cases.
name1 <- str_subset(data$First_Name, "^Z.*Y$")
cat("Starts with “Z” or “ZH” and ends with “Y”: ", name1)
## Starts with “Z” or “ZH” and ends with “Y”:  ZACHERY

# b) More than 3 vowels in a row means a run of at least 4 vowels. The earlier
#    pattern "[^aeiouAEOU]{6,}" was negated (it matched runs of NON-vowels),
#    omitted upper-case "I", and used the wrong run length.
name2 <- str_subset(data$First_Name, "[aeiouAEIOU]{4,}")
cat("Has more than 3 vowels in a row in it: ", name2)
# Recorded output removed: it was produced by the incorrect pattern. Re-run.

# c) More than 5 consonants in a row means a run of at least 6 consonants.
#    The class lists consonant letters explicitly (Y counted as a consonant)
#    so that spaces, hyphens and other punctuation do not extend a run, which
#    the earlier negated class "[^aeiouAEIOU]" allowed.
name3 <- str_subset(
  data$First_Name,
  "[b-df-hj-np-tv-zB-DF-HJ-NP-TV-Z]{6,}"
)
cat("Has more than 5 consonants in a row in it #: ", name3)
# Recorded output removed: it relied on spaces being counted as consonants
# (e.g. "CONNER LYNN"). Re-run to regenerate.

# Q5: 18 last names contain at least one character outside A-Z / a-z.

# Flag last names holding any non-letter character.
has_non_letter <- str_detect(data[["Last_Name"]], "[^A-Za-z]")
countname <- sum(has_non_letter)
cat("Atypical Last names: ", countname)
## Atypical Last names:  18

# Q6: a) VAN LITH, KLOTH, LEIBFARTH, HOLLINGSWORTH, SMITH, BOOTH, SMITH,
#        SMITH, SMITH, SMITH
#     b) ZHANG, ZHANG, ZHANG

last_names <- data[["Last_Name"]]

# a) Last names whose final two characters are "TH".
name1 <- str_subset(last_names, "TH$")
cat("Ends with “TH”: ", name1)
## Ends with “TH”:  VAN LITH KLOTH LEIBFARTH HOLLINGSWORTH SMITH BOOTH SMITH SMITH SMITH SMITH

# b) Last names whose first two characters are "ZH".
name2 <- str_subset(last_names, "^ZH")
cat("Starts with “ZH”: ", name2)
## Starts with “ZH”:  ZHANG ZHANG ZHANG

# Q7: see the output below.

# Convert each name column to lower case, then to title case.
fName <- str_to_lower(data[["First_Name"]])
fName <- str_to_title(fName)
LName <- str_to_lower(data[["Last_Name"]])
LName <- str_to_title(LName)

# Show the first five athletes as "First Last".
cat(paste(fName[1], LName[1]))
## Brady Ellison
cat(paste(fName[2], LName[2]))
## Catalina Gnoriega
cat(paste(fName[3], LName[3]))
## Casey Kaufhold
cat(paste(fName[4], LName[4]))
## Jennifer Mucino-Fernandez
cat(paste(fName[5], LName[5]))
## Anita Alvarez

# Q8: 2 hometown cities are longer than 20 characters.

# Character length of every hometown city, then count those above 20.
city_length <- str_length(data[["Hometown_City"]])
count <- sum(city_length > 20)
cat("Greater than 20 characters: ", count)
## Greater than 20 characters:  2

# Q9: 11 hometown cities have three or more words.

# A city with k spaces is treated as k + 1 words, so "three or more words"
# is the same as "at least two spaces".
space_count <- str_count(data[["Hometown_City"]], " ")
count <- sum(space_count >= 2)
cat("Greater than 3 or more words: ", count)
## Greater than 3 or more words:  11

# Q10: see the output below.

# Keep only the Sport values that contain an opening parenthesis.
ftable <- data$Sport[str_detect(data$Sport, "\\(")]
table(ftable)
## ftable
##        Basketball (3x3)        Basketball (5x5)         BMX (Freestyle) 
##                       8                      24                       4 
##            BMX (Racing) Cycling (Mountain Bike) 
##                       5                       4

# Move the parenthesised part in front of the sport name. The first group is
# lazy ("(.*?)") so that "\\s*" consumes the space before the parenthesis;
# with a greedy "(.*)" that space stayed inside group 1 and every result
# ended in a trailing blank (e.g. "(3x3) Basketball ").
sparanthesis <- str_replace(ftable, "^(.*?)\\s*(\\(.*\\))$", "\\2 \\1")

print(table(sparanthesis))
## sparanthesis
##        (3x3) Basketball        (5x5) Basketball         (Freestyle) BMX 
##                       8                      24                       4 
## (Mountain Bike) Cycling            (Racing) BMX 
##                       4                       5

# Q11: 208 participants.

# TRUE when the Event text mentions "Team".
checkTeam <- str_detect(data[["Event"]], "Team")

# TRUE when the Sport is one of the listed sports.
checkEvent <- str_detect(
  data[["Sport"]],
  "Basketball|Field Hockey|Rugby|Soccer"
)

# An athlete counts once if either condition holds.
finalcheck <- sum(checkEvent | checkTeam)

cat("Number of participants in team events: ", finalcheck)
## Number of participants in team events:  208