Homework 1

# Load stringr explicitly: every str_*() helper used in this document comes
# from it, and no library() call is visible elsewhere in the script.
library(stringr)

# Read the athlete roster. The path is absolute, so it must be adjusted when
# the script is run outside the environment it was written in.
data <- read.csv("/cloud/project/2024_US_Olympic.csv")

# Strip trailing whitespace from first names (right side only), presumably so
# that the "$"-anchored patterns used later see the final letter of the name.
data$First_Name <- str_trim(data$First_Name, side = "right")

# Preview the first five rows.
head(data, n = 5)

##   First_Name        Last_Name             Sport Hometown_City Hometown_State
## 1      BRADY          ELLISON           Archery      Billings        Montana
## 2   CATALINA         GNORIEGA           Archery     San Diego     California
## 3      CASEY         KAUFHOLD           Archery     Lancaster   Pennsylvania
## 4   JENNIFER MUCINO-FERNANDEZ           Archery   Chula Vista     California
## 5      ANITA          ALVAREZ Artistic Swimming       Buffalo       New York
##   Gender                            Event
## 1   Male                 Men's Individual
## 2 Female Women's Individual, Women's Team
## 3 Female Women's Individual, Women's Team
## 4 Female Women's Individual, Women's Team
## 5 Female                             Team

#Q1: 100 first names start with a vowel; women are more likely to have one.

# Flag first names whose first character is a vowel (either case), then count
# the flagged rows separately for each gender.
starts_with_vowel <- str_detect(data$First_Name, "^[aeiouAEIOU]")
is_male <- data$Gender == "Male"
is_female <- data$Gender == "Female"

countmale <- sum(is_male & starts_with_vowel)
countfemale <- sum(is_female & starts_with_vowel)

cat("Men: ", countmale)

## Men:  29

cat("Female: ", countfemale)

## Female:  71

# Combined total across both genders.
total_vowel_start <- countfemale + countmale
cat("All together: ", total_vowel_start)

## All together:  100

#Q2: 211 first names end with a vowel; women are more likely to have one.

# Flag first names whose last character is a vowel (either case), then count
# the flagged rows separately for each gender.
ends_with_vowel <- str_detect(data$First_Name, "[aeiouAEIOU]$")
is_male <- data$Gender == "Male"
is_female <- data$Gender == "Female"

countmale <- sum(is_male & ends_with_vowel)
countfemale <- sum(is_female & ends_with_vowel)

cat("Men: ", countmale)

## Men:  37

cat("Female: ", countfemale)

## Female:  174

# Combined total across both genders.
total_vowel_end <- countfemale + countmale
cat("All together: ", total_vowel_end)

## All together:  211

#Q3: 31 first names contain at least one non-letter character

# A first name is "atypical" when it contains any character other than an
# ASCII letter (space, hyphen, apostrophe, digit, ...).
has_non_letter_first <- str_detect(data$First_Name, "[^A-Za-z]")
countname <- sum(has_non_letter_first)
cat("Atypical First names: ", countname)

## Atypical First names:  31

#Q4: #a) ZACHERY #b) BRITTNEY CHRISTOPHER CHRISTIAN KRISTI KRISTI RYLAN WILLIAM DERRICK SCOTT CONNER LYNN MAXIMILIAN TRINITY GRIFFIN GRIFFIN KRISTINA CHRISTOPHER CHRISTOPHER AALIYAH NICKOLE CHRISTIAN BRYNN TAYLOR WHITTNI CHRISTOPHER EMILY MARY CHASE WILLIAM #c) CONNER LYNN BRYNN TAYLOR

# NOTE(review): patterns b) and c) below were corrected. The answers recorded
# for b) and c) in the Q4 comment above, and the "##" output lines shown under
# b) and c) in this section, were produced by the old patterns and are stale
# until the document is re-knit.

# a) First names that start with "Z" and end with "Y". A name starting with
#    "ZH" also starts with "Z", so one "Z" anchor covers both cases; the
#    original (Z|ZH) alternation was redundant.
name1 <- str_subset(data$First_Name, "^Z.*Y$")
cat("Starts with “Z” or “ZH” and ends with “Y”: ", name1)

## Starts with “Z” or “ZH” and ends with “Y”:  ZACHERY

# b) First names with more than 3 vowels in a row, i.e. a run of 4 or more.
#    The old pattern "[^aeiouAEOU]{6,}" was a copy of the consonant pattern:
#    a negated class (with "I" missing) and a {6,} quantifier, so it matched
#    runs of non-vowels rather than runs of vowels.
name2 <- str_subset(data$First_Name, "[aeiouAEIOU]{4,}")
cat("Has more than 3 vowels in a row in it: ", name2)

## Has more than 3 vowels in a row in it:  BRITTNEY CHRISTOPHER CHRISTIAN KRISTI KRISTI RYLAN WILLIAM DERRICK SCOTT CONNER LYNN MAXIMILIAN TRINITY GRIFFIN GRIFFIN KRISTINA CHRISTOPHER CHRISTOPHER AALIYAH NICKOLE CHRISTIAN BRYNN TAYLOR WHITTNI CHRISTOPHER EMILY MARY CHASE WILLIAM

# c) First names with more than 5 consonants in a row, i.e. a run of 6 or more.
#    The old negated class "[^aeiouAEIOU]" also matched spaces and punctuation,
#    so two-word names such as "CONNER LYNN" matched across the space. The
#    explicit class below accepts consonant letters only ("Y" is treated as a
#    consonant).
name3 <- str_subset(data$First_Name, "[B-DF-HJ-NP-TV-Zb-df-hj-np-tv-z]{6,}")
cat("Has more than 5 consonants in a row in it #: ", name3)

## Has more than 5 consonants in a row in it #:  CONNER LYNN BRYNN TAYLOR

#Q5: 18 Atypical last names

# A last name is "atypical" when it contains any character other than an
# ASCII letter (space, hyphen, apostrophe, digit, ...).
has_non_letter_last <- str_detect(data$Last_Name, "[^A-Za-z]")
countname <- sum(has_non_letter_last)
cat("Atypical Last names: ", countname)

## Atypical Last names:  18

#6: #a) VAN LITH KLOTH LEIBFARTH HOLLINGSWORTH SMITH BOOTH SMITH SMITH SMITH SMITH #b) ZHANG ZHANG ZHANG

# Both searches run over the same column, so pull it out once.
last_names <- data$Last_Name

# a) Last names whose final two characters are "TH".
name1 <- str_subset(last_names, "TH$")
cat("Ends with “TH”: ", name1)

## Ends with “TH”:  VAN LITH KLOTH LEIBFARTH HOLLINGSWORTH SMITH BOOTH SMITH SMITH SMITH SMITH

# b) Last names whose first two characters are "ZH".
name2 <- str_subset(last_names, "^ZH")
cat("Starts with “ZH”: ", name2)

## Starts with “ZH”:  ZHANG ZHANG ZHANG

#7 See Output!

# Convert the name columns to title case: lower-case them first, then
# capitalise the first letter of each word.
first_lower <- str_to_lower(data$First_Name)
last_lower <- str_to_lower(data$Last_Name)
fName <- str_to_title(first_lower)
LName <- str_to_title(last_lower)

# "First Last" for every athlete; the first five are printed one at a time.
full_name <- paste(fName, LName)

cat(full_name[1])

## Brady Ellison

cat(full_name[2])

## Catalina Gnoriega

cat(full_name[3])

## Casey Kaufhold

cat(full_name[4])

## Jennifer Mucino-Fernandez

cat(full_name[5])

## Anita Alvarez

#8 2 Hometown Cities

# Count hometown cities whose name is longer than 20 characters.
city_name_length <- str_length(data$Hometown_City)
count <- sum(city_name_length > 20)
cat("Greater than 20 characters: ", count)

## Greater than 20 characters:  2

#9 11 Hometown Cities

# Estimate the word count of each city as (number of spaces + 1), then count
# the cities with three or more words.
city_word_count <- str_count(data$Hometown_City, " ") + 1
count <- sum(city_word_count >= 3)
cat("Greater than 3 or more words: ", count)

## Greater than 3 or more words:  11

#10 See Output

# Keep only the sports whose name contains an opening parenthesis, then
# tabulate how many athletes fall under each of those names.
has_paren <- str_detect(data$Sport, "\\(")
ftable <- data$Sport[has_paren]
table(ftable)

## ftable
##        Basketball (3x3)        Basketball (5x5)         BMX (Freestyle) 
##                       8                      24                       4 
##            BMX (Racing) Cycling (Mountain Bike) 
##                       5                       4

# Move the parenthesised qualifier to the front: "BMX (Racing)" becomes
# "(Racing) BMX".
# Group 1 is lazy (.*?) so that \\s* consumes the space before "(". With the
# previous greedy (.*) that space was captured into group 1, so every
# rewritten label ended with a stray trailing space.
sparanthesis <- str_replace(ftable, "^(.*?)\\s*(\\(.*\\))$", "\\2 \\1")

# The output below reflects the corrected labels (no trailing space).
print(table(sparanthesis))

## sparanthesis
##        (3x3) Basketball        (5x5) Basketball         (Freestyle) BMX 
##                       8                      24                       4 
## (Mountain Bike) Cycling            (Racing) BMX 
##                       4                       5

#11 208 Participants

# An athlete counts as a team-event participant when either
#   * their Event text mentions "Team", or
#   * their Sport is one of the listed team sports.
team_sports_pattern <- "Basketball|Field Hockey|Rugby|Soccer"

checkTeam <- str_detect(data$Event, "Team")
checkEvent <- str_detect(data$Sport, team_sports_pattern)

# Element-wise OR, so an athlete matching both conditions is counted once.
is_team_participant <- checkEvent | checkTeam
finalcheck <- sum(is_team_participant)

cat("Number of participants in team events: ", finalcheck)

## Number of participants in team events:  208

Homework 1

Dev Amin

2025-09-25