#lab_assignment
#Doug Mui

# Task 1: load students.csv, preview the first rows, and show the structure of
# the resulting data frame.
students <- read.csv("students.csv") # read the CSV file into a data frame
# Print explicitly (like the later tasks do) so the preview also appears when
# the script is run through source(), which does not auto-print values.
print(head(students))
##   student_id      name                      email        dob registration_date
## 1        101    Alice         alice123@deanza.edu 2002-05-12        2023-08-15
## 2        102       Bob        bob_smith@gmail.com 2001-11-23        2023-08-16
## 3        103  Charlie  charlie@missioncollege.edu 2003-02-05        2023-08-15
## 4        104     David         david123@yahoo.com 2000-09-30        2023-08-17
str(students) # str() displays the compact structure: column types and values
## 'data.frame':    4 obs. of  5 variables:
##  $ student_id       : int  101 102 103 104
##  $ name             : chr  " Alice " "Bob" " Charlie " "David"
##  $ email            : chr  "alice123@deanza.edu" "bob_smith@gmail.com" "charlie@missioncollege.edu" "david123@yahoo.com"
##  $ dob              : chr  "2002-05-12" "2001-11-23" "2003-02-05" "2000-09-30"
##  $ registration_date: chr  "2023-08-15" "2023-08-16" "2023-08-15" "2023-08-17"
# Task 2: tidy the name column by stripping surrounding whitespace and then
# applying title case.
library(stringr) # provides str_trim() and str_to_title()
# Trim first, then title-case, in a single composed expression.
students[["name"]] <- str_to_title(str_trim(students[["name"]]))
print(students[["name"]]) # show the cleaned names
## [1] "Alice"   "Bob"     "Charlie" "David"
# Task 3: convert dob to Date, compute each student's age in completed years,
# then display name with age.
students$dob <- as.Date(students$dob) # parse the "YYYY-MM-DD" strings as Date
today <- Sys.Date() # reference date for the age calculation
# Age = difference in calendar years, minus one when this year's birthday has
# not happened yet. Dividing elapsed weeks by 52 treats a year as 364 days, so
# the result drifts by more than a day per year and can report a birthday
# several weeks early.
year_diff <- as.numeric(format(today, "%Y")) -
  as.numeric(format(students$dob, "%Y"))
# Compare month-day as integers (e.g. 512 for May 12) to see whether the
# birthday is still ahead in the current year.
birthday_pending <- as.integer(format(today, "%m%d")) <
  as.integer(format(students$dob, "%m%d"))
students$age <- year_diff - birthday_pending # logical is coerced to 0/1
print(students[, c("name", "age")]) # print student name with age
# Sample output from the original run; ages depend on the date the script runs.
##      name age
## 1   Alice  24
## 2     Bob  24
## 3 Charlie  23
## 4   David  25
# Task 4: parse registration_date as a Date, derive the registration year and
# weekday name from it, then print them alongside the student name.
students[["registration_date"]] <- as.Date(students[["registration_date"]])
# format() with "%Y" yields the four-digit year as a character string.
students[["registration_year"]] <- format(
  students[["registration_date"]],
  format = "%Y"
)
# weekdays() yields the full (non-abbreviated) day name.
students[["registration_weekday"]] <- weekdays(
  students[["registration_date"]],
  abbreviate = FALSE
)

# Show name, registration date, year, and weekday.
print(students[c("name", "registration_date", "registration_year", "registration_weekday")])
##      name registration_date registration_year registration_weekday
## 1   Alice        2023-08-15              2023              Tuesday
## 2     Bob        2023-08-16              2023            Wednesday
## 3 Charlie        2023-08-15              2023              Tuesday
## 4   David        2023-08-17              2023             Thursday
# Task 5: flag students whose email address ends in ".edu", then list every
# student with that flag.
regex_pattern <- "\\.edu$" # a literal ".edu" anchored to the end of the string
# Domain names are case-insensitive, so also accept ".EDU", ".Edu", etc.
students$valid_edu <- grepl(regex_pattern, students$email, ignore.case = TRUE)
# TRUE marks students with a .edu address, FALSE everyone else.
print(students[, c("name", "email", "valid_edu")])
##      name                      email valid_edu
## 1   Alice        alice123@deanza.edu      TRUE
## 2     Bob        bob_smith@gmail.com     FALSE
## 3 Charlie charlie@missioncollege.edu      TRUE
## 4   David         david123@yahoo.com     FALSE
# Task 6: report the total student count, the number of students with a .edu
# email, and the average student age. Each figure is computed and then printed
# with its label.
total_students <- dim(students)[1L] # number of rows, one per student
cat("Total Student Count:", total_students, "\n")
## Total Student Count: 4

sum_student_edu <- sum(students[["valid_edu"]]) # each TRUE counts as 1
cat("Number of students with edu emails:", sum_student_edu, "\n")
## Number of students with edu emails: 2
average_age <- mean(students[["age"]]) # arithmetic mean of the age column
cat("Average Student Age:", average_age, "\n")
## Average Student Age: 24
# Task 7: find students who registered on a Tuesday and list their name and
# registration date.
# Compare the numeric day of the week (POSIXlt wday counts from 0 = Sunday, so
# 2 = Tuesday) instead of the weekday name, whose spelling depends on the
# session locale.
is_tuesday <- as.POSIXlt(students$registration_date)$wday == 2L
# which() drops rows whose date is NA instead of returning all-NA rows.
tuesday_students <- students[which(is_tuesday), ]
print(tuesday_students[, c("name", "registration_date")]) # name with reg date
##      name registration_date
## 1   Alice        2023-08-15
## 3 Charlie        2023-08-15