#lab_assignment
#Doug Mui
# Task 1: load students.csv, print the first rows, then print the structure.
# Uses `<-` for assignment, like every other assignment in this script.
students <- read.csv("students.csv")
# Explicit print() so the rows are also shown when the script is run through
# source(), where top-level values are not auto-printed.
print(head(students))
## student_id name email dob registration_date
## 1 101 Alice alice123@deanza.edu 2002-05-12 2023-08-15
## 2 102 Bob bob_smith@gmail.com 2001-11-23 2023-08-16
## 3 103 Charlie charlie@missioncollege.edu 2003-02-05 2023-08-15
## 4 104 David david123@yahoo.com 2000-09-30 2023-08-17
str(students) # compact summary: one line per column with its type and values
## 'data.frame': 4 obs. of 5 variables:
## $ student_id : int 101 102 103 104
## $ name : chr " Alice " "Bob" " Charlie " "David"
## $ email : chr "alice123@deanza.edu" "bob_smith@gmail.com" "charlie@missioncollege.edu" "david123@yahoo.com"
## $ dob : chr "2002-05-12" "2001-11-23" "2003-02-05" "2000-09-30"
## $ registration_date: chr "2023-08-15" "2023-08-16" "2023-08-15" "2023-08-17"
# Task 2: tidy the name column by stripping surrounding whitespace and
# title-casing each name.
library(stringr) # provides str_trim() and str_to_title()
# Trim first, then title-case, in one nested call.
students[["name"]] <- str_to_title(str_trim(students[["name"]]))
print(students[["name"]]) # show the cleaned names
## [1] "Alice" "Bob" "Charlie" "David"
# Task 3: convert dob to Date, compute each student's age in completed years,
# then display name with age.
students$dob <- as.Date(students$dob) # "YYYY-MM-DD" strings parse with the default format
today <- Sys.Date() # reference date for the age calculation
# Compare the two calendar dates as YYYYMMDD numbers: the integer quotient by
# 10000 is the number of birthdays already reached. Dividing elapsed weeks by
# 52 would treat a year as 364 days, so the floored age would roll over
# several weeks before the actual birthday.
students$age <- (as.numeric(format(today, "%Y%m%d")) -
  as.numeric(format(students$dob, "%Y%m%d"))) %/% 10000
print(students[, c("name", "age")]) # name alongside age in whole years
## name age
## 1 Alice 24
## 2 Bob 24
## 3 Charlie 23
## 4 David 25
# Task 4: convert registration_date to Date, derive the year and the weekday
# name from it, and print the result.
students[["registration_date"]] <- as.Date(students[["registration_date"]])
# Four-digit year, as a character string.
students[["registration_year"]] <- format(
  students[["registration_date"]],
  "%Y"
)
# Full weekday name for each registration date.
students[["registration_weekday"]] <- weekdays(students[["registration_date"]])
print(
  students[c(
    "name",
    "registration_date",
    "registration_year",
    "registration_weekday"
  )]
)
## name registration_date registration_year registration_weekday
## 1 Alice 2023-08-15 2023 Tuesday
## 2 Bob 2023-08-16 2023 Wednesday
## 3 Charlie 2023-08-15 2023 Tuesday
## 4 David 2023-08-17 2023 Thursday
# Task 5: flag students whose email address ends in ".edu", then list every
# student with that flag.
regex_pattern <- "\\.edu$" # a literal ".edu" anchored at the end of the string
# Domain names are case-insensitive, so an address such as "X@SCHOOL.EDU" must
# be flagged too; grepl() returns FALSE (not NA) for missing emails.
students$valid_edu <- grepl(regex_pattern, students$email, ignore.case = TRUE)
print(students[, c("name", "email", "valid_edu")]) # TRUE marks a .edu address
## name email valid_edu
## 1 Alice alice123@deanza.edu TRUE
## 2 Bob bob_smith@gmail.com FALSE
## 3 Charlie charlie@missioncollege.edu TRUE
## 4 David david123@yahoo.com FALSE
# Task 6: report the total student count, the number of students with a .edu
# email, and the average student age. Each figure is computed right before the
# line that prints it.
total_students <- nrow(students) # one row per student
cat("Total Student Count:", total_students, "\n")
## Total Student Count: 4
sum_student_edu <- sum(students[["valid_edu"]]) # each TRUE counts as 1
cat("Number of students with edu emails:", sum_student_edu, "\n")
## Number of students with edu emails: 2
average_age <- mean(students[["age"]]) # arithmetic mean of the age column
cat("Average Student Age:", average_age, "\n")
## Average Student Age: 24
# Task 7: find the students who registered on a Tuesday and list their name
# and registration date.
# `%in%` never yields NA, so a student with a missing weekday is simply left
# out; subsetting with `==` would add an all-NA row for each such student.
# registration_weekday comes from weekdays(), which returns day names in the
# session locale, so "Tuesday" only matches under an English locale.
tuesday_students <- students[students$registration_weekday %in% "Tuesday", ]
print(tuesday_students[, c("name", "registration_date")])
## name registration_date
## 1 Alice 2023-08-15
## 3 Charlie 2023-08-15