1. Load the dataset into R:
# Load the dataset.
# An earlier version (commented out, not executed) read the Rdatasets copy at
# https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/mediation/student.csv
# Question 7: read the CSV through a raw GitHub link instead.
theURL <- "https://raw.githubusercontent.com/letisalbal/R-Bridge-Homework-2/main/mediationstudent.csv"
# header = TRUE and sep = "," are the read.csv() defaults, so they are omitted.
mediation_stud <- read.csv(theURL)
# Show the first six rows to confirm the data loaded.
print(head(mediation_stud, n = 6L))
## X fight attachment work score late coed smorale gender income free pared
## 1 1 0 0 0 46 1 1 5 1 10 3 0
## 2 2 1 0 0 48 5 1 5 0 9 3 0
## 3 3 1 1 0 72 3 1 5 1 13 3 1
## 4 4 0 1 1 57 1 1 5 0 12 3 0
## 5 5 0 1 0 51 2 1 5 0 11 3 0
## 6 6 0 1 1 52 2 1 5 0 7 3 0
## catholic SCH_ID
## 1 0 1
## 2 0 1
## 3 0 1
## 4 0 1
## 5 0 1
## 6 0 1
2. Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Build the new data frame under a new name:
# rows 1 through 10 and six selected columns of the original.
media_stud_subset <- mediation_stud[
  seq_len(10),
  c("fight", "score", "late", "coed", "gender", "income")
]
print(media_stud_subset)
## fight score late coed gender income
## 1 0 46 1 1 1 10
## 2 1 48 5 1 0 9
## 3 1 72 3 1 1 13
## 4 0 57 1 1 0 12
## 5 0 51 2 1 0 11
## 6 0 52 2 1 0 7
## 7 0 42 3 1 0 7
## 8 0 41 2 1 0 4
## 9 0 28 2 1 1 8
## 10 0 56 1 1 0 12
# Filter the rows and keep a single column of the data frame.
# Keep only the score column, for rows whose score is 65 or higher.
# which() discards NA comparisons, the same way subset() does.
mediation_stud_subset <- mediation_stud[
  which(mediation_stud$score >= 65),
  "score",
  drop = FALSE
]
print(head(mediation_stud_subset))
## score
## 3 72
## 58 67
## 75 70
## 83 65
## 104 66
## 114 66
print(tail(mediation_stud_subset))
## score
## 9661 68
## 9665 76
## 9670 66
## 9674 65
## 9676 70
## 9678 68
# Keep only the late column, for rows whose late value is below 3.
# which() discards NA comparisons, the same way subset() does.
mediation_stud_subset2 <- mediation_stud[
  which(mediation_stud$late < 3),
  "late",
  drop = FALSE
]
print(head(mediation_stud_subset2))
## late
## 1 1
## 4 1
## 5 2
## 6 2
## 8 2
## 9 2
print(tail(mediation_stud_subset2))
## late
## 9671 2
## 9673 2
## 9674 2
## 9677 1
## 9678 1
## 9679 1
# Keep only the income column, for rows whose income is 9 or lower.
# which() discards NA comparisons, the same way subset() does.
mediation_stud_subset3 <- mediation_stud[
  which(mediation_stud$income <= 9),
  "income",
  drop = FALSE
]
print(head(mediation_stud_subset3))
## income
## 2 9
## 6 7
## 7 7
## 8 4
## 9 8
## 12 9
print(tail(mediation_stud_subset3))
## income
## 9660 9
## 9664 9
## 9666 7
## 9668 4
## 9671 6
## 9675 8
3. Create new column names for the new data frame.
# Show the current column names of the ten-row subset
print(names(media_stud_subset))
## [1] "fight" "score" "late" "coed" "gender" "income"
# Replace every column name with a capitalised, "_sub"-suffixed version,
# then print the names to confirm the change
names(media_stud_subset) <- c(
  "Fight_sub", "Score_sub", "Late_sub",
  "CoEd_sub", "Gender_sub", "Income_sub"
)
print(names(media_stud_subset))
## [1] "Fight_sub" "Score_sub" "Late_sub" "CoEd_sub" "Gender_sub"
## [6] "Income_sub"
5. For at least 3 values in a column please rename so that every value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.
library("stringr")
# Pull the Gender_sub column out of the data frame as a plain vector.
vectorx <- media_stud_subset[, "Gender_sub"]
# Convert the codes to character so they can be replaced with text labels.
# as.character() is vectorised and always returns a character vector, whereas
# sapply(x, as.character) returns an empty list for zero-length input.
vectorx <- as.character(vectorx)
# List the distinct codes present in the column.
unique(vectorx)
## [1] "1" "0"
Many of the columns in my data frame contain only two distinct values, so I decided to replace the values “0” and “1” in the Gender_sub column.
# Replace the code "0" with "Male" and the code "1" with "Female".
# The patterns are anchored with ^ and $ so that only a value that is exactly
# "0" or "1" is rewritten; an unanchored "0" or "1" would also match inside a
# longer value such as "10" and corrupt it.
vectorx <- str_replace(
  string = vectorx, pattern = "^0$", replacement = "Male"
)
vectorx <- str_replace(
  string = vectorx, pattern = "^1$", replacement = "Female"
)
# Confirm that only the two text labels remain.
unique(vectorx)
## [1] "Female" "Male"
# Write the relabelled values back into the Gender_sub column
media_stud_subset[["Gender_sub"]] <- vectorx
# Display the first rows of the updated data frame
print(head(media_stud_subset))
## Fight_sub Score_sub Late_sub CoEd_sub Gender_sub Income_sub
## 1 0 46 1 1 Female 10
## 2 1 48 5 1 Male 9
## 3 1 72 3 1 Female 13
## 4 0 57 1 1 Male 12
## 5 0 51 2 1 Male 11
## 6 0 52 2 1 Male 7
6. Display enough rows to see examples of all of steps 1 - 5 above.
See steps 1 - 5
7. BONUS - place the original .csv in a GitHub file and have R read from the link. This will be a very useful skill as you progress in your data science education and career.
See Part 1 for Question 7
Please submit your .rmd file and the .csv file as well as a link to your RPubs.