# 1 - Read file marks1.csv
# The file is looked up relative to the current working directory.
marks <- read.csv("marks1.csv")

# 2 - Check the data frame info using a few available functions
# head() shows the first rows; str() shows the type of every column.
head(marks, 3)
## X X.1 test asgn Prsnt Final q1 q2 q3 q4
## 1 60001 Ahmad 15 14 17 13 0 9 2 4
## 2 60003 Abu 26 13 18 22 3 5 8 6
## 3 60006 Samy 21 15 19 25 6 7 4 8
str(marks)
## 'data.frame': 11 obs. of 10 variables:
## $ X : int 60001 60003 60006 60008 60009 60011 60014 60015 60023 60025 ...
## $ X.1 : chr "Ahmad" "Abu" "Samy" "Chong" ...
## $ test : int 15 26 21 25 25 18 30 16 18 30 ...
## $ asgn : int 14 13 15 10 15 15 15 15 13 14 ...
## $ Prsnt: int 17 18 19 17 16 19 19 19 18 18 ...
## $ Final: int 13 22 25 14 20 22 28 20 22 24 ...
## $ q1 : num 0 3 6 2 3 4 4 4 2 5.5 ...
## $ q2 : int 9 5 7 3 7 7 5 5 5 6 ...
## $ q3 : int 2 8 4 4 6 4 9 6 7 5 ...
## $ q4 : num 4 6 8 5 4 7 10 5 8 7.5 ...
# 3 - Check the names of the variables in the data frame
names(marks)
## [1] "X" "X.1" "test" "asgn" "Prsnt" "Final" "q1" "q2" "q3"
## [10] "q4"
# 4 - Rename the first variable X to ID
names(marks)[1] <- "ID"

# 5 - Rename the second variable X.1 to StuName
# Select the column by its current name rather than by position.
colnames(marks)[colnames(marks) == "X.1"] <- "StuName"
b_marks <- marks # backup of marks df for later use

# 6 - Remove the first two columns from the data frame
col <- c("ID", "StuName")
# TRUE for the columns to drop; negating it keeps everything else.
myCols <- colnames(marks) %in% col
marks <- marks[!myCols]
marks # first two columns removed
## test asgn Prsnt Final q1 q2 q3 q4
## 1 15 14 17 13 0.0 9 2 4.0
## 2 26 13 18 22 3.0 5 8 6.0
## 3 21 15 19 25 6.0 7 4 8.0
## 4 25 10 17 14 2.0 3 4 5.0
## 5 25 15 16 20 3.0 7 6 4.0
## 6 18 15 19 22 4.0 7 4 7.0
## 7 30 15 19 28 4.0 5 9 10.0
## 8 16 15 19 20 4.0 5 6 5.0
## 9 18 13 18 22 2.0 5 7 8.0
## 10 30 14 18 24 5.5 6 5 7.5
## 11 12 10 12 12 1.0 5 1 6.0
# 7 - Use apply() function to sum all the marks in the data frame and put them
# in a new vector called Total and bind the vector to the data frame
marks <- b_marks # restore our df before removing two columns
# MARGIN = 1 applies sum() to each row; columns 1-2 (ID, StuName) are left out
# so only the mark columns are added up.
Total <- apply(marks[, -(1:2)], 1, sum)
marks <- cbind(marks, Total)
marks
## ID StuName test asgn Prsnt Final q1 q2 q3 q4 Total
## 1 60001 Ahmad 15 14 17 13 0.0 9 2 4.0 74
## 2 60003 Abu 26 13 18 22 3.0 5 8 6.0 101
## 3 60006 Samy 21 15 19 25 6.0 7 4 8.0 105
## 4 60008 Chong 25 10 17 14 2.0 3 4 5.0 80
## 5 60009 Paul 25 15 16 20 3.0 7 6 4.0 96
## 6 60011 John 18 15 19 22 4.0 7 4 7.0 96
## 7 60014 Devi 30 15 19 28 4.0 5 9 10.0 120
## 8 60015 Pillip 16 15 19 20 4.0 5 6 5.0 90
## 9 60023 Meilin 18 13 18 22 2.0 5 7 8.0 93
## 10 60025 Lily 30 14 18 24 5.5 6 5 7.5 110
## 11 60026 Jamil 12 10 12 12 1.0 5 1 6.0 59
# 8 - Using a user defined function called function(), use the apply() function
# to add variable 1 to variable 3, and write to a new variable in the data
# frame called CW.
marks_cw <- marks[c("test", "asgn", "Prsnt")]
# Each row x holds (test, asgn, Prsnt); CW is the sum of those three marks.
marks$CW <- apply(marks_cw, 1, FUN = function(x) {
  x[1] + x[2] + x[3]
})
marks
## ID StuName test asgn Prsnt Final q1 q2 q3 q4 Total CW
## 1 60001 Ahmad 15 14 17 13 0.0 9 2 4.0 74 46
## 2 60003 Abu 26 13 18 22 3.0 5 8 6.0 101 57
## 3 60006 Samy 21 15 19 25 6.0 7 4 8.0 105 55
## 4 60008 Chong 25 10 17 14 2.0 3 4 5.0 80 52
## 5 60009 Paul 25 15 16 20 3.0 7 6 4.0 96 56
## 6 60011 John 18 15 19 22 4.0 7 4 7.0 96 52
## 7 60014 Devi 30 15 19 28 4.0 5 9 10.0 120 64
## 8 60015 Pillip 16 15 19 20 4.0 5 6 5.0 90 50
## 9 60023 Meilin 18 13 18 22 2.0 5 7 8.0 93 49
## 10 60025 Lily 30 14 18 24 5.5 6 5 7.5 110 62
## 11 60026 Jamil 12 10 12 12 1.0 5 1 6.0 59 34
# Lab Practice
# Create a data frame mydata, with the following data
# Built column by column so every variable has its final type from the start
# (v1-v6 and age numeric, gender character) instead of being coerced while
# rows are appended to an empty frame.
mydata <- data.frame(
  v1 = c(49, 80, 79, 41, 41, 52, 28, 8, 76, 8),
  v2 = c(95, 46, 3, 100, 1, 59, 65, 82, 17, 20),
  v3 = c(32, 96, 48, 96, 61, 54, 36, 18, 73, 67),
  v4 = c(11, 56, 96, 48, 47, 84, 5, 84, 47, 16),
  v5 = c(21, 41, 73, 47, 6, 20, 69, 77, 26, 79),
  v6 = c(3, 46, 90, 42, 89, 48, 78, 82, 16, 65),
  gender = c("F", "F", "M", "F", "M", "M", "F", "M", "M", "F"),
  age = c(82, 2, 64, 93, 28, 28, 71, 68, 46, 1),
  stringsAsFactors = FALSE
)
mydata
## v1 v2 v3 v4 v5 v6 gender age
## 1 49 95 32 11 21 3 F 82
## 2 80 46 96 56 41 46 F 2
## 3 79 3 48 96 73 90 M 64
## 4 41 100 96 48 47 42 F 93
## 5 41 1 61 47 6 89 M 28
## 6 52 59 54 84 20 48 M 28
## 7 28 65 36 5 69 78 F 71
## 8 8 82 18 84 77 82 M 68
## 9 76 17 73 47 26 16 M 46
## 10 8 20 67 16 79 65 F 1
b_mydata <- mydata # backup

# Make a list with values v1, v2, v3 and assign to myvars, then using myvars,
# select out variables v1 to v3 from mydata and keep in newdata. newdata should
# look as shown below.
myvars <- c("v1", "v2", "v3")
# Indexing with a character vector keeps exactly the named columns.
newdata <- mydata[myvars]
newdata
## v1 v2 v3
## 1 49 95 32
## 2 80 46 96
## 3 79 3 48
## 4 41 100 96
## 5 41 1 61
## 6 52 59 54
## 7 28 65 36
## 8 8 82 18
## 9 76 17 73
## 10 8 20 67
# Follow the example given for using %in% and select variables other than v1 to
# v3 using the ! negation sign, and keep in newdata1 as shown below
# myvars is now a logical mask: TRUE where the column name is v1, v2 or v3.
myvars <- names(mydata) %in% c("v1", "v2", "v3")
newdata1 <- mydata[!myvars]
newdata1
## v4 v5 v6 gender age
## 1 11 21 3 F 82
## 2 56 41 46 F 2
## 3 96 73 90 M 64
## 4 48 47 42 F 93
## 5 47 6 89 M 28
## 6 84 20 48 M 28
## 7 5 69 78 F 71
## 8 84 77 82 M 68
## 9 47 26 16 M 46
## 10 16 79 65 F 1
# Exclude column 3 and 5 from mydata, and keep in newdata2 as shown below :
# Negative positions drop those columns and keep the rest.
newdata2 <- mydata[-c(3, 5)]
newdata2
## v1 v2 v4 v6 gender age
## 1 49 95 11 3 F 82
## 2 80 46 56 46 F 2
## 3 79 3 96 90 M 64
## 4 41 100 48 42 F 93
## 5 41 1 47 89 M 28
## 6 52 59 84 48 M 28
## 7 28 65 5 78 F 71
## 8 8 82 84 82 M 68
## 9 76 17 47 16 M 46
## 10 8 20 16 65 F 1
# Remove the same columns using NULL value as in the example given
# Assigning NULL deletes the columns that the index selects, so columns 3 and 5
# are indexed directly. A negative index here would instead delete every
# column that is NOT listed (which would also remove gender and age).
mydata[c(3, 5)] <- NULL
mydata
## v1 v2 v4 v6 gender age
## 1 49 95 11 3 F 82
## 2 80 46 56 46 F 2
## 3 79 3 96 90 M 64
## 4 41 100 48 42 F 93
## 5 41 1 47 89 M 28
## 6 52 59 84 48 M 28
## 7 28 65 5 78 F 71
## 8 8 82 84 82 M 68
## 9 76 17 47 16 M 46
## 10 8 20 16 65 F 1
# Selecting observations (rows)
# First 5 obs
mydata <- b_mydata # restore the full data frame from the backup
newdata5 <- mydata[1:5, ]

# Based on variable values
# which() turns the logical condition into row positions, so rows where the
# condition is NA are skipped rather than returned as all-NA rows.
newdataGA <- mydata[which(mydata$gender == "F" & mydata$age > 65), ]

# What is the data for newdata5 and newdataGA?
newdata5
## v1 v2 v3 v4 v5 v6 gender age
## 1 49 95 32 11 21 3 F 82
## 2 80 46 96 56 41 46 F 2
## 3 79 3 48 96 73 90 M 64
## 4 41 100 96 48 47 42 F 93
## 5 41 1 61 47 6 89 M 28
newdataGA
## v1 v2 v3 v4 v5 v6 gender age
## 1 49 95 32 11 21 3 F 82
## 4 41 100 96 48 47 42 F 93
## 7 28 65 36 5 69 78 F 71