Week 5 Lab Exercise

Zahiriddin Rustamov

November 19, 2021

1 - Read file marks1.csv

# Load the marks sheet. The first two columns come in as X and X.1 and are
# renamed in steps 4 and 5.
marks <- read.csv(file = "marks1.csv")

2 - Check the data frame info using a few available functions

head(marks, 3)
##       X   X.1 test asgn Prsnt Final q1 q2 q3 q4
## 1 60001 Ahmad   15   14    17    13  0  9  2  4
## 2 60003   Abu   26   13    18    22  3  5  8  6
## 3 60006  Samy   21   15    19    25  6  7  4  8
str(marks)
## 'data.frame':    11 obs. of  10 variables:
##  $ X    : int  60001 60003 60006 60008 60009 60011 60014 60015 60023 60025 ...
##  $ X.1  : chr  "Ahmad" "Abu" "Samy" "Chong" ...
##  $ test : int  15 26 21 25 25 18 30 16 18 30 ...
##  $ asgn : int  14 13 15 10 15 15 15 15 13 14 ...
##  $ Prsnt: int  17 18 19 17 16 19 19 19 18 18 ...
##  $ Final: int  13 22 25 14 20 22 28 20 22 24 ...
##  $ q1   : num  0 3 6 2 3 4 4 4 2 5.5 ...
##  $ q2   : int  9 5 7 3 7 7 5 5 5 6 ...
##  $ q3   : int  2 8 4 4 6 4 9 6 7 5 ...
##  $ q4   : num  4 6 8 5 4 7 10 5 8 7.5 ...

3 - Check the names of the variables in the data frame

names(marks)
##  [1] "X"     "X.1"   "test"  "asgn"  "Prsnt" "Final" "q1"    "q2"    "q3"   
## [10] "q4"

4 - Rename the first variable X to ID

# Positional rename: the first column becomes ID.
colnames(marks)[1L] <- "ID"

5 - Rename the second variable X.1 to StuName

# Rename by matching the current name rather than by position; only the
# column called X.1 is touched.
names(marks)[names(marks) == "X.1"] <- "StuName"
b_marks <- marks  # snapshot that still has ID and StuName, restored in step 7

6 - Remove the first two columns from the data frame

# Flag the identifier columns, then keep every column that is not flagged.
col <- c("ID", "StuName")
myCols <- match(colnames(marks), col, nomatch = 0L) > 0L

marks <- marks[!myCols]
marks # identifier columns are gone; only the mark columns remain
##    test asgn Prsnt Final  q1 q2 q3   q4
## 1    15   14    17    13 0.0  9  2  4.0
## 2    26   13    18    22 3.0  5  8  6.0
## 3    21   15    19    25 6.0  7  4  8.0
## 4    25   10    17    14 2.0  3  4  5.0
## 5    25   15    16    20 3.0  7  6  4.0
## 6    18   15    19    22 4.0  7  4  7.0
## 7    30   15    19    28 4.0  5  9 10.0
## 8    16   15    19    20 4.0  5  6  5.0
## 9    18   13    18    22 2.0  5  7  8.0
## 10   30   14    18    24 5.5  6  5  7.5
## 11   12   10    12    12 1.0  5  1  6.0

7 - Use the apply() function to sum all the marks in each row of the data frame, store the sums in a new vector called Total, and bind that vector to the data frame

# Undo the column removal from step 6 so ID and StuName are present again.
marks <- b_marks
# Row-wise sum over every column except the first two (ID and StuName).
Total <- apply(marks[, -(1:2)], MARGIN = 1, FUN = sum)
marks <- cbind(marks, Total)
marks
##       ID StuName test asgn Prsnt Final  q1 q2 q3   q4 Total
## 1  60001   Ahmad   15   14    17    13 0.0  9  2  4.0    74
## 2  60003     Abu   26   13    18    22 3.0  5  8  6.0   101
## 3  60006    Samy   21   15    19    25 6.0  7  4  8.0   105
## 4  60008   Chong   25   10    17    14 2.0  3  4  5.0    80
## 5  60009    Paul   25   15    16    20 3.0  7  6  4.0    96
## 6  60011    John   18   15    19    22 4.0  7  4  7.0    96
## 7  60014    Devi   30   15    19    28 4.0  5  9 10.0   120
## 8  60015  Pillip   16   15    19    20 4.0  5  6  5.0    90
## 9  60023  Meilin   18   13    18    22 2.0  5  7  8.0    93
## 10 60025    Lily   30   14    18    24 5.5  6  5  7.5   110
## 11 60026   Jamil   12   10    12    12 1.0  5  1  6.0    59

8 - Using a user-defined (anonymous) function created with function(), use the apply() function to add up variables 1 to 3 (test, asgn and Prsnt), and write the result to a new variable in the data frame called CW.

# CW is the row-wise sum of test, asgn and Prsnt. apply() hands the anonymous
# function one row of marks_cw at a time.
marks_cw <- marks[c("test", "asgn", "Prsnt")]
marks$CW <- apply(marks_cw, MARGIN = 1, FUN = function(scores) {
  scores[[1]] + scores[[2]] + scores[[3]]
})
marks
##       ID StuName test asgn Prsnt Final  q1 q2 q3   q4 Total CW
## 1  60001   Ahmad   15   14    17    13 0.0  9  2  4.0    74 46
## 2  60003     Abu   26   13    18    22 3.0  5  8  6.0   101 57
## 3  60006    Samy   21   15    19    25 6.0  7  4  8.0   105 55
## 4  60008   Chong   25   10    17    14 2.0  3  4  5.0    80 52
## 5  60009    Paul   25   15    16    20 3.0  7  6  4.0    96 56
## 6  60011    John   18   15    19    22 4.0  7  4  7.0    96 52
## 7  60014    Devi   30   15    19    28 4.0  5  9 10.0   120 64
## 8  60015  Pillip   16   15    19    20 4.0  5  6  5.0    90 50
## 9  60023  Meilin   18   13    18    22 2.0  5  7  8.0    93 49
## 10 60025    Lily   30   14    18    24 5.5  6  5  7.5   110 62
## 11 60026   Jamil   12   10    12    12 1.0  5  1  6.0    59 34

Lab Practice

Create a data frame mydata, with the following data

# Build the practice data frame column by column. Each vector holds one
# variable for all 10 observations, so every column gets its type directly
# (numeric for v1-v6 and age, character for gender) and its name in the same
# call, instead of appending ten separate lists to an empty frame and
# renaming the columns afterwards.
mydata <- data.frame(
  v1     = c(49, 80, 79, 41, 41, 52, 28, 8, 76, 8),
  v2     = c(95, 46, 3, 100, 1, 59, 65, 82, 17, 20),
  v3     = c(32, 96, 48, 96, 61, 54, 36, 18, 73, 67),
  v4     = c(11, 56, 96, 48, 47, 84, 5, 84, 47, 16),
  v5     = c(21, 41, 73, 47, 6, 20, 69, 77, 26, 79),
  v6     = c(3, 46, 90, 42, 89, 48, 78, 82, 16, 65),
  gender = c("F", "F", "M", "F", "M", "M", "F", "M", "M", "F"),
  age    = c(82, 2, 64, 93, 28, 28, 71, 68, 46, 1),
  stringsAsFactors = FALSE  # keep gender as character on any R version
)
mydata
##    v1  v2 v3 v4 v5 v6 gender age
## 1  49  95 32 11 21  3      F  82
## 2  80  46 96 56 41 46      F   2
## 3  79   3 48 96 73 90      M  64
## 4  41 100 96 48 47 42      F  93
## 5  41   1 61 47  6 89      M  28
## 6  52  59 54 84 20 48      M  28
## 7  28  65 36  5 69 78      F  71
## 8   8  82 18 84 77 82      M  68
## 9  76  17 73 47 26 16      M  46
## 10  8  20 67 16 79 65      F   1
b_mydata <- mydata # backup of the full data frame; restored before the row-selection examples

Create a character vector with the values "v1", "v2" and "v3" and assign it to myvars; then, using myvars, select variables v1 to v3 from mydata and keep them in newdata. newdata should look as shown below.

# Column names to keep, generated as "v1", "v2", "v3".
myvars <- paste0("v", 1:3)

# Select those columns by name; drop = FALSE keeps the result a data frame.
newdata <- mydata[, myvars, drop = FALSE]
newdata
##    v1  v2 v3
## 1  49  95 32
## 2  80  46 96
## 3  79   3 48
## 4  41 100 96
## 5  41   1 61
## 6  52  59 54
## 7  28  65 36
## 8   8  82 18
## 9  76  17 73
## 10  8  20 67

Follow the example given for using %in% and select variables other than v1 to v3 using the ! negation sign, and keep in newdata1 as shown below

# Logical mask over the columns: TRUE for v1-v3, FALSE for everything else.
myvars <- is.element(names(mydata), c("v1", "v2", "v3"))
# Negating the mask keeps exactly the columns that are not v1-v3.
newdata1 <- mydata[!myvars]
newdata1
##    v4 v5 v6 gender age
## 1  11 21  3      F  82
## 2  56 41 46      F   2
## 3  96 73 90      M  64
## 4  48 47 42      F  93
## 5  47  6 89      M  28
## 6  84 20 48      M  28
## 7   5 69 78      F  71
## 8  84 77 82      M  68
## 9  47 26 16      M  46
## 10 16 79 65      F   1

Exclude columns 3 and 5 from mydata and keep the result in newdata2, as shown below:

# A negative index drops the listed positions (columns 3 and 5), keeps the rest.
newdata2 <- mydata[-c(3, 5)]
newdata2
##    v1  v2 v4 v6 gender age
## 1  49  95 11  3      F  82
## 2  80  46 56 46      F   2
## 3  79   3 96 90      M  64
## 4  41 100 48 42      F  93
## 5  41   1 47 89      M  28
## 6  52  59 84 48      M  28
## 7  28  65  5 78      F  71
## 8   8  82 84 82      M  68
## 9  76  17 47 16      M  46
## 10  8  20 16 65      F   1

Remove the same columns by assigning NULL to them, as in the example given

# Delete columns 3 and 5 (v3 and v5) in place -- the same two columns that
# were excluded to build newdata2. With `<- NULL` the index names the columns
# to delete, so it must be positive here: a negative index such as
# c(-1:-2, -4, -6) selects every *other* column and would also delete
# gender and age.
mydata[c(3, 5)] <- NULL
mydata
##    v1  v2 v4 v6 gender age
## 1  49  95 11  3      F  82
## 2  80  46 56 46      F   2
## 3  79   3 96 90      M  64
## 4  41 100 48 42      F  93
## 5  41   1 47 89      M  28
## 6  52  59 84 48      M  28
## 7  28  65  5 78      F  71
## 8   8  82 84 82      M  68
## 9  76  17 47 16      M  46
## 10  8  20 16 65      F   1

Selecting observations (rows)
First 5 obs

# Start again from the untouched copy, then take rows 1 through 5 (all columns).
mydata <- b_mydata
newdata5 <- mydata[seq_len(5), ]

Based on variable values

# Keep the rows where gender is "F" and age is above 65. which() turns the
# logical test into row positions and leaves out any NA results.
newdataGA <- mydata[with(mydata, which(gender == "F" & age > 65)), ]

What is the data for newdata5 and newdataGA?

newdata5
##   v1  v2 v3 v4 v5 v6 gender age
## 1 49  95 32 11 21  3      F  82
## 2 80  46 96 56 41 46      F   2
## 3 79   3 48 96 73 90      M  64
## 4 41 100 96 48 47 42      F  93
## 5 41   1 61 47  6 89      M  28
newdataGA
##   v1  v2 v3 v4 v5 v6 gender age
## 1 49  95 32 11 21  3      F  82
## 4 41 100 96 48 47 42      F  93
## 7 28  65 36  5 69 78      F  71

End of Document