1. Read file marks1.csv

# Load the marks table from the CSV export; read.csv() returns a data frame.
marks_df <- read.csv("/Users/salahkaf/Downloads/marks1.csv")
# Echo the full data frame to confirm the import looks right.
marks_df

##        X    X.1 test asgn Prsnt Final  q1 q2 q3   q4
## 1  60001  Ahmad   15   14    17    13 0.0  9  2  4.0
## 2  60003    Abu   26   13    18    22 3.0  5  8  6.0
## 3  60006   Samy   21   15    19    25 6.0  7  4  8.0
## 4  60008  Chong   25   10    17    14 2.0  3  4  5.0
## 5  60009   Paul   25   15    16    20 3.0  7  6  4.0
## 6  60011   John   18   15    19    22 4.0  7  4  7.0
## 7  60014   Devi   30   15    19    28 4.0  5  9 10.0
## 8  60015 Pillip   16   15    19    20 4.0  5  6  5.0
## 9  60023 Meilin   18   13    18    22 2.0  5  7  8.0
## 10 60025   Lily   30   14    18    24 5.5  6  5  7.5
## 11 60026  Jamil   12   10    12    12 1.0  5  1  6.0

2. Check the data frame info using a few available functions

head(marks_df, n = 6L) # first six rows

##       X   X.1 test asgn Prsnt Final q1 q2 q3 q4
## 1 60001 Ahmad   15   14    17    13  0  9  2  4
## 2 60003   Abu   26   13    18    22  3  5  8  6
## 3 60006  Samy   21   15    19    25  6  7  4  8
## 4 60008 Chong   25   10    17    14  2  3  4  5
## 5 60009  Paul   25   15    16    20  3  7  6  4
## 6 60011  John   18   15    19    22  4  7  4  7

tail(marks_df, n = 6L) # last six rows

##        X    X.1 test asgn Prsnt Final  q1 q2 q3   q4
## 6  60011   John   18   15    19    22 4.0  7  4  7.0
## 7  60014   Devi   30   15    19    28 4.0  5  9 10.0
## 8  60015 Pillip   16   15    19    20 4.0  5  6  5.0
## 9  60023 Meilin   18   13    18    22 2.0  5  7  8.0
## 10 60025   Lily   30   14    18    24 5.5  6  5  7.5
## 11 60026  Jamil   12   10    12    12 1.0  5  1  6.0

dim(marks_df) # dimensions of the data frame: number of rows, then number of columns

## [1] 11 10

str(marks_df) # compact structure: each column's type and its first few values

## 'data.frame':    11 obs. of  10 variables:
##  $ X    : int  60001 60003 60006 60008 60009 60011 60014 60015 60023 60025 ...
##  $ X.1  : chr  "Ahmad" "Abu" "Samy" "Chong" ...
##  $ test : int  15 26 21 25 25 18 30 16 18 30 ...
##  $ asgn : int  14 13 15 10 15 15 15 15 13 14 ...
##  $ Prsnt: int  17 18 19 17 16 19 19 19 18 18 ...
##  $ Final: int  13 22 25 14 20 22 28 20 22 24 ...
##  $ q1   : num  0 3 6 2 3 4 4 4 2 5.5 ...
##  $ q2   : int  9 5 7 3 7 7 5 5 5 6 ...
##  $ q3   : int  2 8 4 4 6 4 9 6 7 5 ...
##  $ q4   : num  4 6 8 5 4 7 10 5 8 7.5 ...

summary(marks_df) # per-column summary (min, quartiles, mean, max for the numeric columns)

##        X             X.1                 test            asgn      
##  Min.   :60001   Length:11          Min.   :12.00   Min.   :10.00  
##  1st Qu.:60007   Class :character   1st Qu.:17.00   1st Qu.:13.00  
##  Median :60011   Mode  :character   Median :21.00   Median :14.00  
##  Mean   :60013                      Mean   :21.45   Mean   :13.55  
##  3rd Qu.:60019                      3rd Qu.:25.50   3rd Qu.:15.00  
##  Max.   :60026                      Max.   :30.00   Max.   :15.00  
##      Prsnt           Final             q1              q2       
##  Min.   :12.00   Min.   :12.00   Min.   :0.000   Min.   :3.000  
##  1st Qu.:17.00   1st Qu.:17.00   1st Qu.:2.000   1st Qu.:5.000  
##  Median :18.00   Median :22.00   Median :3.000   Median :5.000  
##  Mean   :17.45   Mean   :20.18   Mean   :3.136   Mean   :5.818  
##  3rd Qu.:19.00   3rd Qu.:23.00   3rd Qu.:4.000   3rd Qu.:7.000  
##  Max.   :19.00   Max.   :28.00   Max.   :6.000   Max.   :9.000  
##        q3              q4        
##  Min.   :1.000   Min.   : 4.000  
##  1st Qu.:4.000   1st Qu.: 5.000  
##  Median :5.000   Median : 6.000  
##  Mean   :5.091   Mean   : 6.409  
##  3rd Qu.:6.500   3rd Qu.: 7.750  
##  Max.   :9.000   Max.   :10.000

3. Check the names of the variables in the data frame

names(marks_df) # column (variable) names of the data frame

##  [1] "X"     "X.1"   "test"  "asgn"  "Prsnt" "Final" "q1"    "q2"    "q3"   
## [10] "q4"

4. Rename the first variable X to ID

# Overwrite the name of the first column (read in as "X") with "ID".
names(marks_df)[1L] <- "ID"
marks_df

##       ID    X.1 test asgn Prsnt Final  q1 q2 q3   q4
## 1  60001  Ahmad   15   14    17    13 0.0  9  2  4.0
## 2  60003    Abu   26   13    18    22 3.0  5  8  6.0
## 3  60006   Samy   21   15    19    25 6.0  7  4  8.0
## 4  60008  Chong   25   10    17    14 2.0  3  4  5.0
## 5  60009   Paul   25   15    16    20 3.0  7  6  4.0
## 6  60011   John   18   15    19    22 4.0  7  4  7.0
## 7  60014   Devi   30   15    19    28 4.0  5  9 10.0
## 8  60015 Pillip   16   15    19    20 4.0  5  6  5.0
## 9  60023 Meilin   18   13    18    22 2.0  5  7  8.0
## 10 60025   Lily   30   14    18    24 5.5  6  5  7.5
## 11 60026  Jamil   12   10    12    12 1.0  5  1  6.0

5. Rename the second variable X.1 to StuName

# Overwrite the name of the second column (read in as "X.1") with "StuName".
names(marks_df)[2L] <- "StuName"
marks_df

##       ID StuName test asgn Prsnt Final  q1 q2 q3   q4
## 1  60001   Ahmad   15   14    17    13 0.0  9  2  4.0
## 2  60003     Abu   26   13    18    22 3.0  5  8  6.0
## 3  60006    Samy   21   15    19    25 6.0  7  4  8.0
## 4  60008   Chong   25   10    17    14 2.0  3  4  5.0
## 5  60009    Paul   25   15    16    20 3.0  7  6  4.0
## 6  60011    John   18   15    19    22 4.0  7  4  7.0
## 7  60014    Devi   30   15    19    28 4.0  5  9 10.0
## 8  60015  Pillip   16   15    19    20 4.0  5  6  5.0
## 9  60023  Meilin   18   13    18    22 2.0  5  7  8.0
## 10 60025    Lily   30   14    18    24 5.5  6  5  7.5
## 11 60026   Jamil   12   10    12    12 1.0  5  1  6.0

# 6. Remove the first two columns from the data frame

# Keep an untouched copy so the ID and name columns can be restored later.
backup_df <- marks_df
# Drop columns 1 and 2 by negative position; only the mark columns remain.
marks_df <- marks_df[-c(1L, 2L)]
marks_df

##    test asgn Prsnt Final  q1 q2 q3   q4
## 1    15   14    17    13 0.0  9  2  4.0
## 2    26   13    18    22 3.0  5  8  6.0
## 3    21   15    19    25 6.0  7  4  8.0
## 4    25   10    17    14 2.0  3  4  5.0
## 5    25   15    16    20 3.0  7  6  4.0
## 6    18   15    19    22 4.0  7  4  7.0
## 7    30   15    19    28 4.0  5  9 10.0
## 8    16   15    19    20 4.0  5  6  5.0
## 9    18   13    18    22 2.0  5  7  8.0
## 10   30   14    18    24 5.5  6  5  7.5
## 11   12   10    12    12 1.0  5  1  6.0

7. Use apply() function to sum all the marks in the data frame and put them in a new vector called Total and bind the vector to the data frame

# MARGIN = 1 runs sum() over each row, giving one total per student.
Total <- apply(marks_df, MARGIN = 1, FUN = sum)
Total

##  [1]  74 101 105  80  96  96 120  90  93 110  59

# Attach the row totals to the data frame as a new column named Total.
marks_df[["Total"]] <- Total
marks_df

##    test asgn Prsnt Final  q1 q2 q3   q4 Total
## 1    15   14    17    13 0.0  9  2  4.0    74
## 2    26   13    18    22 3.0  5  8  6.0   101
## 3    21   15    19    25 6.0  7  4  8.0   105
## 4    25   10    17    14 2.0  3  4  5.0    80
## 5    25   15    16    20 3.0  7  6  4.0    96
## 6    18   15    19    22 4.0  7  4  7.0    96
## 7    30   15    19    28 4.0  5  9 10.0   120
## 8    16   15    19    20 4.0  5  6  5.0    90
## 9    18   13    18    22 2.0  5  7  8.0    93
## 10   30   14    18    24 5.5  6  5  7.5   110
## 11   12   10    12    12 1.0  5  1  6.0    59

8. Using a user defined function called function(), use the apply() function to add variable 1 to variable 3, and write to a new variable in the data frame called CW.

# Coursework mark for one student: the sum of the first three entries of `x`.
# When `x` is a row of marks_df these are test, asgn and Prsnt.
cw_function <- function(x) {
  x[1] + x[2] + x[3]
}
# Run cw_function once per row with apply() (MARGIN = 1), as the exercise
# asks, and wrap the result in a one-column data frame named CW.
cw <- data.frame(CW = apply(marks_df[, 1:3], 1, cw_function))
cw

##    CW
## 1  46
## 2  57
## 3  55
## 4  52
## 5  56
## 6  52
## 7  64
## 8  50
## 9  49
## 10 62
## 11 34

# Copy the CW values out of the one-column data frame into marks_df.
marks_df[["CW"]] <- cw[["CW"]]
marks_df

##    test asgn Prsnt Final  q1 q2 q3   q4 Total CW
## 1    15   14    17    13 0.0  9  2  4.0    74 46
## 2    26   13    18    22 3.0  5  8  6.0   101 57
## 3    21   15    19    25 6.0  7  4  8.0   105 55
## 4    25   10    17    14 2.0  3  4  5.0    80 52
## 5    25   15    16    20 3.0  7  6  4.0    96 56
## 6    18   15    19    22 4.0  7  4  7.0    96 52
## 7    30   15    19    28 4.0  5  9 10.0   120 64
## 8    16   15    19    20 4.0  5  6  5.0    90 50
## 9    18   13    18    22 2.0  5  7  8.0    93 49
## 10   30   14    18    24 5.5  6  5  7.5   110 62
## 11   12   10    12    12 1.0  5  1  6.0    59 34

We still need to restore the first two columns, i.e., ID and stuName, from backup_df.

# Pull the identifier and name columns out of the backup copy by position.
ID <- backup_df[[1L]]
stuName <- backup_df[[2L]]
# New columns are appended on the right-hand side of the data frame.
marks_df[["ID"]] <- ID
marks_df[["stuName"]] <- stuName
# ID and stuName now sit at the end; they are moved to the front afterwards.
marks_df

##    test asgn Prsnt Final  q1 q2 q3   q4 Total CW    ID stuName
## 1    15   14    17    13 0.0  9  2  4.0    74 46 60001   Ahmad
## 2    26   13    18    22 3.0  5  8  6.0   101 57 60003     Abu
## 3    21   15    19    25 6.0  7  4  8.0   105 55 60006    Samy
## 4    25   10    17    14 2.0  3  4  5.0    80 52 60008   Chong
## 5    25   15    16    20 3.0  7  6  4.0    96 56 60009    Paul
## 6    18   15    19    22 4.0  7  4  7.0    96 52 60011    John
## 7    30   15    19    28 4.0  5  9 10.0   120 64 60014    Devi
## 8    16   15    19    20 4.0  5  6  5.0    90 50 60015  Pillip
## 9    18   13    18    22 2.0  5  7  8.0    93 49 60023  Meilin
## 10   30   14    18    24 5.5  6  5  7.5   110 62 60025    Lily
## 11   12   10    12    12 1.0  5  1  6.0    59 34 60026   Jamil

Relocating ID and stuName columns to the beginning

require(dplyr)

## Loading required package: dplyr

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Move ID and stuName so that they come immediately before the test column.
marks_df <- relocate(marks_df, ID, stuName, .before = test)

# Final data frame

marks_df

##       ID stuName test asgn Prsnt Final  q1 q2 q3   q4 Total CW
## 1  60001   Ahmad   15   14    17    13 0.0  9  2  4.0    74 46
## 2  60003     Abu   26   13    18    22 3.0  5  8  6.0   101 57
## 3  60006    Samy   21   15    19    25 6.0  7  4  8.0   105 55
## 4  60008   Chong   25   10    17    14 2.0  3  4  5.0    80 52
## 5  60009    Paul   25   15    16    20 3.0  7  6  4.0    96 56
## 6  60011    John   18   15    19    22 4.0  7  4  7.0    96 52
## 7  60014    Devi   30   15    19    28 4.0  5  9 10.0   120 64
## 8  60015  Pillip   16   15    19    20 4.0  5  6  5.0    90 50
## 9  60023  Meilin   18   13    18    22 2.0  5  7  8.0    93 49
## 10 60025    Lily   30   14    18    24 5.5  6  5  7.5   110 62
## 11 60026   Jamil   12   10    12    12 1.0  5  1  6.0    59 34

Data Frames Manipulation

Salah

11/24/2021

1. Read file marks1.csv

2. Check the data frame info using a few available functions

3. Check the names of the variables in the data frame

4. Rename the first variable X to ID

5. Rename the second variable X.1 to StuName

7. Use apply() function to sum all the marks in the data frame and put them in a new vector called Total and bind the vector to the data frame

8. Using a user defined function called function(), use the apply() function to add variable 1 to variable 3, and write to a new variable in the data frame called CW.

We still need to restore the first two columns, i.e., ID and stuName, from backup_df.

Relocating ID and stuName columns to the beginning