# Build the example data set: one row per student, with an identifier,
# age and gender code ("M" / "F").
mydata <- data.frame(
  ID = c(1, 2, 3, 4, 5),
  Age = c(20, 21, 26, 17, 22),
  Gender = c("M", "F", "F", "M", "F")
)
# Show the full table.
print(mydata)
##   ID Age Gender
## 1  1  20      M
## 2  2  21      F
## 3  3  26      F
## 4  4  17      M
## 5  5  22      F
# Arithmetic mean of the Age column.
mean(mydata[["Age"]])
## [1] 21.2

The average age of the students is approximately 21 years (mean = 21.2).

# Append Height and Weight columns; values are given in row order (ID 1 to 5).
# NOTE(review): the BMI formula further down divides Height by 100, so Height
# is presumably in centimetres. Heights of 87 and 100 then produce BMI values
# of about 115 and 90 -- confirm these two entries against the source data.
mydata[["Height"]] <- c(180, 198, 176, 87, 100)
mydata[["Weight"]] <- c(100, 78, 71, 87, 90)
# Show the table with the two new columns.
print(mydata)
##   ID Age Gender Height Weight
## 1  1  20      M    180    100
## 2  2  21      F    198     78
## 3  3  26      F    176     71
## 4  4  17      M     87     87
## 5  5  22      F    100     90
# Body mass index: Weight divided by the square of (Height / 100).
# Presumably Weight is in kg and Height in cm (hence the / 100) -- TODO confirm.
mydata[["BMI"]] <- with(mydata, Weight / (Height / 100)^2)

Creating a new data frame that includes only age and height (columns 2 and 4).

# Keep only columns 2 and 4 of mydata, i.e. Age and Height, selected by name
# so the selection does not depend on column position.
mydata2 <- mydata[, c("Age", "Height")]

Remove the third row.

# Drop row 3 with a negative index; the remaining rows keep their original
# row names (1, 2, 4, 5).
mydata3 <- mydata2[-3L, , drop = FALSE]
print(mydata3)
##   Age Height
## 1  20    180
## 2  21    198
## 4  17     87
## 5  22    100

## Descriptive analysis: creating a summary whilst excluding the ID and Gender columns

# Summarise every column except the identifier and the gender code
# (leaves Age, Height, Weight and BMI, in that order).
summary(mydata[, setdiff(names(mydata), c("ID", "Gender"))])
##       Age           Height          Weight           BMI        
##  Min.   :17.0   Min.   : 87.0   Min.   : 71.0   Min.   : 19.90  
##  1st Qu.:20.0   1st Qu.:100.0   1st Qu.: 78.0   1st Qu.: 22.92  
##  Median :21.0   Median :176.0   Median : 87.0   Median : 30.86  
##  Mean   :21.2   Mean   :148.2   Mean   : 85.2   Mean   : 55.72  
##  3rd Qu.:22.0   3rd Qu.:180.0   3rd Qu.: 90.0   3rd Qu.: 90.00  
##  Max.   :26.0   Max.   :198.0   Max.   :100.0   Max.   :114.94

The youngest person was 17 years old.

# Uncomment and run once if the pastecs package is not installed:
# install.packages("pastecs")
library(pastecs)
# Descriptive statistics from pastecs::stat.desc() for every column except
# ID and Gender, rounded to two decimal places.
round(
  stat.desc(mydata[, setdiff(names(mydata), c("ID", "Gender"))]),
  digits = 2
)
##                 Age  Height Weight     BMI
## nbr.val        5.00    5.00   5.00    5.00
## nbr.null       0.00    0.00   0.00    0.00
## nbr.na         0.00    0.00   0.00    0.00
## min           17.00   87.00  71.00   19.90
## max           26.00  198.00 100.00  114.94
## range          9.00  111.00  29.00   95.05
## sum          106.00  741.00 426.00  278.62
## median        21.00  176.00  87.00   30.86
## mean          21.20  148.20  85.20   55.72
## SE.mean        1.46   22.73   4.99   19.57
## CI.mean.0.95   4.06   63.11  13.87   54.33
## var           10.70 2583.20 124.70 1914.84
## std.dev        3.27   50.83  11.17   43.76
## coef.var       0.15    0.34   0.13    0.79

## Creating a table that shows the results for males only

# Keep only the rows whose Gender is "M", with all columns.
mydata_M <- mydata[mydata[["Gender"]] == "M", , drop = FALSE]
print(mydata_M)
##   ID Age Gender Height Weight      BMI
## 1  1  20      M    180    100  30.8642
## 4  4  17      M     87     87 114.9425

## Creating a table of males who are 18 years or older

# Overwrite mydata_M with the rows that are both male and aged 18 or older
# (elementwise `&` combines the two row-wise conditions).
mydata_M <- mydata[with(mydata, Gender == "M" & Age >= 18), , drop = FALSE]
print(mydata_M)
##   ID Age Gender Height Weight     BMI
## 1  1  20      M    180    100 30.8642