# Build a small example data set of five students: ID, age and gender.
mydata <- data.frame(
  ID = c(1, 2, 3, 4, 5),
  Age = c(20, 21, 26, 17, 22),
  Gender = c("M", "F", "F", "M", "F")
)
print(mydata)
## ID Age Gender
## 1 1 20 M
## 2 2 21 F
## 3 3 26 F
## 4 4 17 M
## 5 5 22 F
# Arithmetic mean of the Age column.
mean(mydata[["Age"]])
## [1] 21.2
The average age of the students is 21.2 years.
# Add height and weight measurements for each row of the data set.
# NOTE(review): heights look like centimetres (the BMI step divides by 100),
# but 87 and 100 are implausible for ages 17 and 22 -- confirm the data.
mydata[["Height"]] <- c(180, 198, 176, 87, 100)
mydata[["Weight"]] <- c(100, 78, 71, 87, 90)
print(mydata)
## ID Age Gender Height Weight
## 1 1 20 M 180 100
## 2 2 21 F 198 78
## 3 3 26 F 176 71
## 4 4 17 M 87 87
## 5 5 22 F 100 90
# Body mass index: weight divided by the square of height / 100.
# Assumes Height is in centimetres and Weight in kilograms -- TODO confirm.
mydata$BMI <- with(mydata, Weight / (Height / 100)^2)
Creating a new data frame that includes only age and height (columns 2 and 4):
# Keep only the Age and Height columns (positions 2 and 4 of mydata).
mydata2 <- mydata[, c("Age", "Height")]
Remove the third row:
# A negative row index drops that row; the remaining rows keep their
# original row names.
mydata3 <- mydata2[-3L, ]
print(mydata3)
## Age Height
## 1 20 180
## 2 21 198
## 4 17 87
## 5 22 100
## Descriptive analysis: creating a summary while excluding the ID and Gender columns
# Summarise every column except ID and Gender (columns 1 and 3).
summary(mydata[, setdiff(names(mydata), c("ID", "Gender"))])
## Age Height Weight BMI
## Min. :17.0 Min. : 87.0 Min. : 71.0 Min. : 19.90
## 1st Qu.:20.0 1st Qu.:100.0 1st Qu.: 78.0 1st Qu.: 22.92
## Median :21.0 Median :176.0 Median : 87.0 Median : 30.86
## Mean :21.2 Mean :148.2 Mean : 85.2 Mean : 55.72
## 3rd Qu.:22.0 3rd Qu.:180.0 3rd Qu.: 90.0 3rd Qu.: 90.00
## Max. :26.0 Max. :198.0 Max. :100.0 Max. :114.94
The youngest person was 17 years old.
#install.packages("pastecs")
library(pastecs)
# Descriptive statistics from pastecs for every column except ID and Gender
# (columns 1 and 3), rounded to two decimal places.
round(stat.desc(mydata[, -c(1, 3)]), digits = 2)
## Age Height Weight BMI
## nbr.val 5.00 5.00 5.00 5.00
## nbr.null 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00
## min 17.00 87.00 71.00 19.90
## max 26.00 198.00 100.00 114.94
## range 9.00 111.00 29.00 95.05
## sum 106.00 741.00 426.00 278.62
## median 21.00 176.00 87.00 30.86
## mean 21.20 148.20 85.20 55.72
## SE.mean 1.46 22.73 4.99 19.57
## CI.mean.0.95 4.06 63.11 13.87 54.33
## var 10.70 2583.20 124.70 1914.84
## std.dev 3.27 50.83 11.17 43.76
## coef.var 0.15 0.34 0.13 0.79
## Creating a table that shows only the rows for males
# Keep only the rows whose Gender is "M".
mydata_M <- mydata[mydata[["Gender"]] == "M", ]
print(mydata_M)
## ID Age Gender Height Weight BMI
## 1 1 20 M 180 100 30.8642
## 4 4 17 M 87 87 114.9425
## Creating a table of males who are 18 years or older
# Overwrite mydata_M with the rows that are both male and aged 18 or older
# (the comparison is inclusive: Age >= 18).
mydata_M <- mydata[mydata[["Age"]] >= 18 & mydata[["Gender"]] == "M", ]
print(mydata_M)
## ID Age Gender Height Weight BMI
## 1 1 20 M 180 100 30.8642