# Build the sample data set: one row per student with an ID, age and gender.
mydata <- data.frame(
  ID = c(1, 2, 3, 4),
  Age = c(20, 22, 18, 24),
  Gender = c("M", "F", "M", "M")
)
print(mydata)
## ID Age Gender
## 1 1 20 M
## 2 2 22 F
## 3 3 18 M
## 4 4 24 M
# Arithmetic mean of the Age column.
mean(mydata[["Age"]])
## [1] 21
# Sample standard deviation of the Age column.
sd(mydata[["Age"]])
## [1] 2.581989
The average age of students is 21 years.
# Append body measurements, one value per existing row.
mydata[["Height"]] <- c(180, 170, 176, 177)
mydata[["Weight"]] <- c(76, 60, 72, 73)
# BMI = weight / height^2, with Height divided by 100 first
# (presumably Height is in cm and Weight in kg -- confirm units).
mydata[["BMI"]] <- with(mydata, Weight / (Height / 100)^2)
Create a new data frame that includes only Age and Height.
# Keep only the Age and Height columns. Selecting by name (rather than by
# position 2 and 4) keeps the result correct if columns are added or reordered.
mydata2 <- mydata[, c("Age", "Height")]
Remove the third row from mydata2.
# Copy of mydata2 with every row except the third.
mydata3 <- mydata2[seq_len(nrow(mydata2)) != 3, ]
# Summarise every column except ID and Gender. The columns are dropped by name
# instead of by position (-1, -3) so the call does not depend on column order.
summary(mydata[, setdiff(names(mydata), c("ID", "Gender"))])
## Age Height Weight BMI
## Min. :18.0 Min. :170.0 Min. :60.00 Min. :20.76
## 1st Qu.:19.5 1st Qu.:174.5 1st Qu.:69.00 1st Qu.:22.62
## Median :21.0 Median :176.5 Median :72.50 Median :23.27
## Mean :21.0 Mean :175.8 Mean :70.25 Mean :22.69
## 3rd Qu.:22.5 3rd Qu.:177.8 3rd Qu.:73.75 3rd Qu.:23.34
## Max. :24.0 Max. :180.0 Max. :76.00 Max. :23.46
The youngest person in the sample was 18 years old.
# install.packages("pastecs")  # uncomment and run if pastecs is not installed
library(pastecs)
# Descriptive statistics for every column except ID and Gender, rounded to two
# decimals. The columns are dropped by name instead of by position (-1, -3) so
# the call does not depend on column order.
round(stat.desc(mydata[, setdiff(names(mydata), c("ID", "Gender"))]), 2)
## Age Height Weight BMI
## nbr.val 4.00 4.00 4.00 4.00
## nbr.null 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00
## min 18.00 170.00 60.00 20.76
## max 24.00 180.00 76.00 23.46
## range 6.00 10.00 16.00 2.70
## sum 84.00 703.00 281.00 90.76
## median 21.00 176.50 72.50 23.27
## mean 21.00 175.75 70.25 22.69
## SE.mean 1.29 2.10 3.52 0.64
## CI.mean.0.95 4.11 6.67 11.20 2.05
## var 6.67 17.58 49.58 1.66
## std.dev 2.58 4.19 7.04 1.29
## coef.var 0.12 0.02 0.10 0.06
# Rows where Gender is "M".
mydata_M <- mydata[which(mydata$Gender == "M"), ]
# Rows where Gender is "M" and Age is at least 20.
mydata_M1 <- mydata[which(mydata$Age >= 20 & mydata$Gender == "M"), ]
Explanation