# Build the example data set: one row per student with an identifier,
# an age (in years) and a gender code.
mydata <- data.frame(
  ID = c(1, 2, 3, 4),
  Age = c(20, 22, 18, 24),
  Gender = c("M", "F", "M", "M")
)
# Display the full table.
print(mydata)
## ID Age Gender
## 1 1 20 M
## 2 2 22 F
## 3 3 18 M
## 4 4 24 M
# Arithmetic mean of the Age column.
mean(mydata[["Age"]])
## [1] 21
# Sample standard deviation of the Age column.
sd(mydata[["Age"]])
## [1] 2.581989
The average age of the students is 21 years.
# Append two measurement columns; values are listed in row order.
# NOTE(review): units are presumably cm and kg -- confirm.
mydata[["Height"]] <- c(180, 170, 176, 177)
mydata[["Weight"]] <- c(76, 60, 72, 73)
Create a new data frame that includes only Age and Height.
# Keep only the Age and Height columns (positions 2 and 4 of mydata),
# selected by name so the intent is visible at the call site.
mydata2 <- mydata[, c("Age", "Height")]
Remove the third row from mydata2.
# Negative row index: every row of mydata2 except the third.
mydata3 <- mydata2[-3, ]
# Min, quartiles, median, mean and max for every column of mydata
# except ID (column 1) and Gender (column 3).
summary(mydata[, -c(1, 3)])
## Age Height Weight
## Min. :18.0 Min. :170.0 Min. :60.00
## 1st Qu.:19.5 1st Qu.:174.5 1st Qu.:69.00
## Median :21.0 Median :176.5 Median :72.50
## Mean :21.0 Mean :175.8 Mean :70.25
## 3rd Qu.:22.5 3rd Qu.:177.8 3rd Qu.:73.75
## Max. :24.0 Max. :180.0 Max. :76.00
# install.packages("pastecs")  # one-time setup if pastecs is not installed
library(pastecs)
# Descriptive statistics table for the columns left after dropping
# ID (column 1) and Gender (column 3), rounded to two decimals.
round(stat.desc(mydata[, -c(1, 3)]), digits = 2)
## Age Height Weight
## nbr.val 4.00 4.00 4.00
## nbr.null 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00
## min 18.00 170.00 60.00
## max 24.00 180.00 76.00
## range 6.00 10.00 16.00
## sum 84.00 703.00 281.00
## median 21.00 176.50 72.50
## mean 21.00 175.75 70.25
## SE.mean 1.29 2.10 3.52
## CI.mean.0.95 4.11 6.67 11.20
## var 6.67 17.58 49.58
## std.dev 2.58 4.19 7.04
## coef.var 0.12 0.02 0.10
# Subset of mydata containing only the rows where Gender is "M".
mydata_M <- mydata[mydata$Gender == "M", ]
# Narrow the subset to males aged 20 or older (this overwrites the
# assignment above). The age condition must test the Age column:
# comparing the whole data frame (`mydata >= 20`) yields a logical matrix
# over every column, which is not a valid per-row filter.
mydata_M <- mydata[mydata$Gender == "M" & mydata$Age >= 20, ]