# Build a small example data set with one row per subject and four numeric
# columns. Gender is stored as a 0/1 code at this point.
mydata <- data.frame(
  ID = c(1, 2, 3, 4),
  Age = c(22, 23, 24, 25),
  Height = c(180, 186, 175, 170),
  Gender = c(0, 0, 1, 1)
)
print(mydata)
## ID Age Height Gender
## 1 1 22 180 0
## 2 2 23 186 0
## 3 3 24 175 1
## 4 4 25 170 1
# Overwrite the Height value in row 4 (addressed by column name rather than
# by position) and show the updated data frame.
mydata[4, "Height"] <- 169
print(mydata)
## ID Age Height Gender
## 1 1 22 180 0
## 2 2 23 186 0
## 3 3 24 175 1
## 4 4 25 169 1
# Append a weight column, one value per row in the existing row order.
mydata[["weight"]] <- c(85, 70, 72, 92)
# Calculate a new variable: body mass index (BMI)
# BMI is weight divided by the square of (Height / 100); the columns are
# looked up inside the data frame via with().
mydata$BMI <- with(mydata, weight / (Height / 100)^2)
print(mydata$BMI)
## [1] 26.23457 20.23355 23.51020 32.21176
# Five-number summary plus the mean of the new column.
summary(mydata$BMI)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.23 22.69 24.87 25.55 27.73 32.21
# Uncomment to install the package:
# install.packages("pastecs")
library(pastecs)
# Descriptive statistics for the BMI column, rounded to two decimals.
round(stat.desc(mydata[["BMI"]]), digits = 2)
## nbr.val nbr.null nbr.na min max range
## 4.00 0.00 0.00 20.23 32.21 11.98
## sum median mean SE.mean CI.mean.0.95 var
## 102.19 24.87 25.55 2.54 8.08 25.76
## std.dev coef.var
## 5.08 0.20
# Descriptive statistics for every column of the data frame, rounded to two
# decimals.
round(stat.desc(mydata), digits = 2)
## ID Age Height Gender weight BMI
## nbr.val 4.00 4.00 4.00 4.00 4.00 4.00
## nbr.null 0.00 0.00 0.00 2.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00 0.00 0.00
## min 1.00 22.00 169.00 0.00 70.00 20.23
## max 4.00 25.00 186.00 1.00 92.00 32.21
## range 3.00 3.00 17.00 1.00 22.00 11.98
## sum 10.00 94.00 710.00 2.00 319.00 102.19
## median 2.50 23.50 177.50 0.50 78.50 24.87
## mean 2.50 23.50 177.50 0.50 79.75 25.55
## SE.mean 0.65 0.65 3.62 0.29 5.27 2.54
## CI.mean.0.95 2.05 2.05 11.51 0.92 16.76 8.08
## var 1.67 1.67 52.33 0.33 110.92 25.76
## std.dev 1.29 1.29 7.23 0.58 10.53 5.08
## coef.var 0.52 0.05 0.04 1.15 0.13 0.20
# Descriptive statistics with the ID and Gender columns left out, rounded to
# two decimals.
round(stat.desc(subset(mydata, select = -c(ID, Gender))), digits = 2)
## Age Height weight BMI
## nbr.val 4.00 4.00 4.00 4.00
## nbr.null 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00
## min 22.00 169.00 70.00 20.23
## max 25.00 186.00 92.00 32.21
## range 3.00 17.00 22.00 11.98
## sum 94.00 710.00 319.00 102.19
## median 23.50 177.50 78.50 24.87
## mean 23.50 177.50 79.75 25.55
## SE.mean 0.65 3.62 5.27 2.54
## CI.mean.0.95 2.05 11.51 16.76 8.08
## var 1.67 52.33 110.92 25.76
## std.dev 1.29 7.23 10.53 5.08
## coef.var 0.05 0.04 0.13 0.20
# Standard deviation of a single column.
sd(mydata$weight)
## [1] 10.5317
# Standard deviation of every column except ID and Gender.
# vapply() replaces sapply() so the result is always a numeric vector with one
# value per column (sapply() picks its return type from the input). The two
# columns are excluded by name rather than by position, so reordering the data
# frame cannot change which ones are dropped.
vapply(
  mydata[setdiff(names(mydata), c("ID", "Gender"))],
  FUN = sd,
  FUN.VALUE = numeric(1)
)
## Age Height weight BMI
## 1.290994 7.234178 10.531698 5.075201
# Recode Gender from its numeric codes into a labelled factor
# (0 -> "M", 1 -> "F"), then summarise every column of the data frame.
mydata[["Gender"]] <- factor(
  mydata[["Gender"]],
  levels = c(0, 1),
  labels = c("M", "F")
)
summary(mydata)
## ID Age Height Gender weight
## Min. :1.00 Min. :22.00 Min. :169.0 M:2 Min. :70.00
## 1st Qu.:1.75 1st Qu.:22.75 1st Qu.:173.5 F:2 1st Qu.:71.50
## Median :2.50 Median :23.50 Median :177.5 Median :78.50
## Mean :2.50 Mean :23.50 Mean :177.5 Mean :79.75
## 3rd Qu.:3.25 3rd Qu.:24.25 3rd Qu.:181.5 3rd Qu.:86.75
## Max. :4.00 Max. :25.00 Max. :186.0 Max. :92.00
## BMI
## Min. :20.23
## 1st Qu.:22.69
## Median :24.87
## Mean :25.55
## 3rd Qu.:27.73
## Max. :32.21
# Average height for females
# Mean of Height over the rows whose Gender level is "F".
with(mydata, mean(Height[Gender == "F"]))
## [1] 172
# Uncomment to install the package:
# install.packages("psych")
library(psych)
# Descriptive statistics of Height, reported separately for each Gender level.
describeBy(mydata[["Height"]], group = mydata[["Gender"]])
##
## Descriptive statistics by group
## group: M
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2 183 4.24 183 183 4.45 180 186 6 0 -2.75 3
## ------------------------------------------------------------
## group: F
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2 172 4.24 172 172 4.45 169 175 6 0 -2.75 3