# Assemble the example data set: one row per subject, four numeric columns.
# GENDER is held as a 0/1 code at this point.
mydata <- data.frame(
  ID = c(1, 2, 3, 4),
  AGE = c(22, 23, 24, 25),
  HEIGHT = c(180, 186, 175, 170),
  GENDER = c(0, 0, 1, 1)
)
print(mydata)
## ID AGE HEIGHT GENDER
## 1 1 22 180 0
## 2 2 23 186 0
## 3 3 24 175 1
## 4 4 25 170 1

# Overwrite the HEIGHT value in row 4 (HEIGHT is the third column).
mydata[4, "HEIGHT"] <- 169
print(mydata)
## ID AGE HEIGHT GENDER
## 1 1 22 180 0
## 2 2 23 186 0
## 3 3 24 175 1
## 4 4 25 169 1

# Append a weight column, one value per row.
mydata[["weight"]] <- c(85, 70, 72, 92)
# Calculate a new variable called BMI.
# BMI = weight / (HEIGHT / 100)^2; the division by 100 presumably converts
# centimetres to metres (units are not stated in the data).
mydata[["BMI"]] <- with(mydata, weight / (HEIGHT / 100)^2)
summary(mydata[["BMI"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.23 22.69 24.87 25.55 27.73 32.21
#install.packages("pastecs")
library(pastecs)
# Descriptive statistics for the BMI column alone, rounded to two decimals.
round(stat.desc(mydata[["BMI"]]), digits = 2)
## nbr.val nbr.null nbr.na min max range
## 4.00 0.00 0.00 20.23 32.21 11.98
## sum median mean SE.mean CI.mean.0.95 var
## 102.19 24.87 25.55 2.54 8.08 25.76
## std.dev coef.var
## 5.08 0.20

# The same statistics for every column of the data frame.
round(stat.desc(mydata), digits = 2)
## ID AGE HEIGHT GENDER weight BMI
## nbr.val 4.00 4.00 4.00 4.00 4.00 4.00
## nbr.null 0.00 0.00 0.00 2.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00 0.00 0.00
## min 1.00 22.00 169.00 0.00 70.00 20.23
## max 4.00 25.00 186.00 1.00 92.00 32.21
## range 3.00 3.00 17.00 1.00 22.00 11.98
## sum 10.00 94.00 710.00 2.00 319.00 102.19
## median 2.50 23.50 177.50 0.50 78.50 24.87
## mean 2.50 23.50 177.50 0.50 79.75 25.55
## SE.mean 0.65 0.65 3.62 0.29 5.27 2.54
## CI.mean.0.95 2.05 2.05 11.51 0.92 16.76 8.08
## var 1.67 1.67 52.33 0.33 110.92 25.76
## std.dev 1.29 1.29 7.23 0.58 10.53 5.08
## coef.var 0.52 0.05 0.04 1.15 0.13 0.20

# Leave out columns 1 and 4 (ID and GENDER) and describe the rest.
round(stat.desc(mydata[, -c(1, 4)]), digits = 2)
## AGE HEIGHT weight BMI
## nbr.val 4.00 4.00 4.00 4.00
## nbr.null 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00
## min 22.00 169.00 70.00 20.23
## max 25.00 186.00 92.00 32.21
## range 3.00 17.00 22.00 11.98
## sum 94.00 710.00 319.00 102.19
## median 23.50 177.50 78.50 24.87
## mean 23.50 177.50 79.75 25.55
## SE.mean 0.65 3.62 5.27 2.54
## CI.mean.0.95 2.05 11.51 16.76 8.08
## var 1.67 52.33 110.92 25.76
## std.dev 1.29 7.23 10.53 5.08
## coef.var 0.05 0.04 0.13 0.20
# Standard deviation of the weight column.
sd(mydata[["weight"]])
## [1] 10.5317

# Variance-covariance matrix across all columns, rounded to two decimals.
round(var(mydata), digits = 2)
## ID AGE HEIGHT GENDER weight BMI
## ID 1.67 1.67 -7.33 0.67 3.83 3.53
## AGE 1.67 1.67 -7.33 0.67 3.83 3.53
## HEIGHT -7.33 -7.33 52.33 -3.67 -51.50 -31.67
## GENDER 0.67 0.67 -3.67 0.33 1.50 1.54
## weight 3.83 3.83 -51.50 1.50 110.92 50.95
## BMI 3.53 3.53 -31.67 1.54 50.95 25.76

# Standard deviation of each column, returned as a named numeric vector
# (one value per column).
vapply(mydata, FUN = sd, FUN.VALUE = numeric(1))
## ID AGE HEIGHT GENDER weight BMI
## 1.2909944 1.2909944 7.2341781 0.5773503 10.5316982 5.0752010

# Same, after dropping columns 1 and 4 (ID and GENDER).
vapply(mydata[, -c(1, 4)], FUN = sd, FUN.VALUE = numeric(1))
## AGE HEIGHT weight BMI
## 1.290994 7.234178 10.531698 5.075201
# Recode GENDER from its 0/1 code into a labelled factor:
# 0 becomes "male", 1 becomes "female".
mydata[["GENDER"]] <- factor(
  mydata[["GENDER"]],
  levels = c(0, 1),
  labels = c("male", "female")
)

# Summarise every column; the factor column is reported as counts per level.
summary(mydata)
## ID AGE HEIGHT GENDER weight
## Min. :1.00 Min. :22.00 Min. :169.0 male :2 Min. :70.00
## 1st Qu.:1.75 1st Qu.:22.75 1st Qu.:173.5 female:2 1st Qu.:71.50
## Median :2.50 Median :23.50 Median :177.5 Median :78.50
## Mean :2.50 Mean :23.50 Mean :177.5 Mean :79.75
## 3rd Qu.:3.25 3rd Qu.:24.25 3rd Qu.:181.5 3rd Qu.:86.75
## Max. :4.00 Max. :25.00 Max. :186.0 Max. :92.00
## BMI
## Min. :20.23
## 1st Qu.:22.69
## Median :24.87
## Mean :25.55
## 3rd Qu.:27.73
## Max. :32.21
# In the GENDER column, "male :2" and "female:2" are frequencies: two men and two women.
# Average height for the females only.
# Mean of HEIGHT restricted to rows where GENDER is "female".
mean(with(mydata, HEIGHT[GENDER == "female"]))
## [1] 172
#install.packages("psych")
library(psych)
# Descriptive statistics of HEIGHT, computed separately for each GENDER level.
# describeBy() is used because psych deprecated describe.by(); the old name
# emitted a deprecation warning on every call.
describeBy(mydata$HEIGHT, group = mydata$GENDER)
##
## Descriptive statistics by group
## group: male
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2 183 4.24 183 183 4.45 180 186 6 0 -2.75 3
## ------------------------------------------------------------
## group: female
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2 172 4.24 172 172 4.45 169 175 6 0 -2.75 3