# Build the example data set: one row per person with an identifier, age,
# height and a numeric gender code (0/1).
mydata <- data.frame(
  ID = c(1, 2, 3, 4),
  Age = c(22, 23, 24, 25),
  Height = c(180, 186, 175, 170),
  Gender = c(0, 0, 1, 1)
)
print(mydata)
## ID Age Height Gender
## 1 1 22 180 0
## 2 2 23 186 0
## 3 3 24 175 1
## 4 4 25 170 1
# Overwrite the Height value (third column) of the person in row 4 with 169,
# then show the updated data frame.
mydata[4, "Height"] <- 169
print(mydata)
## ID Age Height Gender
## 1 1 22 180 0
## 2 2 23 186 0
## 3 3 24 175 1
## 4 4 25 169 1
# Add a Weight column (presumably kilograms - confirm units).
mydata$Weight <- c(85, 70, 72, 92)
# Calculate BMI (new variable): Weight divided by the square of Height / 100
# (Height is presumably recorded in centimetres, so this converts to metres).
mydata$BMI <- with(mydata, Weight / (Height / 100)^2)
# Descriptive statistics for BMI
summary(mydata$BMI)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.23 22.69 24.87 25.55 27.73 32.21
Mean = average value (the sum of all values divided by the number of values); the most common value is the mode.
#Activating packages
#install.packages("pastecs")
library(pastecs)
# Extended descriptive statistics for BMI, rounded to two decimals
round(stat.desc(mydata$BMI), digits = 2)
## nbr.val nbr.null nbr.na min max range
## 4.00 0.00 0.00 20.23 32.21 11.98
## sum median mean SE.mean CI.mean.0.95 var
## 102.19 24.87 25.55 2.54 8.08 25.76
## std.dev coef.var
## 5.08 0.20
# Table of descriptive statistics for all variables, rounded to two decimals
round(stat.desc(mydata), digits = 2)
## ID Age Height Gender Weight BMI
## nbr.val 4.00 4.00 4.00 4.00 4.00 4.00
## nbr.null 0.00 0.00 0.00 2.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00 0.00 0.00
## min 1.00 22.00 169.00 0.00 70.00 20.23
## max 4.00 25.00 186.00 1.00 92.00 32.21
## range 3.00 3.00 17.00 1.00 22.00 11.98
## sum 10.00 94.00 710.00 2.00 319.00 102.19
## median 2.50 23.50 177.50 0.50 78.50 24.87
## mean 2.50 23.50 177.50 0.50 79.75 25.55
## SE.mean 0.65 0.65 3.62 0.29 5.27 2.54
## CI.mean.0.95 2.05 2.05 11.51 0.92 16.76 8.08
## var 1.67 1.67 52.33 0.33 110.92 25.76
## std.dev 1.29 1.29 7.23 0.58 10.53 5.08
## coef.var 0.52 0.05 0.04 1.15 0.13 0.20
# Removing the ID and Gender columns (columns 1 and 4): the table then covers
# Age, Height, Weight and BMI. Columns are excluded by name rather than by
# position so the selection still holds if the column order changes.
round(
  stat.desc(mydata[, setdiff(names(mydata), c("ID", "Gender"))]),
  digits = 2
)
## Age Height Weight BMI
## nbr.val 4.00 4.00 4.00 4.00
## nbr.null 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00
## min 22.00 169.00 70.00 20.23
## max 25.00 186.00 92.00 32.21
## range 3.00 17.00 22.00 11.98
## sum 94.00 710.00 319.00 102.19
## median 23.50 177.50 78.50 24.87
## mean 23.50 177.50 79.75 25.55
## SE.mean 0.65 3.62 5.27 2.54
## CI.mean.0.95 2.05 11.51 16.76 8.08
## var 1.67 52.33 110.92 25.76
## std.dev 1.29 7.23 10.53 5.08
## coef.var 0.05 0.04 0.13 0.20
# Standard deviation of Weight
sd(mydata[["Weight"]])
## [1] 10.5317
# Standard deviations for all variables except ID and Gender, for which a
# standard deviation does not make sense, so we exclude them.
# vapply() is used instead of sapply() so the result is always a named numeric
# vector; the columns are excluded by name rather than by position.
vapply(
  mydata[, setdiff(names(mydata), c("ID", "Gender"))],
  FUN = sd,
  FUN.VALUE = numeric(1)
)
## Age Height Weight BMI
## 1.290994 7.234178 10.531698 5.075201
# Recode Gender from the numeric codes 0 and 1 into a categorical variable
# (factor) labelled "M" and "F".
mydata$Gender <- factor(
  mydata$Gender,
  levels = c(0, 1),
  labels = c("M", "F")
)
# With Gender stored as a factor, summary() shows its frequency distribution
# (two male and two female).
summary(mydata)
## ID Age Height Gender Weight
## Min. :1.00 Min. :22.00 Min. :169.0 M:2 Min. :70.00
## 1st Qu.:1.75 1st Qu.:22.75 1st Qu.:173.5 F:2 1st Qu.:71.50
## Median :2.50 Median :23.50 Median :177.5 Median :78.50
## Mean :2.50 Mean :23.50 Mean :177.5 Mean :79.75
## 3rd Qu.:3.25 3rd Qu.:24.25 3rd Qu.:181.5 3rd Qu.:86.75
## Max. :4.00 Max. :25.00 Max. :186.0 Max. :92.00
## BMI
## Min. :20.23
## 1st Qu.:22.69
## Median :24.87
## Mean :25.55
## 3rd Qu.:27.73
## Max. :32.21
# Average height for females: mean of Height over the rows where Gender is "F"
with(mydata, mean(Height[Gender == "F"]))
## [1] 172
# Another option: descriptive statistics of Height for each Gender group,
# using describeBy() from the psych package
library(psych)
describeBy(mydata$Height, group = mydata$Gender)
##
## Descriptive statistics by group
## group: M
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2 183 4.24 183 183 4.45 180 186 6 0 -2.75 3
## ------------------------------------------------------------
## group: F
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2 172 4.24 172 172 4.45 169 175 6 0 -2.75 3
Average height of females is 172.