# Build a small example data frame with three rows and the columns
# ID, Age and Gender.
mydata <- data.frame(
  ID = c(1, 2, 3),
  Age = c(30, 40, 20),
  Gender = c("F", "M", "M")
)
print(mydata) # show the data frame
## ID Age Gender
## 1 1 30 F
## 2 2 40 M
## 3 3 20 M

# Overwrite the Age value in the first row (row 1, column 2) with 28.
mydata[1, "Age"] <- 28

# mydata2 is mydata without its third column (Gender).
mydata2 <- mydata[, c("ID", "Age")]
Create mydata3, which includes only the first and second rows of mydata.
# Keep only the first and second rows of mydata. Positive row indices state
# the intent directly; because mydata has exactly three rows, dropping the
# third row with mydata[-3, ] would select the same rows, so a single
# assignment is enough.
mydata3 <- mydata[c(1, 2), ]
print(mydata3)
## ID Age Gender
## 1 1 28 F
## 2 2 40 M
# Add a height column, one value per row of mydata.
mydata[["height"]] <- c(197, 190, 175)
print(mydata)
## ID Age Gender height
## 1 1 28 F 197
## 2 2 40 M 190
## 3 3 20 M 175

# Derive height1 by adding 2 to every height value.
mydata[["height1"]] <- mydata[["height"]] + 2
print(mydata)
## ID Age Gender height height1
## 1 1 28 F 197 199
## 2 2 40 M 190 192
## 3 3 20 M 175 177

# Column-by-column summary of the whole data frame.
summary(mydata)
## ID Age Gender height
## Min. :1.0 Min. :20.00 Length:3 Min. :175.0
## 1st Qu.:1.5 1st Qu.:24.00 Class :character 1st Qu.:182.5
## Median :2.0 Median :28.00 Mode :character Median :190.0
## Mean :2.0 Mean :29.33 Mean :187.3
## 3rd Qu.:2.5 3rd Qu.:34.00 3rd Qu.:193.5
## Max. :3.0 Max. :40.00 Max. :197.0
## height1
## Min. :177.0
## 1st Qu.:184.5
## Median :192.0
## Mean :189.3
## 3rd Qu.:195.5
## Max. :199.0
For example, the range of height is max - min = 197 - 175 = 22.
# Summarise every column except the third one (Gender, the only
# character column).
summary(mydata[-3])
## ID Age height height1
## Min. :1.0 Min. :20.00 Min. :175.0 Min. :177.0
## 1st Qu.:1.5 1st Qu.:24.00 1st Qu.:182.5 1st Qu.:184.5
## Median :2.0 Median :28.00 Median :190.0 Median :192.0
## Mean :2.0 Mean :29.33 Mean :187.3 Mean :189.3
## 3rd Qu.:2.5 3rd Qu.:34.00 3rd Qu.:193.5 3rd Qu.:195.5
## Max. :3.0 Max. :40.00 Max. :197.0 Max. :199.0

# Arithmetic mean of the Age column.
mean(mydata[["Age"]])
## [1] 29.33333
Calculate the standard deviation of height1.
# Standard deviation of the height1 column, as computed by sd().
sd(mydata[["height1"]])
## [1] 11.23981
We would like to compute descriptive statistics with the function “describe” from the psych package.
# One-time setup, only needed when psych is not installed yet:
# install.packages("psych")
library("psych")

# Descriptive statistics for every column of mydata; the call is
# namespace-qualified so it is clear that psych provides describe().
psych::describe(mydata)
## vars n mean sd median trimmed mad min max range skew kurtosis
## ID 1 3 2.00 1.00 2 2.00 1.48 1 3 2 0.00 -2.33
## Age 2 3 29.33 10.07 28 29.33 11.86 20 40 20 0.13 -2.33
## Gender* 3 3 1.67 0.58 2 1.67 0.00 1 2 1 -0.38 -2.33
## height 4 3 187.33 11.24 190 187.33 10.38 175 197 22 -0.22 -2.33
## height1 5 3 189.33 11.24 192 189.33 10.38 177 199 22 -0.22 -2.33
## se
## ID 0.58
## Age 5.81
## Gender* 0.33
## height 6.49
## height1 6.49
# One-time setup, only needed when pastecs is not installed yet:
# install.packages("pastecs")
library("pastecs")

# Descriptive statistics for Age, height and height1 only: columns 1 and 3
# (ID and Gender) are left out. Results are rounded to two decimal places.
round(
  pastecs::stat.desc(mydata[, c("Age", "height", "height1")]),
  digits = 2
)
## Age height height1
## nbr.val 3.00 3.00 3.00
## nbr.null 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00
## min 20.00 175.00 177.00
## max 40.00 197.00 199.00
## range 20.00 22.00 22.00
## sum 88.00 562.00 568.00
## median 28.00 190.00 192.00
## mean 29.33 187.33 189.33
## SE.mean 5.81 6.49 6.49
## CI.mean.0.95 25.01 27.92 27.92
## var 101.33 126.33 126.33
## std.dev 10.07 11.24 11.24
## coef.var 0.34 0.06 0.06