# Build a small example data frame with one row per person:
# a numeric ID, a numeric Age, and a character Gender.
mydata <- data.frame(
  ID = c(1, 2, 3),
  Age = c(30, 40, 20),
  Gender = c("F", "M", "M")
)
print(mydata) # show the data frame as originally entered
##   ID Age Gender
## 1  1  30      F
## 2  2  40      M
## 3  3  20      M
# Overwrite the Age value in the first row (30 becomes 28).
mydata[1, "Age"] <- 28
# Copy of mydata without its third variable (Gender).
mydata2 <- mydata[, c("ID", "Age")]

Create mydata3, which includes only the first and second rows of mydata.

# Two equivalent ways to keep only the first two rows of mydata.
# Option 1: drop the third row with a negative row index.
mydata3 <- mydata[-3, ]
# Option 2: select rows 1 and 2 explicitly; this overwrites the result of
# option 1 with an identical data frame.
mydata3 <- mydata[1:2, ]
print(mydata3)
##   ID Age Gender
## 1  1  28      F
## 2  2  40      M
# Add a new column holding one height value per row.
mydata[["height"]] <- c(197, 190, 175)

print(mydata)
##   ID Age Gender height
## 1  1  28      F    197
## 2  2  40      M    190
## 3  3  20      M    175
# Derive a second column from the first: every height increased by 2.
mydata[["height1"]] <- mydata[["height"]] + 2

print(mydata)
##   ID Age Gender height height1
## 1  1  28      F    197     199
## 2  2  40      M    190     192
## 3  3  20      M    175     177
# Per-column summary; numeric columns get quartiles and the mean, while the
# character column Gender only reports its length, class, and mode.
summary(mydata)
##        ID           Age           Gender              height     
##  Min.   :1.0   Min.   :20.00   Length:3           Min.   :175.0  
##  1st Qu.:1.5   1st Qu.:24.00   Class :character   1st Qu.:182.5  
##  Median :2.0   Median :28.00   Mode  :character   Median :190.0  
##  Mean   :2.0   Mean   :29.33                      Mean   :187.3  
##  3rd Qu.:2.5   3rd Qu.:34.00                      3rd Qu.:193.5  
##  Max.   :3.0   Max.   :40.00                      Max.   :197.0  
##     height1     
##  Min.   :177.0  
##  1st Qu.:184.5  
##  Median :192.0  
##  Mean   :189.3  
##  3rd Qu.:195.5  
##  Max.   :199.0

For example, the range of height is max - min = 197 - 175 = 22.

# Summarise every column except the third one (Gender), so that only the
# numeric variables are reported.
summary(mydata[-3])
##        ID           Age            height         height1     
##  Min.   :1.0   Min.   :20.00   Min.   :175.0   Min.   :177.0  
##  1st Qu.:1.5   1st Qu.:24.00   1st Qu.:182.5   1st Qu.:184.5  
##  Median :2.0   Median :28.00   Median :190.0   Median :192.0  
##  Mean   :2.0   Mean   :29.33   Mean   :187.3   Mean   :189.3  
##  3rd Qu.:2.5   3rd Qu.:34.00   3rd Qu.:193.5   3rd Qu.:195.5  
##  Max.   :3.0   Max.   :40.00   Max.   :197.0   Max.   :199.0
# Arithmetic mean of the Age column on its own.
mean(mydata[["Age"]])
## [1] 29.33333

Calculate the standard deviation of height1.

# Sample standard deviation of the height1 column.
sd(mydata[["height1"]])
## [1] 11.23981

We would like to compute descriptive statistics with the function called “describe” (from the psych package).

# install.packages("psych")  # uncomment and run once if psych is not installed
library(psych)

# One row of descriptive statistics per column of mydata. In the output below
# the character column is listed as "Gender*".
describe(x = mydata)
##         vars n   mean    sd median trimmed   mad min max range  skew kurtosis
## ID         1 3   2.00  1.00      2    2.00  1.48   1   3     2  0.00    -2.33
## Age        2 3  29.33 10.07     28   29.33 11.86  20  40    20  0.13    -2.33
## Gender*    3 3   1.67  0.58      2    1.67  0.00   1   2     1 -0.38    -2.33
## height     4 3 187.33 11.24    190  187.33 10.38 175 197    22 -0.22    -2.33
## height1    5 3 189.33 11.24    192  189.33 10.38 177 199    22 -0.22    -2.33
##           se
## ID      0.58
## Age     5.81
## Gender* 0.33
## height  6.49
## height1 6.49
# install.packages("pastecs")  # uncomment and run once if pastecs is not installed

library(pastecs)

# Descriptive statistics for Age, height, and height1 only: the negative
# indices drop COLUMNS 1 (ID) and 3 (Gender), not rows. The result is then
# rounded to two decimal places (decimals are kept, just shortened).
round(stat.desc(mydata[, c(-1, -3)]), 2)
##                 Age height height1
## nbr.val        3.00   3.00    3.00
## nbr.null       0.00   0.00    0.00
## nbr.na         0.00   0.00    0.00
## min           20.00 175.00  177.00
## max           40.00 197.00  199.00
## range         20.00  22.00   22.00
## sum           88.00 562.00  568.00
## median        28.00 190.00  192.00
## mean          29.33 187.33  189.33
## SE.mean        5.81   6.49    6.49
## CI.mean.0.95  25.01  27.92   27.92
## var          101.33 126.33  126.33
## std.dev       10.07  11.24   11.24
## coef.var       0.34   0.06    0.06