# Build a small example data frame: one row per person, with an ID, an age
# and a gender code.
mydata <- data.frame(
  ID = c(1, 2, 3),
  Age = c(30, 40, 20),
  Gender = c("F", "M", "M")
)
print(mydata) # Show the data frame as first created
##   ID Age Gender
## 1  1  30      F
## 2  2  40      M
## 3  3  20      M
# Overwrite the first row's Age (row 1, column 2) with 28.
mydata[["Age"]][1] <- 28
# Copy of mydata without its third column (Gender).
mydata2 <- mydata[, c("ID", "Age")]

print(mydata2)
##   ID Age
## 1  1  28
## 2  2  40
## 3  3  20

Create mydata3, which includes only the first and second rows of mydata.

# Keep rows 1 and 2 of mydata, with every column.
mydata3 <- mydata[seq_len(2), ]
print(mydata3)
##   ID Age Gender
## 1  1  28      F
## 2  2  40      M
# Append a Height column, one value per existing row.
mydata[["Height"]] <- c(178, 170, 190)

print(mydata)
##   ID Age Gender Height
## 1  1  28      F    178
## 2  2  40      M    170
## 3  3  20      M    190
# Derive Height1 as Height shifted up by 2 (vectorised over all rows).
mydata[["Height1"]] <- mydata[["Height"]] + 2

print(mydata)
##   ID Age Gender Height Height1
## 1  1  28      F    178     180
## 2  2  40      M    170     172
## 3  3  20      M    190     192
# Column-wise summary of mydata: min, quartiles, median, mean and max for the
# numeric columns; length, class and mode for the character column Gender.
summary(object = mydata)
##        ID           Age           Gender              Height     
##  Min.   :1.0   Min.   :20.00   Length:3           Min.   :170.0  
##  1st Qu.:1.5   1st Qu.:24.00   Class :character   1st Qu.:174.0  
##  Median :2.0   Median :28.00   Mode  :character   Median :178.0  
##  Mean   :2.0   Mean   :29.33                      Mean   :179.3  
##  3rd Qu.:2.5   3rd Qu.:34.00                      3rd Qu.:184.0  
##  Max.   :3.0   Max.   :40.00                      Max.   :190.0  
##     Height1     
##  Min.   :172.0  
##  1st Qu.:176.0  
##  Median :180.0  
##  Mean   :181.3  
##  3rd Qu.:186.0  
##  Max.   :192.0

What is the range of Height? 190 - 170 = 20 cm

# Summarise only the numeric columns by leaving out Gender (the third column).
summary(mydata[, names(mydata) != "Gender"])
##        ID           Age            Height         Height1     
##  Min.   :1.0   Min.   :20.00   Min.   :170.0   Min.   :172.0  
##  1st Qu.:1.5   1st Qu.:24.00   1st Qu.:174.0   1st Qu.:176.0  
##  Median :2.0   Median :28.00   Median :178.0   Median :180.0  
##  Mean   :2.0   Mean   :29.33   Mean   :179.3   Mean   :181.3  
##  3rd Qu.:2.5   3rd Qu.:34.00   3rd Qu.:184.0   3rd Qu.:186.0  
##  Max.   :3.0   Max.   :40.00   Max.   :190.0   Max.   :192.0
# Arithmetic mean of the Age column.
mean(mydata[["Age"]])
## [1] 29.33333

Calculate the standard deviation of Height1.

# Sample standard deviation of the Height1 column.
sd(mydata[["Height1"]])
## [1] 10.06645

We would like to compute descriptive statistics with the function called “describe”.

# describe() comes from the psych package; uncomment the install line once if
# the package is not yet available.
# install.packages("psych")
library("psych")

# One row of descriptive statistics per column of mydata (Gender is marked
# with * in the output below).
describe(x = mydata)
##         vars n   mean    sd median trimmed   mad min max range  skew kurtosis
## ID         1 3   2.00  1.00      2    2.00  1.48   1   3     2  0.00    -2.33
## Age        2 3  29.33 10.07     28   29.33 11.86  20  40    20  0.13    -2.33
## Gender*    3 3   1.67  0.58      2    1.67  0.00   1   2     1 -0.38    -2.33
## Height     4 3 179.33 10.07    178  179.33 11.86 170 190    20  0.13    -2.33
## Height1    5 3 181.33 10.07    180  181.33 11.86 172 192    20  0.13    -2.33
##           se
## ID      0.58
## Age     5.81
## Gender* 0.33
## Height  5.81
## Height1 5.81

Which library contains the function describe?

# stat.desc() comes from the pastecs package; uncomment the install line once
# if the package is not yet available.
# install.packages("pastecs")
library("pastecs")

# Descriptive statistics for every column except ID and Gender, rounded to
# two decimal places.
round(
  stat.desc(mydata[, setdiff(names(mydata), c("ID", "Gender"))]),
  digits = 2
)
##                 Age Height Height1
## nbr.val        3.00   3.00    3.00
## nbr.null       0.00   0.00    0.00
## nbr.na         0.00   0.00    0.00
## min           20.00 170.00  172.00
## max           40.00 190.00  192.00
## range         20.00  20.00   20.00
## sum           88.00 538.00  544.00
## median        28.00 178.00  180.00
## mean          29.33 179.33  181.33
## SE.mean        5.81   5.81    5.81
## CI.mean.0.95  25.01  25.01   25.01
## var          101.33 101.33  101.33
## std.dev       10.07  10.07   10.07
## coef.var       0.34   0.06    0.06