# Build a small example data frame describing three people.
mydata <- data.frame(
  ID = c(1, 2, 3),
  Age = c(30, 40, 20),
  Gender = c("F", "M", "M")
)
print(mydata)  # show the data frame
##   ID Age Gender
## 1  1  30      F
## 2  2  40      M
## 3  3  20      M
# Overwrite the cell in the first row of the Age column (column 2) with 28.
mydata[1, "Age"] <- 28
# Keep every column except Gender, which is the third variable.
mydata2 <- mydata[, names(mydata) != "Gender"]

print(mydata2)
##   ID Age
## 1  1  28
## 2  2  40
## 3  3  20

Create mydata3, which includes only the first and second rows of mydata.

# Take the first two rows of mydata, keeping all columns.
mydata3 <- mydata[1:2, ]
print(mydata3)
##   ID Age Gender
## 1  1  28      F
## 2  2  40      M
# Append a Height column, one value per row.
mydata[["Height"]] <- c(178, 170, 190)
print(mydata)
##   ID Age Gender Height
## 1  1  28      F    178
## 2  2  40      M    170
## 3  3  20      M    190
# Derive Height1 by adding 2 to every Height value.
mydata[["Height1"]] <- mydata[["Height"]] + 2
print(mydata)
##   ID Age Gender Height Height1
## 1  1  28      F    178     180
## 2  2  40      M    170     172
## 3  3  20      M    190     192
# Per-column summary of the whole data frame.
summary(mydata)
##        ID           Age           Gender              Height     
##  Min.   :1.0   Min.   :20.00   Length:3           Min.   :170.0  
##  1st Qu.:1.5   1st Qu.:24.00   Class :character   1st Qu.:174.0  
##  Median :2.0   Median :28.00   Mode  :character   Median :178.0  
##  Mean   :2.0   Mean   :29.33                      Mean   :179.3  
##  3rd Qu.:2.5   3rd Qu.:34.00                      3rd Qu.:184.0  
##  Max.   :3.0   Max.   :40.00                      Max.   :190.0  
##     Height1     
##  Min.   :172.0  
##  1st Qu.:176.0  
##  Median :180.0  
##  Mean   :181.3  
##  3rd Qu.:186.0  
##  Max.   :192.0

What is the range of Height? It is the maximum minus the minimum: 190 - 170 = 20 cm.

# Summarise every column except Gender (the third variable).
summary(mydata[, names(mydata) != "Gender"])
##        ID           Age            Height         Height1     
##  Min.   :1.0   Min.   :20.00   Min.   :170.0   Min.   :172.0  
##  1st Qu.:1.5   1st Qu.:24.00   1st Qu.:174.0   1st Qu.:176.0  
##  Median :2.0   Median :28.00   Median :178.0   Median :180.0  
##  Mean   :2.0   Mean   :29.33   Mean   :179.3   Mean   :181.3  
##  3rd Qu.:2.5   3rd Qu.:34.00   3rd Qu.:184.0   3rd Qu.:186.0  
##  Max.   :3.0   Max.   :40.00   Max.   :190.0   Max.   :192.0
# Arithmetic mean of the Age column.
mean(mydata[["Age"]])
## [1] 29.33333

Calculate the standard deviation of Height1.

# Sample standard deviation of the Height1 column.
sd(mydata[["Height1"]])
## [1] 10.06645

We would like to compute descriptive statistics with the function called “describe”.

# Run once if the package is not yet installed:
# install.packages("psych")
library(psych)
# Descriptive statistics for every column, using describe() from psych.
psych::describe(mydata)
##         vars n   mean    sd median trimmed   mad min max range  skew kurtosis
## ID         1 3   2.00  1.00      2    2.00  1.48   1   3     2  0.00    -2.33
## Age        2 3  29.33 10.07     28   29.33 11.86  20  40    20  0.13    -2.33
## Gender*    3 3   1.67  0.58      2    1.67  0.00   1   2     1 -0.38    -2.33
## Height     4 3 179.33 10.07    178  179.33 11.86 170 190    20  0.13    -2.33
## Height1    5 3 181.33 10.07    180  181.33 11.86 172 192    20  0.13    -2.33
##           se
## ID      0.58
## Age     5.81
## Gender* 0.33
## Height  5.81
## Height1 5.81

In which library is the function describe included?

# Run once if the package is not yet installed:
# install.packages("pastecs")
library(pastecs)
# Descriptive statistics for all columns except ID (first) and Gender (third),
# rounded to two decimal places.
round(
  stat.desc(mydata[, !names(mydata) %in% c("ID", "Gender")]),
  digits = 2
)
##                 Age Height Height1
## nbr.val        3.00   3.00    3.00
## nbr.null       0.00   0.00    0.00
## nbr.na         0.00   0.00    0.00
## min           20.00 170.00  172.00
## max           40.00 190.00  192.00
## range         20.00  20.00   20.00
## sum           88.00 538.00  544.00
## median        28.00 178.00  180.00
## mean          29.33 179.33  181.33
## SE.mean        5.81   5.81    5.81
## CI.mean.0.95  25.01  25.01   25.01
## var          101.33 101.33  101.33
## std.dev       10.07  10.07   10.07
## coef.var       0.34   0.06    0.06