title: “DESCRIPTIVE STATISTICS IN R-STUDIO” author: “mugo_muiruri_james” date: “2023-09-20” output: html_document: df_print: paged word_document: default

library(psych)
library(sjPlot)
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
data("iris")
attach(iris)
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
describeBy(iris)
## Warning in describeBy(iris): no grouping variable requested
##              vars   n mean   sd median trimmed  mad min max range  skew
## Sepal.Length    1 150 5.84 0.83   5.80    5.81 1.04 4.3 7.9   3.6  0.31
## Sepal.Width     2 150 3.06 0.44   3.00    3.04 0.44 2.0 4.4   2.4  0.31
## Petal.Length    3 150 3.76 1.77   4.35    3.76 1.85 1.0 6.9   5.9 -0.27
## Petal.Width     4 150 1.20 0.76   1.30    1.18 1.04 0.1 2.5   2.4 -0.10
## Species*        5 150 2.00 0.82   2.00    2.00 1.48 1.0 3.0   2.0  0.00
##              kurtosis   se
## Sepal.Length    -0.61 0.07
## Sepal.Width      0.14 0.04
## Petal.Length    -1.42 0.14
## Petal.Width     -1.36 0.06
## Species*        -1.52 0.07
kableExtra:: kable_styling(kableExtra::kable(describeBy(iris), digits = 2), bootstrap_options=c("bordered", "responsive", "striped"), full_width = FALSE)
## Warning in describeBy(iris): no grouping variable requested
vars n mean sd median trimmed mad min max range skew kurtosis se
Sepal.Length 1 150 5.84 0.83 5.80 5.81 1.04 4.3 7.9 3.6 0.31 -0.61 0.07
Sepal.Width 2 150 3.06 0.44 3.00 3.04 0.44 2.0 4.4 2.4 0.31 0.14 0.04
Petal.Length 3 150 3.76 1.77 4.35 3.76 1.85 1.0 6.9 5.9 -0.27 -1.42 0.14
Petal.Width 4 150 1.20 0.76 1.30 1.18 1.04 0.1 2.5 2.4 -0.10 -1.36 0.06
Species* 5 150 2.00 0.82 2.00 2.00 1.48 1.0 3.0 2.0 0.00 -1.52 0.07
table1(~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width | Species, data = iris)
setosa
(N=50)
versicolor
(N=50)
virginica
(N=50)
Overall
(N=150)
Sepal.Length
Mean (SD) 5.01 (0.352) 5.94 (0.516) 6.59 (0.636) 5.84 (0.828)
Median [Min, Max] 5.00 [4.30, 5.80] 5.90 [4.90, 7.00] 6.50 [4.90, 7.90] 5.80 [4.30, 7.90]
Sepal.Width
Mean (SD) 3.43 (0.379) 2.77 (0.314) 2.97 (0.322) 3.06 (0.436)
Median [Min, Max] 3.40 [2.30, 4.40] 2.80 [2.00, 3.40] 3.00 [2.20, 3.80] 3.00 [2.00, 4.40]
Petal.Length
Mean (SD) 1.46 (0.174) 4.26 (0.470) 5.55 (0.552) 3.76 (1.77)
Median [Min, Max] 1.50 [1.00, 1.90] 4.35 [3.00, 5.10] 5.55 [4.50, 6.90] 4.35 [1.00, 6.90]
Petal.Width
Mean (SD) 0.246 (0.105) 1.33 (0.198) 2.03 (0.275) 1.20 (0.762)
Median [Min, Max] 0.200 [0.100, 0.600] 1.30 [1.00, 1.80] 2.00 [1.40, 2.50] 1.30 [0.100, 2.50]
mod1 <- lm(Sepal.Length ~ Petal.Length, data = iris)
summary(mod1)
## 
## Call:
## lm(formula = Sepal.Length ~ Petal.Length, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24675 -0.29657 -0.01515  0.27676  1.00269 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.30660    0.07839   54.94   <2e-16 ***
## Petal.Length  0.40892    0.01889   21.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4071 on 148 degrees of freedom
## Multiple R-squared:   0.76,  Adjusted R-squared:  0.7583 
## F-statistic: 468.6 on 1 and 148 DF,  p-value: < 2.2e-16
mod2 <- lm(Sepal.Length ~ Petal.Length + Petal.Width, data = iris)
summary(mod2)
## 
## Call:
## lm(formula = Sepal.Length ~ Petal.Length + Petal.Width, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.18534 -0.29838 -0.02763  0.28925  1.02320 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.19058    0.09705  43.181  < 2e-16 ***
## Petal.Length  0.54178    0.06928   7.820 9.41e-13 ***
## Petal.Width  -0.31955    0.16045  -1.992   0.0483 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4031 on 147 degrees of freedom
## Multiple R-squared:  0.7663, Adjusted R-squared:  0.7631 
## F-statistic:   241 on 2 and 147 DF,  p-value: < 2.2e-16
tab_model(mod1, mod2)
  Sepal Length Sepal Length
Predictors Estimates CI p Estimates CI p
(Intercept) 4.31 4.15 – 4.46 <0.001 4.19 4.00 – 4.38 <0.001
Petal Length 0.41 0.37 – 0.45 <0.001 0.54 0.40 – 0.68 <0.001
Petal Width -0.32 -0.64 – -0.00 0.048
Observations 150 150
R2 / R2 adjusted 0.760 / 0.758 0.766 / 0.763

correlation tables

tab_corr(iris[ , 1:4])
  Sepal.Length Sepal.Width Petal.Length Petal.Width
Sepal.Length   -0.118 0.872*** 0.818***
Sepal.Width -0.118   -0.428*** -0.366***
Petal.Length 0.872*** -0.428***   0.963***
Petal.Width 0.818*** -0.366*** 0.963***  
Computed correlation used pearson-method with listwise-deletion.