Descriptive Statistics

Data Structures in R

There are five common data structures in R, namely:

  1. Vector
  2. Matrix
  3. Array
  4. Data Frame
  5. Lists

Vectors can be categorized into up to six types, namely:

  • Numeric
  • Integer
  • Character
  • Logical
  • Factor
  • Ordinal
# Overview of Descriptive Statistics
# Build four example vectors of length 16 (double, logical, integer and
# character) and combine them column-wise into a data frame.
y1 <- c(
  1.13, 1.11, 0.629, 0.616, 0.265, 0.320,
  0.849, 0.378, 0.056, 0.044, 0.241, 0.221,
  0.110, 0.190, 0.151, 0.049
)
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, which would silently change these values.
y2 <- c(
  TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE,
  TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE
)
y3 <- 1:16
y4 <- c(
  "abid", "ali", "ali", "ali", "awo", "awo", "ali",
  "omer", "ali", "abdi", "ali", "awo", "ayan",
  "ali", "omer", "ali"
)
data <- data.frame(y1, y2, y3, y4)
# Save a CSV copy of the data frame in the user's home directory.
write.csv(data, "~/jibril.csv")
# Data transformation
# Category labels: two "n", three "p", five "s" and six "u".
y5 <- rep(c("n", "p", "s", "u"), times = c(2, 3, 5, 6))
# Ordered factor: the levels carry a ranking (n < p < s < u).
y6 <- factor(y5, ordered = TRUE)
y6
##  [1] n n p p p s s s s s u u u u u u
## Levels: n < p < s < u
# Plain (unordered) factor built from the same labels.
y7 <- factor(y5)
y7
##  [1] n n p p p s s s s s u u u u u u
## Levels: n p s u
# Convert the integer sequence y3 to double precision.
y8 <- as.double(y3)
# Inspect the class of each example vector.
class(y1)
## [1] "numeric"
class(y4)
## [1] "character"
class(y2)
## [1] "logical"
class(y3)
## [1] "integer"
# Descriptive Statistics
# Central tendency and spread of y1 using base R / stats functions.
mean(y1)
## [1] 0.3974375
median(y1)
## [1] 0.253
# base::mode() reports an object's storage mode, not its most frequent
# value, so it does not give the statistical mode.
mode(y1)
## [1] "numeric"
# Statistical mode: the value(s) that occur most often in x.
# Returns every tied value; an empty input is returned unchanged.
stat_mode <- function(x) {
  if (length(x) == 0L) {
    return(x)
  }
  distinct <- unique(x)
  counts <- tabulate(match(x, distinct))
  distinct[counts == max(counts)]
}
# When no value repeats, every observation ties and all are returned.
stat_mode(y1)
sd(y1)
## [1] 0.3631802
var(y1)
## [1] 0.1318999
max(y1)
## [1] 1.13
min(y1)
## [1] 0.044
range(y1)
## [1] 0.044 1.130
# Summary statistics of y1 from three add-on packages.
library(AdequacyModel)
# dev.new() opens a fresh graphics device on any platform; windows() is
# only available on Windows builds of R.
# NOTE(review): the device is presumably opened because descriptive() also
# draws plots -- confirm against the AdequacyModel documentation.
dev.new()
descriptive(y1)
## $mean
## [1] 0.39744
## 
## $median
## [1] 0.253
## 
## $mode
## [1] 0.1
## 
## $variance
## [1] 0.1319
## 
## $Skewness
## [1] 0.98292
## 
## $Kurtosis
## [1] -0.34013
## 
## $minimum
## [1] 0.044
## 
## $maximum
## [1] 1.13
## 
## $n
## [1] 16
library(psych)

describe(y1)
library(EnvStats)
## 
## Attaching package: 'EnvStats'
## The following objects are masked from 'package:stats':
## 
##     predict, predict.lm
## The following object is masked from 'package:base':
## 
##     print.default
summaryFull(y1)
##                                    y1
## N                            16.00000
## Mean                          0.39740
## Median                        0.25300
## 10% Trimmed Mean              0.37040
## Geometric Mean                0.25220
## Skew                          1.08800
## Kurtosis                      0.01794
## Min                           0.04400
## Max                           1.13000
## Range                         1.08600
## 1st Quartile                  0.14070
## 3rd Quartile                  0.61920
## Standard Deviation            0.36320
## Geometric Standard Deviation  2.88400
## Interquartile Range           0.47850
## Median Absolute Deviation     0.25200
## Coefficient of Variation      0.91380
## attr(,"class")
## [1] "summaryStats"
## attr(,"stats.in.rows")
## [1] TRUE
## attr(,"drop0trailing")
## [1] TRUE
# Data Visualization
# Numerical Data
# Each figure is drawn on its own device. dev.new() is used instead of
# windows() because windows() exists only on Windows builds of R.

# Histogram with default settings.
dev.new()
hist(y1)

# Histogram with a fill colour, title and axis labels.
dev.new()
hist(y1, col = "green", main = "Final Exam Scores",
     xlab = "Mathematics", ylab = "Grades")

# Kernel density estimate with default settings.
dev.new()
plot(density(y1))

# Density curve with a line colour, title and axis labels.
dev.new()
plot(density(y1), col = "red", main = "Final Exam Scores",
     xlab = "Mathematics", ylab = "Grades")

# Same density curve, then filled in. polygon() only adds to the existing
# plot, so the title and axis labels are supplied by plot() alone.
dev.new()
y1_density <- density(y1)
plot(y1_density, col = "red", main = "Final Exam Scores",
     xlab = "Mathematics", ylab = "Grades")
polygon(y1_density, col = "blue", border = "black")

# Box plot.
dev.new()
boxplot(y1, col = "cyan", main = "Final Exam Scores",
        xlab = "Mathematics", ylab = "Grades")

# Distribution plots of y1 from add-on packages. Each figure is drawn on
# its own device; dev.new() is used instead of windows() because windows()
# exists only on Windows builds of R.

# Violin plot.
library(vioplot)
## Loading required package: sm
## Package 'sm', version 2.2-5.7: type help(sm) for summary information
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
dev.new()
vioplot(y1, col = "magenta", main = "Final Exam Scores",
        xlab = "Mathematics", ylab = "Grades")

# Bean plot.
library(beanplot)
dev.new()
beanplot(y1, col = "darkblue", main = "Final Exam Scores",
         xlab = "Mathematics", ylab = "Grades")
## log="y" selected

# Bee swarm plot.
library(beeswarm)
dev.new()
beeswarm(y1, col = "darkred", main = "Final Exam Scores",
         xlab = "Mathematics", ylab = "Grades")