Descriptive Statistics

library(psych)
## Warning: package 'psych' was built under R version 3.6.2
library(pastecs)
library(Rmisc)
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.6.2
## Loading required package: plyr
# Overview of every column in the built-in `swiss` data set.
# psych's descriptive-statistics table comes first (its printed output is
# not reproduced in this listing) ...
psych::describe(swiss)
# ... followed by base R's per-column minimum, quartiles, mean and maximum.
summary(swiss)
##    Fertility      Agriculture     Examination      Education    
##  Min.   :35.00   Min.   : 1.20   Min.   : 3.00   Min.   : 1.00  
##  1st Qu.:64.70   1st Qu.:35.90   1st Qu.:12.00   1st Qu.: 6.00  
##  Median :70.40   Median :54.10   Median :16.00   Median : 8.00  
##  Mean   :70.14   Mean   :50.66   Mean   :16.49   Mean   :10.98  
##  3rd Qu.:78.45   3rd Qu.:67.65   3rd Qu.:22.00   3rd Qu.:12.00  
##  Max.   :92.50   Max.   :89.70   Max.   :37.00   Max.   :53.00  
##     Catholic       Infant.Mortality
##  Min.   :  2.150   Min.   :10.80   
##  1st Qu.:  5.195   1st Qu.:18.15   
##  Median : 15.140   Median :20.00   
##  Mean   : 41.144   Mean   :19.94   
##  3rd Qu.: 93.125   3rd Qu.:21.70   
##  Max.   :100.000   Max.   :26.60

Note: All confidence intervals use alpha = 0.05 (i.e. a 95% confidence level).

Fertility

# Fertility: pull the column out of `swiss` and inspect its distribution.
FT <- swiss$Fertility
describe(FT)
hist(FT)

# Skewness is mild, so a normal-theory interval for the mean is reasonable.
skew(FT)
## [1] -0.4556871
x_FT <- mean(FT)
s_FT <- sd(FT)
# Standard error of the mean. The sample size is taken from the data
# instead of being hard-coded as 47.
se_FT <- s_FT / sqrt(length(FT))
# Large-sample (z = 1.96) 95% confidence limits: lower, then upper.
x_FT - 1.96 * se_FT
## [1] 66.57123
x_FT + 1.96 * se_FT
## [1] 73.71387
# Interpretation: we are 95% confident that the true population mean lies in
# (66.57123, 73.71387). The 95% describes the procedure -- about 95% of
# intervals built this way from repeated samples would contain the true mean --
# not the probability that this particular interval contains it.

# or
# Rmisc::CI() uses the t distribution (df = n - 1) rather than z = 1.96,
# which is why its limits are slightly wider than the ones above.
CI(FT)
##    upper     mean    lower 
## 73.81025 70.14255 66.47485

Agriculture

# Agriculture: pull the column out of `swiss` and inspect its distribution.
AG <- swiss$Agriculture
describe(AG)
hist(AG)

# Skewness is mild, so a normal-theory interval for the mean is reasonable.
skew(AG)
## [1] -0.3203637
x_AG <- mean(AG)
s_AG <- sd(AG)
# Standard error of the mean. The sample size is taken from the data
# instead of being hard-coded as 47.
se_AG <- s_AG / sqrt(length(AG))
# Large-sample (z = 1.96) 95% confidence limits: lower, then upper.
x_AG - 1.96 * se_AG
## [1] 44.16654
x_AG + 1.96 * se_AG
## [1] 57.15261
# Interpretation: we are 95% confident that the true population mean lies in
# (44.16654, 57.15261). The 95% describes the procedure -- about 95% of
# intervals built this way from repeated samples would contain the true mean --
# not the probability that this particular interval contains it.

# or
# Rmisc::CI() uses the t distribution (df = n - 1) rather than z = 1.96,
# which is why its limits are slightly wider than the ones above.
CI(AG)
##    upper     mean    lower 
## 57.32784 50.65957 43.99131

Examination

# Examination: pull the column out of `swiss` and inspect its distribution.
EX <- swiss$Examination
describe(EX)
hist(EX)

# Skewness is mild, so a normal-theory interval for the mean is reasonable.
skew(EX)
## [1] 0.4463996
x_EX <- mean(EX)
s_EX <- sd(EX)
# Standard error of the mean. The sample size is taken from the data
# instead of being hard-coded as 47.
se_EX <- s_EX / sqrt(length(EX))
# Large-sample (z = 1.96) 95% confidence limits: lower, then upper.
x_EX - 1.96 * se_EX
## [1] 14.20852
x_EX + 1.96 * se_EX
## [1] 18.7702
# Interpretation: we are 95% confident that the true population mean lies in
# (14.20852, 18.7702). The 95% describes the procedure -- about 95% of
# intervals built this way from repeated samples would contain the true mean --
# not the probability that this particular interval contains it.

# or
# Rmisc::CI() uses the t distribution (df = n - 1) rather than z = 1.96,
# which is why its limits are slightly wider than the ones above.
CI(EX)
##    upper     mean    lower 
## 18.83176 16.48936 14.14697

Education

# Education: pull the column out of `swiss` and inspect its distribution.
ED <- swiss$Education
describe(ED)
hist(ED)

# Skewness above 2: the distribution has a long right tail, so the
# normal-theory interval below should be read with caution.
skew(ED)
## [1] 2.268439
x_ED <- mean(ED)
s_ED <- sd(ED)
# Standard error of the mean. The sample size is taken from the data
# instead of being hard-coded as 47.
se_ED <- s_ED / sqrt(length(ED))
# Large-sample (z = 1.96) 95% confidence limits: lower, then upper.
x_ED - 1.96 * se_ED
## [1] 8.229723
x_ED + 1.96 * se_ED
## [1] 13.72772
# Interpretation: we are 95% confident that the true population mean lies in
# (8.229723, 13.72772). The 95% describes the procedure -- about 95% of
# intervals built this way from repeated samples would contain the true mean --
# not the probability that this particular interval contains it.

# Because the skew is high, cross-check the interval with a bootstrap that
# does not assume normality. Resampling does not make the data themselves
# normal; it approximates the sampling distribution of the mean.
# Each replicate resamples all length(ED) observations with replacement,
# so the spread of ED_X reflects the uncertainty of a mean of that size.
set.seed(47) # fixed seed so the report is reproducible
n_boot <- 1000
ED_X <- replicate(
  n_boot,
  mean(sample(ED, size = length(ED), replace = TRUE))
)

hist(ED_X)

# Bootstrap percentile 95% interval: the middle 95% of the resampled means.
# Do NOT apply CI() to ED_X: that divides the spread of the bootstrap means
# by sqrt(number of replicates), so the interval narrows as more resamples
# are drawn and says nothing about the population mean.
quantile(ED_X, probs = c(0.025, 0.975))
# (Printed limits depend on the seed; re-knit the report to display them.)
# The percentile interval is not automatically "more accurate" than the
# z-interval, but it need not be symmetric about the sample mean and so can
# reflect the skewness of the data.

Catholic

# Catholic: pull the column out of `swiss` and inspect its distribution.
CT <- swiss$Catholic
describe(CT)
hist(CT)

# The skewness coefficient itself is moderate, but the quartiles reported by
# summary(swiss) (about 5, 15 and 93) indicate values piled up near both ends
# of the 0-100 range, so the distribution is far from normal.
skew(CT)
## [1] 0.4789257
x_CT <- mean(CT)
s_CT <- sd(CT)
# Standard error of the mean. The sample size is taken from the data
# instead of being hard-coded as 47.
se_CT <- s_CT / sqrt(length(CT))
# Large-sample (z = 1.96) 95% confidence limits: lower, then upper.
x_CT - 1.96 * se_CT
## [1] 29.22061
x_CT + 1.96 * se_CT
## [1] 53.06705
# Interpretation: we are 95% confident that the true population mean lies in
# (29.22061, 53.06705). The 95% describes the procedure -- about 95% of
# intervals built this way from repeated samples would contain the true mean --
# not the probability that this particular interval contains it.


# Because the data are clearly non-normal, cross-check the interval with a
# bootstrap that does not assume normality. Each replicate resamples all
# length(CT) observations with replacement, so the spread of CT_X reflects
# the uncertainty of a mean of that size.
set.seed(47) # fixed seed so the report is reproducible
n_boot <- 1000
CT_X <- replicate(
  n_boot,
  mean(sample(CT, size = length(CT), replace = TRUE))
)

hist(CT_X)

# Bootstrap percentile 95% interval: the middle 95% of the resampled means.
# Do NOT apply CI() to CT_X: that divides the spread of the bootstrap means
# by sqrt(number of replicates), so the interval narrows as more resamples
# are drawn and says nothing about the population mean.
quantile(CT_X, probs = c(0.025, 0.975))
# (Printed limits depend on the seed; re-knit the report to display them.)
# The percentile interval is not automatically "more accurate" than the
# z-interval; it is a check that does not rely on the normality assumption.

Infant.Mortality

# Infant.Mortality: pull the column out of `swiss` and inspect its
# distribution.
IM <- swiss$Infant.Mortality
describe(IM)
hist(IM)

# Skewness is mild, so a normal-theory interval for the mean is reasonable.
skew(IM)
## [1] -0.3314326
x_IM <- mean(IM)
s_IM <- sd(IM)
# Standard error of the mean. The sample size is taken from the data
# instead of being hard-coded as 47.
se_IM <- s_IM / sqrt(length(IM))
# Large-sample (z = 1.96) 95% confidence limits: lower, then upper.
x_IM - 1.96 * se_IM
## [1] 19.10983
x_IM + 1.96 * se_IM
## [1] 20.77528
# Interpretation: we are 95% confident that the true population mean lies in
# (19.10983, 20.77528). The 95% describes the procedure -- about 95% of
# intervals built this way from repeated samples would contain the true mean --
# not the probability that this particular interval contains it.

# or
# Rmisc::CI() uses the t distribution (df = n - 1) rather than z = 1.96,
# which is why its limits are slightly wider than the ones above.
CI(IM)
##    upper     mean    lower 
## 20.79775 19.94255 19.08735