library(psych)
## Warning: package 'psych' was built under R version 3.6.2
library(pastecs)
library(Rmisc)
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.6.2
## Loading required package: plyr
# Overview of the whole `swiss` data frame before looking at each column.
# `swiss` is not created in this script; it is the data set that ships with R
# (datasets package).
# describe() is presumably psych::describe (psych is attached at the top of
# the script); its printed table is not reproduced below.
describe(swiss)
# Base R five-number summary plus mean for every column; the echoed output
# follows as `##` lines.
summary(swiss)
## Fertility Agriculture Examination Education
## Min. :35.00 Min. : 1.20 Min. : 3.00 Min. : 1.00
## 1st Qu.:64.70 1st Qu.:35.90 1st Qu.:12.00 1st Qu.: 6.00
## Median :70.40 Median :54.10 Median :16.00 Median : 8.00
## Mean :70.14 Mean :50.66 Mean :16.49 Mean :10.98
## 3rd Qu.:78.45 3rd Qu.:67.65 3rd Qu.:22.00 3rd Qu.:12.00
## Max. :92.50 Max. :89.70 Max. :37.00 Max. :53.00
## Catholic Infant.Mortality
## Min. : 2.150 Min. :10.80
## 1st Qu.: 5.195 1st Qu.:18.15
## Median : 15.140 Median :20.00
## Mean : 41.144 Mean :19.94
## 3rd Qu.: 93.125 3rd Qu.:21.70
## Max. :100.000 Max. :26.60
# Fertility ----
# Column of interest, pulled out of the `swiss` data frame.
FT <- swiss$Fertility

# Descriptive statistics, histogram and skewness of the raw values.
describe(FT)
hist(FT)
skew(FT)
## [1] -0.4556871

# 95% confidence interval for the mean using the normal approximation
# (z = 1.96). The sample size comes from the data rather than a hard-coded 47,
# so the interval stays correct if the input ever changes.
x_FT <- mean(FT)
s_FT <- sd(FT)
se_FT <- s_FT / sqrt(length(FT)) # standard error of the mean
x_FT - 1.96 * se_FT
## [1] 66.57123
x_FT + 1.96 * se_FT
## [1] 73.71387
# Interpretation: we are 95% confident that the true population mean lies in
# (66.57123, 73.71387). The 95% refers to the procedure: about 95% of intervals
# built this way from repeated samples would contain the true mean.
# or
# Rmisc::CI() computes the interval with a t quantile (n - 1 degrees of
# freedom) at its default 95% level, hence the slightly wider result.
CI(FT)
## upper mean lower
## 73.81025 70.14255 66.47485
# Agriculture ----
# Column of interest, pulled out of the `swiss` data frame.
AG <- swiss$Agriculture

# Descriptive statistics, histogram and skewness of the raw values.
describe(AG)
hist(AG)
skew(AG)
## [1] -0.3203637

# 95% confidence interval for the mean using the normal approximation
# (z = 1.96). The sample size comes from the data rather than a hard-coded 47,
# so the interval stays correct if the input ever changes.
x_AG <- mean(AG)
s_AG <- sd(AG)
se_AG <- s_AG / sqrt(length(AG)) # standard error of the mean
x_AG - 1.96 * se_AG
## [1] 44.16654
x_AG + 1.96 * se_AG
## [1] 57.15261
# Interpretation: we are 95% confident that the true population mean lies in
# (44.16654, 57.15261). The 95% refers to the procedure: about 95% of intervals
# built this way from repeated samples would contain the true mean.
# or
# Rmisc::CI() computes the interval with a t quantile (n - 1 degrees of
# freedom) at its default 95% level, hence the slightly wider result.
CI(AG)
## upper mean lower
## 57.32784 50.65957 43.99131
# Examination ----
# Column of interest, pulled out of the `swiss` data frame.
EX <- swiss$Examination

# Descriptive statistics, histogram and skewness of the raw values.
describe(EX)
hist(EX)
skew(EX)
## [1] 0.4463996

# 95% confidence interval for the mean using the normal approximation
# (z = 1.96). The sample size comes from the data rather than a hard-coded 47,
# so the interval stays correct if the input ever changes.
x_EX <- mean(EX)
s_EX <- sd(EX)
se_EX <- s_EX / sqrt(length(EX)) # standard error of the mean
x_EX - 1.96 * se_EX
## [1] 14.20852
x_EX + 1.96 * se_EX
## [1] 18.7702
# Interpretation: we are 95% confident that the true population mean lies in
# (14.20852, 18.7702). The 95% refers to the procedure: about 95% of intervals
# built this way from repeated samples would contain the true mean.
# or
# Rmisc::CI() computes the interval with a t quantile (n - 1 degrees of
# freedom) at its default 95% level, hence the slightly wider result.
CI(EX)
## upper mean lower
## 18.83176 16.48936 14.14697
# Education ----
# Column of interest, pulled out of the `swiss` data frame.
ED <- swiss$Education

# Descriptive statistics, histogram and skewness of the raw values.
describe(ED)
hist(ED)
skew(ED)
## [1] 2.268439

# 95% confidence interval for the mean using the normal approximation
# (z = 1.96). The sample size comes from the data rather than a hard-coded 47,
# so the interval stays correct if the input ever changes.
x_ED <- mean(ED)
s_ED <- sd(ED)
se_ED <- s_ED / sqrt(length(ED)) # standard error of the mean
x_ED - 1.96 * se_ED
## [1] 8.229723
x_ED + 1.96 * se_ED
## [1] 13.72772
# Interpretation: we are 95% confident that the true population mean lies in
# (8.229723, 13.72772). The 95% refers to the procedure: about 95% of intervals
# built this way from repeated samples would contain the true mean.

# Education is strongly right-skewed (skew about 2.27), so the normal
# approximation above is less trustworthy. Resampling does not make the data
# themselves normal; what becomes approximately normal is the distribution of
# the resample means (central limit theorem).
# Draw 100 resamples of 10 observations each (with replacement) from Education
# and keep the mean of every resample. vapply() fills a numeric vector directly
# instead of growing it with c(), and no variable named `sample` is created, so
# base::sample is not shadowed.
ED_X <- vapply(
  seq_len(100),
  function(i) mean(sample(ED, 10, replace = TRUE)),
  numeric(1)
)
hist(ED_X)
CI(ED_X)
## upper mean lower
## 11.39951 10.85800 10.31649
# The values above come from one run; no seed is set, so they change on every
# run.
# NOTE(review): this interval is narrower than the z-based one, but not because
# it is more accurate. CI() treats the 100 resample means as 100 independent
# observations, so its width shrinks with the number of resamples drawn, not
# with the 47 observations actually available. It describes the average of the
# resample means, not the population mean. For a bootstrap interval of the
# mean, draw resamples of size length(ED) and take
# quantile(means, c(0.025, 0.975)).
# Catholic ----
# Column of interest, pulled out of the `swiss` data frame.
CT <- swiss$Catholic

# Descriptive statistics, histogram and skewness of the raw values.
describe(CT)
hist(CT)
skew(CT)
## [1] 0.4789257

# 95% confidence interval for the mean using the normal approximation
# (z = 1.96). The sample size comes from the data rather than a hard-coded 47,
# so the interval stays correct if the input ever changes.
x_CT <- mean(CT)
s_CT <- sd(CT)
se_CT <- s_CT / sqrt(length(CT)) # standard error of the mean
x_CT - 1.96 * se_CT
## [1] 29.22061
x_CT + 1.96 * se_CT
## [1] 53.06705
# Interpretation: we are 95% confident that the true population mean lies in
# (29.22061, 53.06705). The 95% refers to the procedure: about 95% of intervals
# built this way from repeated samples would contain the true mean.

# This variable was also judged too skewed for the normal approximation, so the
# resampling step is repeated here: 100 resamples of 10 observations each (with
# replacement), keeping the mean of every resample. Resampling does not change
# the shape of the data; only the distribution of the resample means is
# approximately normal.
# vapply() fills a numeric vector directly instead of growing it with c(), and
# no variable named `sample` is created, so base::sample is not shadowed.
CT_X <- vapply(
  seq_len(100),
  function(i) mean(sample(CT, 10, replace = TRUE)),
  numeric(1)
)
hist(CT_X)
CI(CT_X)
## upper mean lower
## 41.70470 38.97571 36.24672
# The values above come from one run; no seed is set, so they change on every
# run.
# NOTE(review): this interval is narrower than the z-based one, but not because
# it is more accurate. CI() treats the 100 resample means as 100 independent
# observations, so its width shrinks with the number of resamples drawn, not
# with the 47 observations actually available. It describes the average of the
# resample means, not the population mean. For a bootstrap interval of the
# mean, draw resamples of size length(CT) and take
# quantile(means, c(0.025, 0.975)).
# Infant mortality ----
# Column of interest, pulled out of the `swiss` data frame.
IM <- swiss$Infant.Mortality

# Descriptive statistics, histogram and skewness of the raw values.
describe(IM)
hist(IM)
skew(IM)
## [1] -0.3314326

# 95% confidence interval for the mean using the normal approximation
# (z = 1.96). The sample size comes from the data rather than a hard-coded 47,
# so the interval stays correct if the input ever changes.
x_IM <- mean(IM)
s_IM <- sd(IM)
se_IM <- s_IM / sqrt(length(IM)) # standard error of the mean
x_IM - 1.96 * se_IM
## [1] 19.10983
x_IM + 1.96 * se_IM
## [1] 20.77528
# Interpretation: we are 95% confident that the true population mean lies in
# (19.10983, 20.77528). The 95% refers to the procedure: about 95% of intervals
# built this way from repeated samples would contain the true mean.
# or
# Rmisc::CI() computes the interval with a t quantile (n - 1 degrees of
# freedom) at its default 95% level, hence the slightly wider result.
CI(IM)
## upper mean lower
## 20.79775 19.94255 19.08735