The first thing I did was enter “?swiss” so that I could get an explanation of what each variable represented, which I have copied below:

[,1] Fertility Ig, ‘common standardized fertility measure’

[,2] Agriculture % of males involved in agriculture as occupation

[,3] Examination % draftees receiving highest mark on army examination

[,4] Education % education beyond primary school for draftees.

[,5] Catholic % ‘catholic’ (as opposed to ‘protestant’).

[,6] Infant.Mortality live births who live less than 1 year.

Then, I looked at some descriptive statistics and plots for the data:

# Descriptive statistics ----
library(psych)

# Column types and the first few values of each variable.
str(swiss)
## 'data.frame':    47 obs. of  6 variables:
##  $ Fertility       : num  80.2 83.1 92.5 85.8 76.9 76.1 83.8 92.4 82.4 82.9 ...
##  $ Agriculture     : num  17 45.1 39.7 36.5 43.5 35.3 70.2 67.8 53.3 45.2 ...
##  $ Examination     : int  15 6 5 12 17 9 16 14 12 16 ...
##  $ Education       : int  12 9 5 7 15 7 7 8 7 13 ...
##  $ Catholic        : num  9.96 84.84 93.4 33.77 5.16 ...
##  $ Infant.Mortality: num  22.2 22.2 20.2 20.3 20.6 26.6 23.6 24.9 21 24.4 ...

# Per-variable summary table (n, mean, sd, median, skew, kurtosis, se, ...).
describe(swiss)
##                  vars  n  mean    sd median trimmed   mad   min   max
## Fertility           1 47 70.14 12.49  70.40   70.66 10.23 35.00  92.5
## Agriculture         2 47 50.66 22.71  54.10   51.16 23.87  1.20  89.7
## Examination         3 47 16.49  7.98  16.00   16.08  7.41  3.00  37.0
## Education           4 47 10.98  9.62   8.00    9.38  5.93  1.00  53.0
## Catholic            5 47 41.14 41.70  15.14   39.12 18.65  2.15 100.0
## Infant.Mortality    6 47 19.94  2.91  20.00   19.98  2.82 10.80  26.6
##                  range  skew kurtosis   se
## Fertility        57.50 -0.46     0.26 1.82
## Agriculture      88.50 -0.32    -0.89 3.31
## Examination      34.00  0.45    -0.14 1.16
## Education        52.00  2.27     6.14 1.40
## Catholic         97.85  0.48    -1.67 6.08
## Infant.Mortality 15.80 -0.33     0.78 0.42

# Box plots of all six variables drawn on one shared axis.
boxplot(swiss)

# One histogram per variable, in column order. The title and x-axis label are
# set explicitly to the text hist() derives on its own for a direct call such
# as hist(swiss$Fertility), so every plot is labelled exactly as before.
for (column in names(swiss)) {
  label <- paste0("swiss$", column)
  hist(swiss[[column]], main = paste("Histogram of", label), xlab = label)
}

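Based on the histograms and box plots for the Catholic and Education variables, I don’t think these variables are normally distributed (Education is strongly right-skewed, with a skew of 2.27, and the Catholic mean of 41.14 is far from its median of 15.14), so I have excluded them from the analysis that follows.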

I calculated a 95% confidence interval for the mean of each of the remaining variables: Fertility, Agriculture, Examination, and Infant.Mortality.

# Normal-approximation (z) confidence interval for the mean of `x`.
#
# Computes mean(x) + z * sd(x) / sqrt(n) for the lower and upper tail, where
# z is the standard-normal quantile of each tail probability and n is taken
# from the data with length(x) instead of being typed in by hand.
#
# x      Numeric vector. NA values are not removed, so they propagate to the
#        result.
# level  Confidence level; defaults to 0.95.
#
# Returns an unnamed numeric vector c(lower, upper).
#
# Because the quantiles come from qnorm(), this is a z-interval. When the
# standard deviation is estimated from the sample, a t-interval, which uses
# qt(p, df = n - 1) in place of qnorm(p), is slightly wider.
normal_ci <- function(x, level = 0.95) {
  tail_probs <- c((1 - level) / 2, (1 + level) / 2)
  mean(x) + qnorm(tail_probs) * sd(x) / sqrt(length(x))
}

normal_ci(swiss$Fertility)
## [1] 66.57130 73.71381
normal_ci(swiss$Agriculture)
## [1] 44.16666 57.15249
normal_ci(swiss$Examination)
## [1] 14.20856 18.77016
normal_ci(swiss$Infant.Mortality)
## [1] 19.10984 20.77526

Online, I found an alternative method for calculating confidence intervals: the CI() function from the Rmisc package.

# Confidence intervals for the same four variables, this time from CI() in
# the Rmisc package. Each result is printed as upper / mean / lower.
# NOTE(review): CI() appears to use Student's t quantiles (df = n - 1) rather
# than standard-normal quantiles -- confirm in ?Rmisc::CI.
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
ci_columns <- c("Fertility", "Agriculture", "Examination", "Infant.Mortality")
for (column in ci_columns) {
  print(CI(swiss[[column]], ci = 0.95))
}
##    upper     mean    lower 
## 73.81025 70.14255 66.47485
##    upper     mean    lower 
## 57.32784 50.65957 43.99131
##    upper     mean    lower 
## 18.83176 16.48936 14.14697
##    upper     mean    lower 
## 20.79775 19.94255 19.08735

The intervals from CI() are slightly different from (a little wider than) the ones I calculated by hand, so I’m curious if anyone knows why.