library(rmarkdown); library(knitr); library(readxl); library(moments)
# Seed R's random number generator so that later random draws are reproducible.
set.seed(69)
The mean is 5.2
# Small example data set used for the hand-computed summary statistics.
v <- c(7, 1, 8, 2, 8)
# Arithmetic mean from its definition: total divided by the number of
# observations. Vectorised so neither the elements nor n are hard-coded.
sum(v) / length(v)
## [1] 5.2
The standard deviation is about 3.42
# Sample standard deviation from its definition: square root of the summed
# squared deviations from the mean, divided by n - 1. The mean and n are
# derived from `v` rather than typed in as 5.2 and 5. Equivalent to sd(v).
sqrt(sum((v - mean(v))^2) / (length(v) - 1))
## [1] 3.420526
The skewness is -.3989, which means the data is left skewed
# Moment-based skewness: third central moment divided by the second central
# moment raised to the 3/2 power. Both moments divide by n (not n - 1).
# The mean and n come from `v` instead of the hard-coded 5.2 and 5.
m3 <- mean((v - mean(v))^3)
sd3 <- mean((v - mean(v))^2)^(3 / 2)
skew <- m3 / sd3
skew
## [1] -0.3989371
The kurtosis is about 1.25, which is less than 3 (the kurtosis of a normal distribution), so the data is flatter, with lighter tails, than the normal distribution.
# Moment-based kurtosis: fourth central moment divided by the squared second
# central moment. Both moments divide by n (not n - 1).
# The mean and n come from `v` instead of the hard-coded 5.2 and 5.
m4 <- mean((v - mean(v))^4)
sd4 <- mean((v - mean(v))^2)^2
# NOTE: this numeric variable shares its name with the kurtosis() function
# called later in the file. Calls such as kurtosis(x) still resolve to the
# function, because R skips non-function bindings when looking up a call.
kurtosis <- m4 / sd4
kurtosis
## [1] 1.254328
# Country-level indicators for ten countries, one row per country.
# NOTE(review): units are not stated here; presumably percentages. Confirm
# against the data source.
PovertyData <- data.frame(
  Name = c(
    "Estonia", "Luxembourg", "Chile", "Belgium", "Greece",
    "Spain", "Djibouti", "Cyprus", "Lithuania", "Kosovo"
  ),
  Water       = c(6.6, 0.2, 0.1, 0.3, 0.5, 0.2, 7.1, 0.5, 9.9, 0.7),
  Electricity = c(0, 0, 0.3, 0, 0, 0, 39.8, 0, 0, 0.2),
  Sanitation  = c(5.3, 0, 0.6, 0.9, 0.3, 0.2, 45.4, 0.5, 10.6, 1.4),
  Education   = c(0, 0.8, 4, 1.9, 1.7, 3.4, 30.1, 1.4, 0.2, 0.5)
)
# Auto-print the full table.
PovertyData
## Name Water Electricity Sanitation Education
## 1 Estonia 6.6 0.0 5.3 0.0
## 2 Luxembourg 0.2 0.0 0.0 0.8
## 3 Chile 0.1 0.3 0.6 4.0
## 4 Belgium 0.3 0.0 0.9 1.9
## 5 Greece 0.5 0.0 0.3 1.7
## 6 Spain 0.2 0.0 0.2 3.4
## 7 Djibouti 7.1 39.8 45.4 30.1
## 8 Cyprus 0.5 0.0 0.5 1.4
## 9 Lithuania 9.9 0.0 10.6 0.2
## 10 Kosovo 0.7 0.2 1.4 0.5
# Stacked dot plot of the Water column (column 2 of PovertyData), used to
# judge the shape of its distribution by eye.
stripchart(PovertyData[["Water"]], method = "stack")
Our data appears to be skewed to the right.
The skewness is likely positive, as that indicates right skew.
I think the kurtosis will be less than 3 because the data looks flatter than a normal distribution.
The skewness is about 1.04, which is positive, as I predicted. The kurtosis is about 2.35, which is less than 3, so the data is flatter than a normal distribution, as predicted.
# Skewness of the Water column (column 2 of PovertyData).
skewness(PovertyData[["Water"]])
## [1] 1.039527
# Kurtosis of the same column; a normal distribution has kurtosis 3.
kurtosis(PovertyData[["Water"]])
## [1] 2.349475
# Load the monthly gas price table from CSV.
# NOTE(review): this is an absolute path on one user's machine, so the script
# will not run elsewhere as written. Consider a project-relative path.
MonthlyGasPrices <- read.csv(
  file = "C:/Users/Sarah Chock/OneDrive - University of St. Thomas/Senior Year/STAT 360 Comp Stat and Data Analysis/Exploratory Data Analysis/Monthly Gas Prices.csv"
)
# Preview the first six rows.
head(MonthlyGasPrices, n = 6L)
## Month Price
## 1 1997-01 3.45
## 2 1997-02 2.15
## 3 1997-03 1.89
## 4 1997-04 2.03
## 5 1997-05 2.25
## 6 1997-06 2.20
The skewness is about 1.53, so the data is right skewed.
# Stacked dot plot of the Price column (column 2 of MonthlyGasPrices).
stripchart(MonthlyGasPrices[["Price"]], method = "stack")
# Skewness of the untransformed prices.
skewness(MonthlyGasPrices[["Price"]])
## [1] 1.526612
# Skewness after a natural-log transformation.
skewness(log(MonthlyGasPrices[["Price"]]))
## [1] 0.439993
# Skewness after a square-root transformation.
skewness(sqrt(MonthlyGasPrices[["Price"]]))
## [1] 0.9346747
# Skewness after squaring.
skewness(MonthlyGasPrices[["Price"]]^2)
## [1] 3.013805
# Skewness after a reciprocal transformation.
skewness(1 / MonthlyGasPrices[["Price"]])
## [1] 0.3731103
The reciprocal transformation was the most effective at reducing skewness.
# Ten observations, one every three years from 1991 through 2018.
HomelessMN <- data.frame(
  Year = seq(from = 1991, to = 2018, by = 3),
  Homeless = c(
    3079, 4553, 5645, 7696, 7845,
    7751, 9654, 10214, 9312, 10233
  )
)
# Re-seed so the resample drawn below is reproducible.
set.seed(69)
# Draw as many row indices as there are rows, with replacement.
mysample <- sample(
  x = nrow(HomelessMN),
  size = nrow(HomelessMN),
  replace = TRUE
)
mysample
## [1] 1 2 8 7 7 6 7 2 10 4
# One bootstrap resample: the rows selected by those indices (repeats allowed).
bootstrap <- HomelessMN[mysample, ]
bootstrap
## Year Homeless
## 1 1991 3079
## 2 1994 4553
## 8 2012 10214
## 7 2009 9654
## 7.1 2009 9654
## 6 2006 7751
## 7.2 2009 9654
## 2.1 1994 4553
## 10 2018 10233
## 4 2000 7696
The skewness of the bootstrap sample of homelessness counts is about -0.64, so it is left-skewed.
# Skewness of the Homeless column (column 2) in the single bootstrap resample.
skewness(bootstrap[["Homeless"]])
## [1] -0.6421693
# Bootstrap distribution of the skewness: repeat the resample-and-measure step
# 1000 times and collect the resulting skewness values.
boot <- replicate(
  1000,
  {
    # Names assigned here are local to each replication.
    resample_rows <- sample(
      x = nrow(HomelessMN),
      size = nrow(HomelessMN),
      replace = TRUE
    )
    resampled <- HomelessMN[resample_rows, ]
    skewness(resampled[["Homeless"]])
  }
)
Our 95% bootstrap confidence interval for the skewness is between about -1.53 and 0.41.
# 95% percentile interval: the 2.5th and 97.5th percentiles of the bootstrap
# skewness values. The argument is spelled `probs` in full; the original
# `prob` relied on partial argument matching.
quantile(boot, probs = c(0.025, 0.975))
## 2.5% 97.5%
## -1.5295494 0.4088601
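Our data is not significantly skewed. This is because the 95% bootstrap confidence interval for the skewness contains 0, so a skewness of zero (a symmetric distribution) is plausible; the skewness I found (-.64) also falls inside the interval.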