My Work

library(repmis) 
githubURL <- "https://github.com/AndyBunn/TeachingData/raw/master/kbli.Rdata"
source_data(githubURL)
## Downloading data from: https://github.com/AndyBunn/TeachingData/raw/master/kbli.Rdata
## SHA-1 hash of the downloaded data file is:
## e61c66faa0cd4f23f6bf7e96d860b6b591c39cef
## [1] "kbli.zoo" "kbli.ts"
tAvgF <- kbli.ts[,"tAvgF"] #Average Temperature in Farenheit
plot(tAvgF)

Figure 1. Average temperature at the Bellingham airport over from 1976 to 2016
summary(tAvgF)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   30.23   42.73   49.90   50.21   58.11   67.48
str(tAvgF)
##  Time-Series [1:480] from 1976 to 2016: 40.8 39.8 39.4 47.6 52.5 ...
tsp(tAvgF) # the first data point was taken in 1976, and there are 12 data points taken per year
## [1] 1976.000 2015.917   12.000
library(moments) 
Load the Moments library
kt<-kurtosis(tAvgF)
sk <- skewness(tAvgF)
my.xlim <- range(tAvgF)
h<-hist(tAvgF, breaks=10, col="darkolivegreen", #Choose how many line breaks
        xlab="Average Temperature (F)", #label the x-axis
        main="",xlim=c(30,70), ylim=c(0,160)) #set limits for the x and y axes 
xfit<-seq(min(tAvgF),max(tAvgF),length=100)  #Plot min and max temperatures
yfit<-dnorm(xfit,mean=mean(tAvgF),sd=sd(tAvgF)) #Mean and Standard Deviation
yfit <- yfit*diff(h$mids[1:2])*length(tAvgF) 
lines(xfit, yfit, col="darkblue", lwd=2)
boxplot(tAvgF, horizontal=TRUE,  outline=TRUE,  axes=FALSE,
        ylim=my.xlim, col = "darkorange3", add = TRUE, boxwex=19)
text(x = 30, y=150, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
text(x = 30, y=125, labels = paste("Skewness=",round(sk,2)),pos = 4)

Figure 2. Average temperature in Farenheit for Bellingham Airport.

Next, lets see if there is a trend as a function of time by fitting our data to a linear a model.

tempTime <- time(tAvgF)
Temp.lm <- lm(tAvgF ~ tempTime)
summary(Temp.lm)
## 
## Call:
## lm(formula = tAvgF ~ tempTime)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.5913  -7.4774  -0.4724   7.9929  16.8612 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  3.55231   70.32462   0.051    0.960
## tempTime     0.02338    0.03523   0.664    0.507
## 
## Residual standard error: 8.913 on 478 degrees of freedom
## Multiple R-squared:  0.0009202,  Adjusted R-squared:  -0.00117 
## F-statistic: 0.4403 on 1 and 478 DF,  p-value: 0.5073

The residuals for this model are not independently and identically distributed, so the R2 and p-values are incorrect. However, the coefficients are correct and it seems the temperature has increased by 0.02 degrees F.

ma10 <- filter(x=tAvgF, filter=rep(x=1/10,times=10), sides=2) #10-year, centered moving average filter
ma5 <- filter(x=tAvgF, filter=rep(x=1/5,times=5), sides=2) #5-year, centered moving average filter
plot(tAvgF,col="dodgerblue4")
lines(ma10,col="firebrick",lwd=2)
lines(ma5,col="darkorange4",lwd=2)
abline(Temp.lm, col="black",lwd=2, lty="dashed") 

head(cbind(tAvgF,ma5,ma10),n=10) 
##          tAvgF      ma5     ma10
##  [1,] 40.77419       NA       NA
##  [2,] 39.82759       NA       NA
##  [3,] 39.35484 44.00810       NA
##  [4,] 47.60000 47.21993       NA
##  [5,] 52.48387 51.77699 51.06394
##  [6,] 56.83333 56.11892 51.43319
##  [7,] 62.61290 58.34559 51.74720
##  [8,] 61.06452 58.11978 51.36656
##  [9,] 58.73333 55.64645 51.14227
## [10,] 51.35484 51.71742 50.28421
tAvgF.d <- decompose(tAvgF)
plot(tAvgF.d)

Figure 4. Decomposition of additive time series
decomp<- decompose(tAvgF)
plot(decomp$trend)

Standard Deviation
sd(decomp$x,na.rm=TRUE) #Time Series SD
## [1] 8.908073
sd(decomp$trend,na.rm=TRUE) #Standard Deviation of Trend
## [1] 1.119627
deseason<-decomp$trend


acf(deseason,na.action=na.pass)

pacf(deseason,na.action=na.pass) #Significant negative correlation at 2 months

residual<- decomp$random
acf(residual,na.action=na.pass) #Significant negative correlation at 3 and 4 months

pacf(residual,na.action=na.pass) #Several significant negative correlations in the residuals, but I can't see a pattern, or something that would make sense...

AR(2) Model with 100 observations

p<- acf(decomp$x,plot=FALSE)
p$acf[2:3,,1] #phi 1 and phi 2
## [1] 0.8097762 0.4549258
n<-874 #Number of observations
epsilon <- rnorm(n=n, mean = 0, sd = 1) #Noise
x <- numeric(length = n)
p1<-0.8
p2<-0.5
plot(x)

for(i in 3:n){x[i] <-p1*x[i-1]+p2*x[i-2]+epsilon[i]}
acf(x)

pacf(x)

cor(x[2:n],x[1:(n-1)]) #Correlation with x at time 1 with
## [1] 1
#and time - 1 
x2<-arima.sim(model=list(ar = c(-0.04,-0.07)), n=100)
cor(x2[2:n],x2[1:(n-1)])
## [1] NA
plot(x2)

acf(x2)

pacf(x2)

Drunken Data

setwd("/Users/shannoncall/Desktop/ESCI597A/Week02")
drunk <- read.csv("/Users/shannoncall/Desktop/ESCI597A/Week02/drunk.csv", header=TRUE)
attach(drunk)
tsp(drunk)
## NULL
drunk.ts<-ts(drunk)
tsp(drunk.ts)
## [1]   1 144   1
plot(drunk.ts)

Histogram of Drunkness overtime

kt <-kurtosis(drunk.ts)
sk <- skewness(drunk.ts)
my.xlim <- range(drunk.ts)
h<-hist(drunk.ts, breaks=10, col="lightblue", xlab="Drunken-ness in public",main="",xlim=my.xlim) 
xfit<-seq(min(drunk.ts),max(drunk.ts),length=100) 
yfit<-dnorm(xfit,mean=mean(drunk.ts),sd=sd(drunk.ts)) 
yfit <- yfit*diff(h$mids[1:2])*length(drunk.ts) 
lines(xfit, yfit, col="darkblue", lwd=2)
boxplot(drunk.ts, horizontal=TRUE,  outline=TRUE,  axes=FALSE,
        ylim=my.xlim, col = "green", add = TRUE, boxwex=3)
text(x = 145, y=150, labels = paste("Kurtosis=",round(kt,2)),pos = 4)
text(x = 145, y=130, labels = paste("Skewness=",round(sk,2)),pos = 4)

Linear Model

drunk.lm <- lm(Monthly~Year)
summary(drunk.lm)
## 
## Call:
## lm(formula = Monthly ~ Year)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -285.94 -103.76  -11.82  102.77  354.24 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 113580.877   6538.077   17.37   <2e-16 ***
## Year           -57.410      3.316  -17.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 137.5 on 142 degrees of freedom
## Multiple R-squared:  0.6785, Adjusted R-squared:  0.6762 
## F-statistic: 299.7 on 1 and 142 DF,  p-value: < 2.2e-16
Yikes…looks like people get a little bit more drunk each year….
acf(Monthly)

pacf(Monthly)

The acf plot indicates a lot of autocorrelation, but the pacf plot indicates there may be a yearly occurrence of increased drunkenness. Maybe around the holidays? Also, it looks like overall more people are drinking more all around. This makes sense because our population is growing since the 60s and it’s more socially acceptable.