HDD plots for all the datasets

Try to make all the plots and see what happens.

# Load helper definitions (presumably smallHDD(), which is called below).
source("smallHDD.r")

# Read the input tables. read.csv() already defaults to header = TRUE and
# sep = ",", so only the file path is passed.
data2007 <- read.csv("newdata2007.csv")
data2011 <- read.csv("newdata2011.csv")
# NOTE(review): data2012 is read from 'newdata2013.csv' -- confirm that the
# object name / file name pairing is intended.
data2012 <- read.csv("newdata2013.csv")
data2015 <- read.csv("newdata2015-1.csv")
data2015.2 <- read.csv("newdata2015-2.csv")


# data2007 with introduction year 2007.
# NOTE(review): introDate is written with a 2015 year although introYear is
# 2007 -- presumably only month/day matter inside smallHDD(); confirm.
smallHDD(data2007, introDate = "2015-03-22", introYear = 2007)

## 2015-04-22 
##        111 
## 2015-04-05 
##         94 
## [1] 7818.27
## [1] 692.49
# NOTE(review): reuses data2007 with introYear = 2009; no separate 2009 table
# is loaded in the visible part of this script -- confirm the pairing.
smallHDD(data2007, introDate = "2015-05-06", introYear = 2009)

## 2015-04-24 
##        113 
## 2015-01-23 
##         22 
## [1] 8536.53
## [1] 297.9
# data2011 with introduction year 2011.
smallHDD(data2011, introDate = "2015-05-13", introYear = 2011)

## 2015-05-10 
##        129 
## 2015-02-10 
##         40 
## [1] 7302.5
## [1] 1035
# data2012 with introduction year 2012.
smallHDD(data2012, introDate = "2015-04-26", introYear = 2012)

## 2015-03-19 
##         77 
## 2015-02-19 
##         49 
## [1] 7503.6
## [1] 727.83
# NOTE(review): reuses data2012 with introYear = 2013 -- presumably the table
# covers several years; confirm.
smallHDD(data2012, introDate = "2015-05-08", introYear = 2013)

## 2015-05-15 
##        134 
## 2015-01-09 
##          8 
## [1] 9575.52
## [1] 554.43
# NOTE(review): reuses data2012 with introYear = 2014 -- presumably the table
# covers several years; confirm.
smallHDD(data2012, introDate = "2015-05-09", introYear = 2014)

## 2015-05-25 
##        144 
## 2015-01-07 
##          6 
## [1] 9602.01
## [1] 418.02
# First 2015 table (read from 'newdata2015-1.csv'), introduction year 2015.
smallHDD(data2015, introDate = "2015-02-27", introYear = 2015)

## 2015-04-12 
##        101 
## 2015-02-06 
##         36 
## [1] 7672.56
## [1] 503.34
# Second 2015 table (read from 'newdata2015-2.csv'), introduction year 2015.
smallHDD(data2015.2, introDate = "2015-03-22", introYear = 2015)

## 2015-04-12 
##        101 
## 2015-03-10 
##         68 
## [1] 6824.91
## [1] 819.63

Correlation between HDD and Date of Introduction

We are looking at the date on which HDD reaches zero in the year of introduction.

# Regress Date2 on Intro from the HDD summary table, once on all rows and once
# with one row removed, then draw both fitted lines over the scatter plot.
# NOTE(review): presumably Date2 is the date HDD reaches zero and Intro is the
# introduction date -- confirm against the columns of HDDsummary.csv.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
dat <- read.csv(file = "HDDsummary.csv", sep = ",", header = TRUE)

# Fit on every row. The dat$... form of the formula is kept on purpose: it
# determines the "Call:" line and coefficient labels printed by summary().
reg1 <- lm(dat$Date2 ~ dat$Intro)

### exclude 2015-1
# NOTE(review): assumes row 7 of HDDsummary.csv is the 2015-1 record; the
# index is positional, so re-ordering the CSV would drop a different row.
dat.without <- dat[-7, ]

reg2 <- lm(dat.without$Date2 ~ dat.without$Intro)

plot(dat$Date2 ~ dat$Intro)
abline(reg1, col = "blue")  # fit on all rows
abline(reg2, col = "red")   # fit with row 7 removed

# Model summary for reg1 (the fit on all rows of dat).
summary(reg1)
## 
## Call:
## lm(formula = dat$Date2 ~ dat$Intro)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.639 -19.303   2.753  14.955  37.713 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 26798.3966  5641.6237    4.75  0.00316 **
## dat$Intro      -0.6240     0.3411   -1.83  0.11705   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.96 on 6 degrees of freedom
## Multiple R-squared:  0.3581, Adjusted R-squared:  0.2511 
## F-statistic: 3.348 on 1 and 6 DF,  p-value: 0.117
# Model summary for reg2 (the fit on dat.without, i.e. dat with row 7 removed).
summary(reg2)
## 
## Call:
## lm(formula = dat.without$Date2 ~ dat.without$Intro)
## 
## Residuals:
##       1       2       3       4       5       6       7 
##  12.716  -3.383  23.312  11.194 -14.899 -15.657 -13.284 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       37033.8934  5118.3073   7.236 0.000787 ***
## dat.without$Intro    -1.2422     0.3093  -4.016 0.010157 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.24 on 5 degrees of freedom
## Multiple R-squared:  0.7634, Adjusted R-squared:  0.7161 
## F-statistic: 16.13 on 1 and 5 DF,  p-value: 0.01016