Overview of Data

# Overview: draw Gas.in.Run.Time and Temperature from `daily` as two lines
# against Time on a shared y axis. The constant strings mapped to `color`
# become the legend entries.
ggplot(data = daily, mapping = aes(x = Time)) +
  geom_line(mapping = aes(y = Gas.in.Run.Time, color = "Gas in Run Time")) +
  geom_line(mapping = aes(y = Temperature, color = "Temperature"))

Hourly Gas Usage vs HDD

Setpoint = 65

# Single-predictor linear model: Gas.in.Run.Time regressed on HDD_65, fitted
# on the hourly observations in hourly_no_outliers.
# NOTE(review): HDD_65 is presumably heating degree days against a 65-degree
# setpoint (per the section heading) -- confirm where the column is built.
hourly_65 = lm(Gas.in.Run.Time ~ HDD_65, data = hourly_no_outliers)
# Print coefficient estimates, residual quantiles and R-squared for the fit.
summary(hourly_65)
## 
## Call:
## lm(formula = Gas.in.Run.Time ~ HDD_65, data = hourly_no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.49022 -0.35855  0.01689  0.34935  1.60868 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.52044    0.03422   15.21   <2e-16 ***
## HDD_65       1.25883    0.03309   38.04   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5102 on 2277 degrees of freedom
## Multiple R-squared:  0.3886, Adjusted R-squared:  0.3884 
## F-statistic:  1447 on 1 and 2277 DF,  p-value: < 2.2e-16

Plotting Variables

# Scatter of the hourly fitting data with a straight-line ("lm") smoother
# drawn over the points.
ggplot(
  data = hourly_no_outliers,
  mapping = aes(x = HDD_65, y = Gas.in.Run.Time)
) +
  geom_point() +
  stat_smooth(method = "lm")

Prediction with setpoint = 65

# Sum of the model's predicted Gas.in.Run.Time over every row of
# hourly_normals.
sum(predict(hourly_65, newdata = hourly_normals))
## [1] 10293.6
# Row-by-row table placing the inputs from hourly_normals next to the fitted
# value. The argument expressions are left unnamed, so data.frame() derives
# the column names from them.
prediction = data.frame(
  hourly_normals$date.time,
  hourly_normals$Temperature,
  hourly_normals$HDD_65,
  hourly_normals$Hour,
  predict(hourly_65, hourly_normals)
)
prediction

Using Different Setpoints

# Refit the single-predictor hourly model once per alternative HDD column
# (HDD_60, HDD_62.5, HDD_67.5, HDD_70). hourly_65 was fitted earlier.
hourly_60 = lm(Gas.in.Run.Time ~ HDD_60, data = hourly_no_outliers)
hourly_62.5 = lm(Gas.in.Run.Time ~ HDD_62.5, data = hourly_no_outliers)
hourly_67.5 = lm(Gas.in.Run.Time ~ HDD_67.5, data = hourly_no_outliers)
hourly_70 = lm(Gas.in.Run.Time ~ HDD_70, data = hourly_no_outliers)

# Every vector below holds one entry per model, always in the order
# 60, 62.5, 65, 67.5, 70.

# First coefficient of each fit: the intercept.
intercept = c(
  coef(hourly_60)[1],
  coef(hourly_62.5)[1],
  coef(hourly_65)[1],
  coef(hourly_67.5)[1],
  coef(hourly_70)[1]
)
# Second coefficient of each fit: the HDD slope.
slope = c(
  coef(hourly_60)[2],
  coef(hourly_62.5)[2],
  coef(hourly_65)[2],
  coef(hourly_67.5)[2],
  coef(hourly_70)[2]
)
# Multiple R-squared taken from each model summary.
r2 = c(
  summary(hourly_60)[["r.squared"]],
  summary(hourly_62.5)[["r.squared"]],
  summary(hourly_65)[["r.squared"]],
  summary(hourly_67.5)[["r.squared"]],
  summary(hourly_70)[["r.squared"]]
)
# Predicted Gas.in.Run.Time summed over all rows of hourly_normals.
predictions = c(
  sum(predict(hourly_60, newdata = hourly_normals)),
  sum(predict(hourly_62.5, newdata = hourly_normals)),
  sum(predict(hourly_65, newdata = hourly_normals)),
  sum(predict(hourly_67.5, newdata = hourly_normals)),
  sum(predict(hourly_70, newdata = hourly_normals))
)

# Side-by-side comparison table, one row per model.
hourly_summary = data.frame(intercept, slope, r2, predictions)
hourly_summary

Multivariable Hourly Gas Usage vs HDD + Hour

Setpoint = 65

# Two-predictor linear model: Gas.in.Run.Time regressed on HDD_65 plus Hour,
# fitted on hourly_no_outliers. Hour enters as a single linear term (it gets
# one coefficient in the printed summary).
multivariable_hourly_65 = lm(Gas.in.Run.Time ~ HDD_65 + Hour, data = hourly_no_outliers)
# Print coefficient estimates, residual quantiles and R-squared for the fit.
summary(multivariable_hourly_65)
## 
## Call:
## lm(formula = Gas.in.Run.Time ~ HDD_65 + Hour, data = hourly_no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.49623 -0.35811  0.01085  0.35197  1.64220 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.561500   0.040735  13.784   <2e-16 ***
## HDD_65       1.250762   0.033355  37.499   <2e-16 ***
## Hour        -0.002884   0.001554  -1.856   0.0636 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5099 on 2276 degrees of freedom
## Multiple R-squared:  0.3895, Adjusted R-squared:  0.389 
## F-statistic: 726.2 on 2 and 2276 DF,  p-value: < 2.2e-16

Plotting Variables

# 3-D scatter of the fitting data: HDD_65 and Hour as the first two
# coordinates, Gas.in.Run.Time as the third. The returned object is kept so
# the fitted model's regression plane can be drawn onto the same plot.
scatterplot = scatterplot3d(
  hourly_no_outliers$HDD_65,
  hourly_no_outliers$Hour,
  hourly_no_outliers$Gas.in.Run.Time
)
scatterplot$plane3d(multivariable_hourly_65)

Prediction with setpoint = 65

# Sum of the two-predictor model's predicted Gas.in.Run.Time over every row
# of hourly_normals.
sum(predict(multivariable_hourly_65, newdata = hourly_normals))
## [1] 10326
# Row-by-row table placing the inputs from hourly_normals next to the fitted
# value. The argument expressions are left unnamed, so data.frame() derives
# the column names from them. This overwrites any earlier `prediction`.
prediction = data.frame(
  hourly_normals$date.time,
  hourly_normals$Temperature,
  hourly_normals$HDD_65,
  hourly_normals$Hour,
  predict(multivariable_hourly_65, hourly_normals)
)
prediction

Using Different Setpoints

# Refit the HDD + Hour model once per alternative HDD column (HDD_60,
# HDD_62.5, HDD_67.5, HDD_70). multivariable_hourly_65 was fitted earlier.
multivariable_hourly_60 = lm(Gas.in.Run.Time ~ HDD_60 + Hour, data = hourly_no_outliers)
multivariable_hourly_62.5 = lm(Gas.in.Run.Time ~ HDD_62.5 + Hour, data = hourly_no_outliers)
multivariable_hourly_67.5 = lm(Gas.in.Run.Time ~ HDD_67.5 + Hour, data = hourly_no_outliers)
multivariable_hourly_70 = lm(Gas.in.Run.Time ~ HDD_70 + Hour, data = hourly_no_outliers)

# Every vector below holds one entry per model, always in the order
# 60, 62.5, 65, 67.5, 70.

# First coefficient of each fit: the intercept.
intercept = c(
  coef(multivariable_hourly_60)[1],
  coef(multivariable_hourly_62.5)[1],
  coef(multivariable_hourly_65)[1],
  coef(multivariable_hourly_67.5)[1],
  coef(multivariable_hourly_70)[1]
)
# Second coefficient of each fit: the HDD term. The Hour coefficient (third)
# is not carried into this table.
slope = c(
  coef(multivariable_hourly_60)[2],
  coef(multivariable_hourly_62.5)[2],
  coef(multivariable_hourly_65)[2],
  coef(multivariable_hourly_67.5)[2],
  coef(multivariable_hourly_70)[2]
)
# Multiple R-squared taken from each model summary.
r2 = c(
  summary(multivariable_hourly_60)[["r.squared"]],
  summary(multivariable_hourly_62.5)[["r.squared"]],
  summary(multivariable_hourly_65)[["r.squared"]],
  summary(multivariable_hourly_67.5)[["r.squared"]],
  summary(multivariable_hourly_70)[["r.squared"]]
)
# Predicted Gas.in.Run.Time summed over all rows of hourly_normals.
predictions = c(
  sum(predict(multivariable_hourly_60, newdata = hourly_normals)),
  sum(predict(multivariable_hourly_62.5, newdata = hourly_normals)),
  sum(predict(multivariable_hourly_65, newdata = hourly_normals)),
  sum(predict(multivariable_hourly_67.5, newdata = hourly_normals)),
  sum(predict(multivariable_hourly_70, newdata = hourly_normals))
)
# Side-by-side comparison table, one row per model.
multivariable_summary = data.frame(intercept, slope, r2, predictions)
multivariable_summary

Daily Gas Usage vs HDD

Setpoint = 65

# Single-predictor linear model: Gas.in.Run.Time regressed on HDD_65, fitted
# on the daily observations in daily_no_outliers.
# NOTE(review): HDD_65 is presumably heating degree days against a 65-degree
# setpoint (per the section heading) -- confirm where the column is built.
daily_65 = lm(Gas.in.Run.Time ~ HDD_65, data = daily_no_outliers)
# Print coefficient estimates, residual quantiles and R-squared for the fit.
summary(daily_65)
## 
## Call:
## lm(formula = Gas.in.Run.Time ~ HDD_65, data = daily_no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.5423  -3.2062   0.0287   3.1472  12.4200 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.98581    2.06256   4.357 3.34e-05 ***
## HDD_65       1.45089    0.08449  17.173  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.344 on 95 degrees of freedom
## Multiple R-squared:  0.7564, Adjusted R-squared:  0.7538 
## F-statistic: 294.9 on 1 and 95 DF,  p-value: < 2.2e-16

Plotting Variables

# Scatter of the daily fitting data with a straight-line ("lm") smoother
# drawn over the points.
ggplot(
  data = daily_no_outliers,
  mapping = aes(x = HDD_65, y = Gas.in.Run.Time)
) +
  geom_point() +
  stat_smooth(method = "lm")

Prediction with setpoint = 65

# Sum of the model's predicted Gas.in.Run.Time over every row of
# daily_normals.
sum(predict(daily_65, daily_normals))
## [1] 9860.208
# Row-by-row table placing the inputs from daily_normals next to the fitted
# value. The HDD column shown is HDD_65: that is the predictor daily_65 was
# fitted on and the column predict() reads from daily_normals, and it matches
# the hourly prediction tables. Showing a differently named HDD column here
# could display values the model never used, or silently drop the column if
# that name does not exist (`$` would return NULL).
prediction = data.frame(
  daily_normals$Date,
  daily_normals$Temperature,
  daily_normals$HDD_65,
  predict(daily_65, daily_normals)
)
prediction

Using Different Setpoints

Summary Dataframe

# Refit the single-predictor daily model once per alternative HDD column
# (HDD_60, HDD_62.5, HDD_67.5, HDD_70). daily_65 was fitted earlier.
daily_60 = lm(Gas.in.Run.Time ~ HDD_60, data = daily_no_outliers)
daily_62.5 = lm(Gas.in.Run.Time ~ HDD_62.5, data = daily_no_outliers)
daily_67.5 = lm(Gas.in.Run.Time ~ HDD_67.5, data = daily_no_outliers)
daily_70 = lm(Gas.in.Run.Time ~ HDD_70, data = daily_no_outliers)

# Every vector below holds one entry per model, always in the order
# 60, 62.5, 65, 67.5, 70.

# First coefficient of each fit: the intercept.
intercept = c(
  coef(daily_60)[1],
  coef(daily_62.5)[1],
  coef(daily_65)[1],
  coef(daily_67.5)[1],
  coef(daily_70)[1]
)
# Second coefficient of each fit: the HDD slope.
slope = c(
  coef(daily_60)[2],
  coef(daily_62.5)[2],
  coef(daily_65)[2],
  coef(daily_67.5)[2],
  coef(daily_70)[2]
)
# Multiple R-squared taken from each model summary.
r2 = c(
  summary(daily_60)[["r.squared"]],
  summary(daily_62.5)[["r.squared"]],
  summary(daily_65)[["r.squared"]],
  summary(daily_67.5)[["r.squared"]],
  summary(daily_70)[["r.squared"]]
)
# Predicted Gas.in.Run.Time summed over all rows of daily_normals.
predictions = c(
  sum(predict(daily_60, newdata = daily_normals)),
  sum(predict(daily_62.5, newdata = daily_normals)),
  sum(predict(daily_65, newdata = daily_normals)),
  sum(predict(daily_67.5, newdata = daily_normals)),
  sum(predict(daily_70, newdata = daily_normals))
)

# Side-by-side comparison table, one row per model.
daily_summary = data.frame(intercept, slope, r2, predictions)
daily_summary