Gas <- read_csv("~/Senior Project/Gas.csv")
Gas  <- Gas %>% rename(PriceRegular=R1)
y<- Gas$PriceRegular
x<- as.numeric(Gas$Date)-9132
glimpse(x)
xsq<-x^2
xcub<-x^3
xquar<-x^4
x5<-x^5
x6<-x^6
x7<-x^7
x8<-x^8
x9<-x^9
x10<-x^10
x11<-x^11
x12<-x^12
x13<-x^13
x14<-x^14
x15<-x^15
x16<-x^16
x17<-x^17
x18<-x^18
x19<-x^19
x20<-x^20
x21<-x^21
x22<-x^22
x23<-x^23
x24<-x^24
x25<-x^25
plot(x,y, pch=20, xlab="Days since 01/01/1995", ylab="Gas Price")
xv<-seq(min(x),max(x),1)
yv<-predict(fit25, list(x=xv, xsq=xv^2, xcub=xv^3, xquar=xv^4, x5=xv^5, x6=xv^6,x7=xv^7,x8=xv^8,x9=xv^9,x10=xv^10,x11=xv^11,x12=xv^12,x13=xv^13,x14=xv^14,x15=xv^15,x16=xv^16,x17=xv^17,x18=xv^18,x19=xv^19,x20=xv^20,x21=xv^21,x22=xv^22,x23=xv^23,x24=xv^24,x25=xv^25))
lines(xv,yv, col = "blue", lwd=3)
summary(fit25)
set.seed(100)
sample <- sample(c(TRUE, FALSE), nrow(Gas), replace=TRUE, prob=c(0.9,0.1))
train  <- Gas[sample, ]
test   <- Gas[!sample, ]

for (i in 1:25) {
PredictionAccuracy=mean((predict(lm(PriceRegular~poly(Date,i), data=train),test)-test$PriceRegular)^2)
print(PredictionAccuracy)
}
## [1] 0.3967022
## [1] 0.2611901
## [1] 0.2106733
## [1] 0.1826787
## [1] 0.1668109
## [1] 0.1660173
## [1] 0.1633775
## [1] 0.1560798
## [1] 0.1501038
## [1] 0.1491021
## [1] 0.1129385
## [1] 0.1045485
## [1] 0.09304121
## [1] 0.09165833
## [1] 0.0901241
## [1] 0.08982945
## [1] 0.09060015
## [1] 0.09018333
## [1] 0.08970467
## [1] 0.08891765
## [1] 0.08604971
## [1] 0.08236128
## [1] 0.08300596
## [1] 0.08014616
## [1] 0.08017713
Gas1<-Gas%>%
  filter(as.numeric(Date)<3000)
Gas7<-Gas%>%
  filter(as.numeric(Date)>3000 & as.numeric(Date)<4936)
Gas2<-Gas%>%
  filter(as.numeric(Date)>4936 & as.numeric(Date)<5111)
Gas3<-Gas%>%
  filter(as.numeric(Date)>5111 & as.numeric(Date)<5972)
Gas4<-Gas%>%
  filter(as.numeric(Date)>5972 & as.numeric(Date)<7120)
Gas5<-Gas%>%
  filter(as.numeric(Date)>7120 & as.numeric(Date)<7722)
Gas6<-Gas%>%
  filter(as.numeric(Date)>7722 & as.numeric(Date)<8548)
Gas8<-Gas%>%
  filter(as.numeric(Date)>8548)
plot(PriceRegular~Date, data=Gas,  xlab="Days since 01/01/1995", ylab="Gas Price", pch=19)
sec1<- lm(PriceRegular~Date, data=Gas1)
ablineclip(sec1, col = "red", lwd=3,x1=0,x2=3000)
sec2<- lm(PriceRegular~Date, data=Gas7)
ablineclip(sec2, col = "green", lwd=3,x1=3000,x2=4936)
sec3<- lm(PriceRegular~Date, data=Gas2)
ablineclip(sec3, col = "blue",lwd=3,x1=4936,x2=5111)
sec4<- lm(PriceRegular~Date, data=Gas3)
ablineclip(sec4, col = "orange",lwd=3,x1=5111,x2=5972)
sec5<- lm(PriceRegular~Date, data=Gas4)
ablineclip(sec5, col = "purple",lwd=3,x1=5972,x2=7120)
sec6<- lm(PriceRegular~Date, data=Gas5)
ablineclip(sec6, col = "yellow",lwd=3,x1=7120,x2=7722)
sec7<- lm(PriceRegular~Date, data=Gas6)
ablineclip(sec7, col = "green",lwd=3,x1=7722,x2=8548)
sec8<- lm(PriceRegular~Date, data=Gas8)
ablineclip(sec8, col = "red",lwd=3,x1=8548)
dataframe<- data.frame(x=as.numeric(Gas$Date)-9132,y=Gas$PriceRegular)
fit <- lm(y~x, data=dataframe)
segmented.fit <- segmented(fit, seg.Z = ~x, psi = c(3003,4936,5111,5972,7120,7722,8548))
summary(segmented.fit)
## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = fit, seg.Z = ~x, psi = c(3003, 4936, 5111, 
##     5972, 7120, 7722, 8548))
## 
## Estimated Break-Point(s):
##             Est. St.Err
## psi1.x 3146.779 33.834
## psi2.x 5023.143  4.505
## psi3.x 5071.675  4.578
## psi4.x 6046.010 25.679
## psi5.x 7077.593 23.998
## psi6.x 7687.075 17.068
## psi7.x 8609.209 30.580
## 
## Meaningful coefficients of the linear terms:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.049e+00  1.949e-02  53.844   <2e-16 ***
## x            1.360e-04  1.073e-05  12.673   <2e-16 ***
## U1.x         9.442e-04  2.570e-05  36.738       NA    
## U2.x        -3.477e-02  5.589e-03  -6.222       NA    
## U3.x         3.548e-02  5.589e-03   6.347       NA    
## U4.x        -1.905e-03  8.454e-05 -22.532       NA    
## U5.x        -2.338e-03  1.385e-04 -16.881       NA    
## U6.x         3.349e-03  1.436e-04  23.325       NA    
## U7.x        -1.673e-03  9.663e-05 -17.313       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.207 on 1345 degrees of freedom
## Multiple R-Squared: 0.9414,  Adjusted R-squared: 0.9407 
## 
## Boot restarting based on 6 samples. Last fit:
## Convergence attained in 4 iterations (rel. change 3.8081e-06)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
plot(segmented.fit, add=T, lwd=3, col = "royalblue1")
abline(v=c(3003,4936,5111,5972,7120,7722,8548),lty=2,col="darkgreen")

summary(fit)
## 
## Call:
## lm(formula = y ~ x, data = dataframe)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4097 -0.4140 -0.1588  0.3802  1.8515 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.209e+00  3.333e-02   36.29   <2e-16 ***
## x           2.134e-04  6.062e-06   35.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6151 on 1359 degrees of freedom
## Multiple R-squared:  0.4769, Adjusted R-squared:  0.4765 
## F-statistic:  1239 on 1 and 1359 DF,  p-value: < 2.2e-16
fit2<-lm(PriceRegular~bs(Date,knots=c(3003,4936,5111,5972,7120,7722,8548)),data=Gas)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
Datelims<-range(Gas$Date)
Date.grid<-seq(from=Datelims[1], to = Datelims[2])
points(Date.grid,predict(fit2,newdata = list(Date=Date.grid)),col="royalblue1", lwd=3,type="l")
abline(v=c(3003,4936,5111,5972,7120,7722,8548),lty=2,col="darkgreen")
fit3<-smooth.spline(as.numeric(Gas$Date)-9132,Gas$PriceRegular, nknots=200)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
lines(fit3,col="red",lwd=3)

fit3$fit
## $knot
##   [1] 0.000000000 0.000000000 0.000000000 0.000000000 0.004411765 0.009558824
##   [7] 0.014705882 0.019852941 0.025000000 0.030147059 0.034558824 0.039705882
##  [13] 0.044852941 0.050000000 0.055147059 0.060294118 0.064705882 0.069852941
##  [19] 0.075000000 0.080147059 0.085294118 0.090441176 0.094852941 0.100000000
##  [25] 0.105147059 0.110294118 0.115441176 0.120588235 0.125000000 0.130147059
##  [31] 0.135294118 0.140441176 0.145588235 0.150735294 0.155147059 0.160294118
##  [37] 0.165441176 0.170588235 0.175735294 0.180882353 0.185294118 0.190441176
##  [43] 0.195588235 0.200735294 0.205882353 0.211029412 0.215441176 0.220588235
##  [49] 0.225735294 0.230882353 0.236029412 0.241176471 0.245588235 0.250735294
##  [55] 0.255882353 0.261029412 0.266176471 0.271323529 0.275735294 0.280882353
##  [61] 0.286029412 0.291176471 0.296323529 0.301470588 0.305882353 0.311029412
##  [67] 0.316176471 0.321323529 0.326470588 0.331617647 0.336029412 0.341176471
##  [73] 0.346323529 0.351470588 0.356617647 0.361764706 0.366176471 0.371323529
##  [79] 0.376470588 0.381617647 0.386764706 0.391911765 0.396323529 0.401470588
##  [85] 0.406617647 0.411764706 0.416911765 0.422058824 0.426470588 0.431617647
##  [91] 0.436764706 0.441911765 0.447058824 0.452205882 0.456617647 0.461764706
##  [97] 0.466911765 0.472058824 0.477205882 0.482352941 0.486764706 0.491911765
## [103] 0.497058824 0.502205882 0.507352941 0.512500000 0.516911765 0.522058824
## [109] 0.527205882 0.532352941 0.537500000 0.542647059 0.547058824 0.552205882
## [115] 0.557352941 0.562500000 0.567647059 0.572794118 0.577205882 0.582352941
## [121] 0.587500000 0.592647059 0.597794118 0.602941176 0.607352941 0.612500000
## [127] 0.617647059 0.622794118 0.627941176 0.633088235 0.637500000 0.642647059
## [133] 0.647794118 0.652941176 0.658088235 0.663235294 0.667647059 0.672794118
## [139] 0.677941176 0.683088235 0.688235294 0.693382353 0.697794118 0.702941176
## [145] 0.708088235 0.713235294 0.718382353 0.723529412 0.727941176 0.733088235
## [151] 0.738235294 0.743382353 0.748529412 0.753676471 0.758088235 0.763235294
## [157] 0.768382353 0.773529412 0.778676471 0.783823529 0.788235294 0.793382353
## [163] 0.798529412 0.803676471 0.808823529 0.813970588 0.818382353 0.823529412
## [169] 0.828676471 0.833823529 0.838970588 0.844117647 0.848529412 0.853676471
## [175] 0.858823529 0.863970588 0.869117647 0.874264706 0.878676471 0.883823529
## [181] 0.888970588 0.894117647 0.899264706 0.904411765 0.908823529 0.913970588
## [187] 0.919117647 0.924264706 0.929411765 0.934558824 0.938970588 0.944117647
## [193] 0.949264706 0.954411765 0.959558824 0.964705882 0.969117647 0.974264706
## [199] 0.979411765 0.984558824 0.989705882 0.994852941 1.000000000 1.000000000
## [205] 1.000000000 1.000000000
## 
## $nk
## [1] 202
## 
## $min
## [1] 0
## 
## $range
## [1] 9520
## 
## $coef
##   [1] 1.0813010 1.0835233 1.0763412 1.0521964 1.2380042 1.1550538 1.1135713
##   [8] 1.0978307 1.0374048 1.1158713 1.0561876 1.2991488 1.2680506 1.2110898
##  [15] 1.1921277 1.2178520 1.2396570 1.2454356 1.1824480 1.2216981 1.1653249
##  [22] 1.2176655 1.2429098 1.1527628 1.1163228 1.0191997 1.0107355 1.0817135
##  [29] 1.0658016 0.9932621 1.0420898 0.9554281 0.9312271 0.8891676 1.2454290
##  [36] 1.0367969 1.2273037 1.2593804 1.2396369 1.2645477 1.3072989 1.5848347
##  [43] 1.3736242 1.7710218 1.3343210 1.6510166 1.4555777 1.4527138 1.4176609
##  [50] 1.4515703 1.9076000 1.1829598 1.6562596 1.2380012 1.0781096 1.0609021
##  [57] 1.2262384 1.4891081 1.3321847 1.4260983 1.3817174 1.4862457 1.2815362
##  [64] 1.6661149 1.7095678 1.4588074 1.4379315 1.7285098 1.6154684 1.4341339
##  [71] 1.5296608 1.7323574 1.7704976 2.1075306 1.8659785 1.8012970 2.1146617
##  [78] 1.8257273 1.7567686 2.1614522 2.2416985 2.0968952 2.3196918 3.3100396
##  [85] 1.9830780 2.3611576 2.1727168 2.6463432 2.9965419 2.8955509 3.0899920
##  [92] 1.9270767 2.4433258 2.1432726 2.3354865 3.0426639 3.1913785 2.8606720
##  [99] 2.6961260 2.9278737 3.1477688 2.8989521 3.2972374 3.5905212 4.4105010
## [106] 3.5936054 4.0147369 1.6444775 1.6196601 2.0814927 1.8045684 2.5593435
## [113] 2.6337081 2.5460896 2.5750605 2.6498801 2.6745599 2.6703153 2.9925712
## [120] 2.6524429 2.7814414 2.6711024 2.8301476 3.0357022 3.0558507 3.7042772
## [127] 4.0740999 3.4735516 3.7786419 3.4545117 3.3898951 3.1724817 3.7548286
## [134] 4.0325194 3.6266565 3.2469749 4.0214279 3.7271701 3.1706875 3.3774575
## [141] 3.8975668 3.4267674 3.6918973 3.5670992 3.5761213 3.1371957 3.3441986
## [148] 3.2659384 3.6627984 3.7003331 3.6712671 3.4964743 3.2425488 2.9678633
## [155] 1.8479429 2.4819010 2.4395216 2.8898950 2.8164542 2.4539086 2.1867513
## [162] 2.1438331 1.6966570 1.9218029 2.2521931 2.4525459 2.0317902 2.3657840
## [169] 2.0819312 2.3904662 2.2769944 2.3581156 2.4967368 2.1362515 2.5829642
## [176] 2.5798935 2.4544152 2.5956501 2.5131654 2.8197374 2.9683272 2.7894123
## [183] 2.8509570 2.9179525 2.2720651 2.1489285 2.6617456 2.9656101 2.6736067
## [190] 2.6671832 2.5612694 2.6632985 2.4547247 2.6362560 1.7362384 1.8903514
## [197] 2.2776674 2.1526233 2.1812817 2.0381631 2.3268831 2.3972805
## 
## attr(,"class")
## [1] "smooth.spline.fit"
library(npreg)
#mod.ss <- with(Gas, ss(Date, PriceRegular),nknots=8)
mod.ss <- ss(Gas$Date, Gas$PriceRegular,nknots=14)
ss(x = Gas$Date, y = Gas$PriceRegular, nknots = 8)
## 
## Call:
## ss(x = Gas$Date, y = Gas$PriceRegular, nknots = 8)
## 
## Smoothing Parameter  spar = -0.1720181   lambda = 3.407981e-09
## Equivalent Degrees of Freedom (Df) 8.991439
## Penalized Criterion (RSS) 147.2724
## Generalized Cross-Validation (GCV) 0.1096531
summary(mod.ss)
## 
## Call:
## ss(x = Gas$Date, y = Gas$PriceRegular, nknots = 14)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.104584 -0.110088  0.005267  0.093147  1.332995 
## 
## Approx. Signif. of Parametric Effects:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.2257   0.007561  294.37        0 ***
## x             0.8531   0.076945   11.09        0 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Approx. Signif. of Nonparametric Effects:
##                Df Sum Sq  Mean Sq F value Pr(>F)    
## s(x)        12.58  408.7 32.48495   417.5      0 ***
## Residuals 1346.42  104.8  0.07781                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.2789 on 1346 degrees of freedom
## Multiple R-squared:  0.8934,    Adjusted R-squared:  0.8923
## F-statistic: 830.4 on 13.58 and 1346 DF,  p-value: <2e-16
plot(mod.ss)