Gas <- read_csv("~/Senior Project/Gas.csv")
Gas <- Gas %>% rename(PriceRegular=R1)
y<- Gas$PriceRegular
x<- as.numeric(Gas$Date)-9132
glimpse(x)
xsq<-x^2
xcub<-x^3
xquar<-x^4
x5<-x^5
x6<-x^6
x7<-x^7
x8<-x^8
x9<-x^9
x10<-x^10
x11<-x^11
x12<-x^12
x13<-x^13
x14<-x^14
x15<-x^15
x16<-x^16
x17<-x^17
x18<-x^18
x19<-x^19
x20<-x^20
x21<-x^21
x22<-x^22
x23<-x^23
x24<-x^24
x25<-x^25
plot(x,y, pch=20, xlab="Days since 01/01/1995", ylab="Gas Price")
xv<-seq(min(x),max(x),1)
yv<-predict(fit25, list(x=xv, xsq=xv^2, xcub=xv^3, xquar=xv^4, x5=xv^5, x6=xv^6,x7=xv^7,x8=xv^8,x9=xv^9,x10=xv^10,x11=xv^11,x12=xv^12,x13=xv^13,x14=xv^14,x15=xv^15,x16=xv^16,x17=xv^17,x18=xv^18,x19=xv^19,x20=xv^20,x21=xv^21,x22=xv^22,x23=xv^23,x24=xv^24,x25=xv^25))
lines(xv,yv, col = "blue", lwd=3)
summary(fit25)
set.seed(100)
sample <- sample(c(TRUE, FALSE), nrow(Gas), replace=TRUE, prob=c(0.9,0.1))
train <- Gas[sample, ]
test <- Gas[!sample, ]
for (i in 1:25) {
PredictionAccuracy=mean((predict(lm(PriceRegular~poly(Date,i), data=train),test)-test$PriceRegular)^2)
print(PredictionAccuracy)
}
## [1] 0.3967022
## [1] 0.2611901
## [1] 0.2106733
## [1] 0.1826787
## [1] 0.1668109
## [1] 0.1660173
## [1] 0.1633775
## [1] 0.1560798
## [1] 0.1501038
## [1] 0.1491021
## [1] 0.1129385
## [1] 0.1045485
## [1] 0.09304121
## [1] 0.09165833
## [1] 0.0901241
## [1] 0.08982945
## [1] 0.09060015
## [1] 0.09018333
## [1] 0.08970467
## [1] 0.08891765
## [1] 0.08604971
## [1] 0.08236128
## [1] 0.08300596
## [1] 0.08014616
## [1] 0.08017713
Gas1<-Gas%>%
filter(as.numeric(Date)<3000)
Gas7<-Gas%>%
filter(as.numeric(Date)>3000 & as.numeric(Date)<4936)
Gas2<-Gas%>%
filter(as.numeric(Date)>4936 & as.numeric(Date)<5111)
Gas3<-Gas%>%
filter(as.numeric(Date)>5111 & as.numeric(Date)<5972)
Gas4<-Gas%>%
filter(as.numeric(Date)>5972 & as.numeric(Date)<7120)
Gas5<-Gas%>%
filter(as.numeric(Date)>7120 & as.numeric(Date)<7722)
Gas6<-Gas%>%
filter(as.numeric(Date)>7722 & as.numeric(Date)<8548)
Gas8<-Gas%>%
filter(as.numeric(Date)>8548)
plot(PriceRegular~Date, data=Gas, xlab="Days since 01/01/1995", ylab="Gas Price", pch=19)
sec1<- lm(PriceRegular~Date, data=Gas1)
ablineclip(sec1, col = "red", lwd=3,x1=0,x2=3000)
sec2<- lm(PriceRegular~Date, data=Gas7)
ablineclip(sec2, col = "green", lwd=3,x1=3000,x2=4936)
sec3<- lm(PriceRegular~Date, data=Gas2)
ablineclip(sec3, col = "blue",lwd=3,x1=4936,x2=5111)
sec4<- lm(PriceRegular~Date, data=Gas3)
ablineclip(sec4, col = "orange",lwd=3,x1=5111,x2=5972)
sec5<- lm(PriceRegular~Date, data=Gas4)
ablineclip(sec5, col = "purple",lwd=3,x1=5972,x2=7120)
sec6<- lm(PriceRegular~Date, data=Gas5)
ablineclip(sec6, col = "yellow",lwd=3,x1=7120,x2=7722)
sec7<- lm(PriceRegular~Date, data=Gas6)
ablineclip(sec7, col = "green",lwd=3,x1=7722,x2=8548)
sec8<- lm(PriceRegular~Date, data=Gas8)
ablineclip(sec8, col = "red",lwd=3,x1=8548)
dataframe<- data.frame(x=as.numeric(Gas$Date)-9132,y=Gas$PriceRegular)
fit <- lm(y~x, data=dataframe)
segmented.fit <- segmented(fit, seg.Z = ~x, psi = c(3003,4936,5111,5972,7120,7722,8548))
summary(segmented.fit)
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = fit, seg.Z = ~x, psi = c(3003, 4936, 5111,
## 5972, 7120, 7722, 8548))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.x 3146.779 33.834
## psi2.x 5023.143 4.505
## psi3.x 5071.675 4.578
## psi4.x 6046.010 25.679
## psi5.x 7077.593 23.998
## psi6.x 7687.075 17.068
## psi7.x 8609.209 30.580
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.049e+00 1.949e-02 53.844 <2e-16 ***
## x 1.360e-04 1.073e-05 12.673 <2e-16 ***
## U1.x 9.442e-04 2.570e-05 36.738 NA
## U2.x -3.477e-02 5.589e-03 -6.222 NA
## U3.x 3.548e-02 5.589e-03 6.347 NA
## U4.x -1.905e-03 8.454e-05 -22.532 NA
## U5.x -2.338e-03 1.385e-04 -16.881 NA
## U6.x 3.349e-03 1.436e-04 23.325 NA
## U7.x -1.673e-03 9.663e-05 -17.313 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.207 on 1345 degrees of freedom
## Multiple R-Squared: 0.9414, Adjusted R-squared: 0.9407
##
## Boot restarting based on 6 samples. Last fit:
## Convergence attained in 4 iterations (rel. change 3.8081e-06)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
plot(segmented.fit, add=T, lwd=3, col = "royalblue1")
abline(v=c(3003,4936,5111,5972,7120,7722,8548),lty=2,col="darkgreen")

summary(fit)
##
## Call:
## lm(formula = y ~ x, data = dataframe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4097 -0.4140 -0.1588 0.3802 1.8515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.209e+00 3.333e-02 36.29 <2e-16 ***
## x 2.134e-04 6.062e-06 35.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6151 on 1359 degrees of freedom
## Multiple R-squared: 0.4769, Adjusted R-squared: 0.4765
## F-statistic: 1239 on 1 and 1359 DF, p-value: < 2.2e-16
fit2<-lm(PriceRegular~bs(Date,knots=c(3003,4936,5111,5972,7120,7722,8548)),data=Gas)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
Datelims<-range(Gas$Date)
Date.grid<-seq(from=Datelims[1], to = Datelims[2])
points(Date.grid,predict(fit2,newdata = list(Date=Date.grid)),col="royalblue1", lwd=3,type="l")
abline(v=c(3003,4936,5111,5972,7120,7722,8548),lty=2,col="darkgreen")
fit3<-smooth.spline(as.numeric(Gas$Date)-9132,Gas$PriceRegular, nknots=200)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
lines(fit3,col="red",lwd=3)

fit3$fit
## $knot
## [1] 0.000000000 0.000000000 0.000000000 0.000000000 0.004411765 0.009558824
## [7] 0.014705882 0.019852941 0.025000000 0.030147059 0.034558824 0.039705882
## [13] 0.044852941 0.050000000 0.055147059 0.060294118 0.064705882 0.069852941
## [19] 0.075000000 0.080147059 0.085294118 0.090441176 0.094852941 0.100000000
## [25] 0.105147059 0.110294118 0.115441176 0.120588235 0.125000000 0.130147059
## [31] 0.135294118 0.140441176 0.145588235 0.150735294 0.155147059 0.160294118
## [37] 0.165441176 0.170588235 0.175735294 0.180882353 0.185294118 0.190441176
## [43] 0.195588235 0.200735294 0.205882353 0.211029412 0.215441176 0.220588235
## [49] 0.225735294 0.230882353 0.236029412 0.241176471 0.245588235 0.250735294
## [55] 0.255882353 0.261029412 0.266176471 0.271323529 0.275735294 0.280882353
## [61] 0.286029412 0.291176471 0.296323529 0.301470588 0.305882353 0.311029412
## [67] 0.316176471 0.321323529 0.326470588 0.331617647 0.336029412 0.341176471
## [73] 0.346323529 0.351470588 0.356617647 0.361764706 0.366176471 0.371323529
## [79] 0.376470588 0.381617647 0.386764706 0.391911765 0.396323529 0.401470588
## [85] 0.406617647 0.411764706 0.416911765 0.422058824 0.426470588 0.431617647
## [91] 0.436764706 0.441911765 0.447058824 0.452205882 0.456617647 0.461764706
## [97] 0.466911765 0.472058824 0.477205882 0.482352941 0.486764706 0.491911765
## [103] 0.497058824 0.502205882 0.507352941 0.512500000 0.516911765 0.522058824
## [109] 0.527205882 0.532352941 0.537500000 0.542647059 0.547058824 0.552205882
## [115] 0.557352941 0.562500000 0.567647059 0.572794118 0.577205882 0.582352941
## [121] 0.587500000 0.592647059 0.597794118 0.602941176 0.607352941 0.612500000
## [127] 0.617647059 0.622794118 0.627941176 0.633088235 0.637500000 0.642647059
## [133] 0.647794118 0.652941176 0.658088235 0.663235294 0.667647059 0.672794118
## [139] 0.677941176 0.683088235 0.688235294 0.693382353 0.697794118 0.702941176
## [145] 0.708088235 0.713235294 0.718382353 0.723529412 0.727941176 0.733088235
## [151] 0.738235294 0.743382353 0.748529412 0.753676471 0.758088235 0.763235294
## [157] 0.768382353 0.773529412 0.778676471 0.783823529 0.788235294 0.793382353
## [163] 0.798529412 0.803676471 0.808823529 0.813970588 0.818382353 0.823529412
## [169] 0.828676471 0.833823529 0.838970588 0.844117647 0.848529412 0.853676471
## [175] 0.858823529 0.863970588 0.869117647 0.874264706 0.878676471 0.883823529
## [181] 0.888970588 0.894117647 0.899264706 0.904411765 0.908823529 0.913970588
## [187] 0.919117647 0.924264706 0.929411765 0.934558824 0.938970588 0.944117647
## [193] 0.949264706 0.954411765 0.959558824 0.964705882 0.969117647 0.974264706
## [199] 0.979411765 0.984558824 0.989705882 0.994852941 1.000000000 1.000000000
## [205] 1.000000000 1.000000000
##
## $nk
## [1] 202
##
## $min
## [1] 0
##
## $range
## [1] 9520
##
## $coef
## [1] 1.0813010 1.0835233 1.0763412 1.0521964 1.2380042 1.1550538 1.1135713
## [8] 1.0978307 1.0374048 1.1158713 1.0561876 1.2991488 1.2680506 1.2110898
## [15] 1.1921277 1.2178520 1.2396570 1.2454356 1.1824480 1.2216981 1.1653249
## [22] 1.2176655 1.2429098 1.1527628 1.1163228 1.0191997 1.0107355 1.0817135
## [29] 1.0658016 0.9932621 1.0420898 0.9554281 0.9312271 0.8891676 1.2454290
## [36] 1.0367969 1.2273037 1.2593804 1.2396369 1.2645477 1.3072989 1.5848347
## [43] 1.3736242 1.7710218 1.3343210 1.6510166 1.4555777 1.4527138 1.4176609
## [50] 1.4515703 1.9076000 1.1829598 1.6562596 1.2380012 1.0781096 1.0609021
## [57] 1.2262384 1.4891081 1.3321847 1.4260983 1.3817174 1.4862457 1.2815362
## [64] 1.6661149 1.7095678 1.4588074 1.4379315 1.7285098 1.6154684 1.4341339
## [71] 1.5296608 1.7323574 1.7704976 2.1075306 1.8659785 1.8012970 2.1146617
## [78] 1.8257273 1.7567686 2.1614522 2.2416985 2.0968952 2.3196918 3.3100396
## [85] 1.9830780 2.3611576 2.1727168 2.6463432 2.9965419 2.8955509 3.0899920
## [92] 1.9270767 2.4433258 2.1432726 2.3354865 3.0426639 3.1913785 2.8606720
## [99] 2.6961260 2.9278737 3.1477688 2.8989521 3.2972374 3.5905212 4.4105010
## [106] 3.5936054 4.0147369 1.6444775 1.6196601 2.0814927 1.8045684 2.5593435
## [113] 2.6337081 2.5460896 2.5750605 2.6498801 2.6745599 2.6703153 2.9925712
## [120] 2.6524429 2.7814414 2.6711024 2.8301476 3.0357022 3.0558507 3.7042772
## [127] 4.0740999 3.4735516 3.7786419 3.4545117 3.3898951 3.1724817 3.7548286
## [134] 4.0325194 3.6266565 3.2469749 4.0214279 3.7271701 3.1706875 3.3774575
## [141] 3.8975668 3.4267674 3.6918973 3.5670992 3.5761213 3.1371957 3.3441986
## [148] 3.2659384 3.6627984 3.7003331 3.6712671 3.4964743 3.2425488 2.9678633
## [155] 1.8479429 2.4819010 2.4395216 2.8898950 2.8164542 2.4539086 2.1867513
## [162] 2.1438331 1.6966570 1.9218029 2.2521931 2.4525459 2.0317902 2.3657840
## [169] 2.0819312 2.3904662 2.2769944 2.3581156 2.4967368 2.1362515 2.5829642
## [176] 2.5798935 2.4544152 2.5956501 2.5131654 2.8197374 2.9683272 2.7894123
## [183] 2.8509570 2.9179525 2.2720651 2.1489285 2.6617456 2.9656101 2.6736067
## [190] 2.6671832 2.5612694 2.6632985 2.4547247 2.6362560 1.7362384 1.8903514
## [197] 2.2776674 2.1526233 2.1812817 2.0381631 2.3268831 2.3972805
##
## attr(,"class")
## [1] "smooth.spline.fit"
library(npreg)
#mod.ss <- with(Gas, ss(Date, PriceRegular),nknots=8)
mod.ss <- ss(Gas$Date, Gas$PriceRegular,nknots=14)
ss(x = Gas$Date, y = Gas$PriceRegular, nknots = 8)
##
## Call:
## ss(x = Gas$Date, y = Gas$PriceRegular, nknots = 8)
##
## Smoothing Parameter spar = -0.1720181 lambda = 3.407981e-09
## Equivalent Degrees of Freedom (Df) 8.991439
## Penalized Criterion (RSS) 147.2724
## Generalized Cross-Validation (GCV) 0.1096531
summary(mod.ss)
##
## Call:
## ss(x = Gas$Date, y = Gas$PriceRegular, nknots = 14)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.104584 -0.110088 0.005267 0.093147 1.332995
##
## Approx. Signif. of Parametric Effects:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.2257 0.007561 294.37 0 ***
## x 0.8531 0.076945 11.09 0 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approx. Signif. of Nonparametric Effects:
## Df Sum Sq Mean Sq F value Pr(>F)
## s(x) 12.58 408.7 32.48495 417.5 0 ***
## Residuals 1346.42 104.8 0.07781
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2789 on 1346 degrees of freedom
## Multiple R-squared: 0.8934, Adjusted R-squared: 0.8923
## F-statistic: 830.4 on 13.58 and 1346 DF, p-value: <2e-16
plot(mod.ss)
