Gas <- read_csv("~/Senior Project/Gas.csv")
Gas  <- Gas %>% rename(PriceRegular=R1)
plot(PriceRegular~Date, data=Gas)

GasPrices <- Gas %>%
  select(PriceRegular, Date) %>%
  mutate(Date=as.numeric(Date)-9131)
head(Gas)
plot(PriceRegular~Date, data=GasPrices, pch=20, xlab="Days since 01/01/1995", ylab="Price of Regular Gas")
LinearModel<-lm(PriceRegular~Date, data=GasPrices)
abline(LinearModel, col = "red", lwd=3)

summary(LinearModel)
## 
## Call:
## lm(formula = PriceRegular ~ Date, data = GasPrices)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4097 -0.4140 -0.1588  0.3802  1.8515 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.209e+00  3.333e-02   36.28   <2e-16 ***
## Date        2.134e-04  6.062e-06   35.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6151 on 1359 degrees of freedom
## Multiple R-squared:  0.4769, Adjusted R-squared:  0.4765 
## F-statistic:  1239 on 1 and 1359 DF,  p-value: < 2.2e-16
y<- GasPrices$PriceRegular
x<- as.numeric(GasPrices$Date)
glimpse(x)
##  num [1:1361] 1 8 15 22 29 36 43 50 57 64 ...
xsq<-x^2
xcub<-x^3
xquar<-x^4
x5<-x^5
x6<-x^6
x7<-x^7
x8<-x^8
x9<-x^9
x10<-x^10
x11<-x^11
x12<-x^12
x13<-x^13
x14<-x^14
x15<-x^15
x16<-x^16
x17<-x^17
x18<-x^18
x19<-x^19
x20<-x^20
x21<-x^21
x22<-x^22
x23<-x^23
x24<-x^24
x25<-x^25
plot(x,y, pch=20, xlab="Days since 01/01/1995", ylab="Gas Price")
fit1<- lm(y~x)
# summary(fit1)
abline(fit1, col = "red", lwd=3)
fit2<- lm(y~x+xsq)
# summary(fit2)
xv<-seq(min(x),max(x),1)
yv<-predict(fit2, list(x=xv, xsq=xv^2))
lines(xv,yv, col = "green", lwd=3)

# fit3<- lm(y~x+xsq+xcub)
# summary(fit3)
# xv<-seq(min(x),max(x),1)
# yv<-predict(fit3, list(x=xv, xsq=xv^2, xcub=xv^3))
# lines(xv,yv, col = "blue", lwd=3)
# # fit4<- lm(y~poly(x,4))
# summary(fit4)
# xv<-seq(min(x),max(x),1)
# yv<-predict(fit4, list(x=xv, xsq=xv^2, xcub=xv^3, xquar=xv^4))
# lines(xv,yv, col = "purple")
# fit5<- lm(y~poly(x,5))
# #summary(fit5)$adj.r.squared
# xv<-seq(min(x),max(x),1)
# yv<-predict(fit5, list(x=xv, xsq=xv^2, xcub=xv^3, xquar=xv^4, x5=xv^5))
# lines(xv,yv, col = "orange")
# fit25<- lm(y~poly(x,25))
# summary(fit10)$adj.r.squared
# xv<-seq(min(x),max(x),1)
# yv<-predict(fit25, list(x=xv, xsq=xv^2, xcub=xv^3, xquar=xv^4, x5=xv^5, x6=xv^6,x7=xv^7,x8=xv^8,x9=xv^9,x10=xv^10,x11=xv^11,x12=xv^12,x13=xv^13,x14=xv^14,x15=xv^15,x16=xv^16,x17=xv^17,x18=xv^18,x19=xv^19,x20=xv^20,x21=xv^21,x22=xv^22,x23=xv^23,x24=xv^24,x25=xv^25))
# lines(xv,yv, col = "orange", lwd=3)
# summary(fit25)
Gas1<-GasPrices%>%
  filter(as.numeric(Date)<3000)
Gas7<-GasPrices%>%
  filter(as.numeric(Date)>3000 & as.numeric(Date)<4936)
Gas2<-GasPrices%>%
  filter(as.numeric(Date)>4936 & as.numeric(Date)<5111)
Gas3<-GasPrices%>%
  filter(as.numeric(Date)>5111 & as.numeric(Date)<5972)
Gas4<-GasPrices%>%
  filter(as.numeric(Date)>5972 & as.numeric(Date)<7120)
Gas5<-GasPrices%>%
  filter(as.numeric(Date)>7120 & as.numeric(Date)<7722)
Gas6<-GasPrices%>%
  filter(as.numeric(Date)>7722 & as.numeric(Date)<8548)
Gas8<-GasPrices%>%
  filter(as.numeric(Date)>8548)
plot(PriceRegular~Date, data=GasPrices,  xlab="Days since 01/01/1995", ylab="Gas Price")
sec1<- lm(PriceRegular~Date, data=Gas1)
ablineclip(sec1, col = "red", lwd=3,x1=0,x2=3000)
sec2<- lm(PriceRegular~Date, data=Gas7)
ablineclip(sec2, col = "green", lwd=3,x1=3000,x2=4936)
sec3<- lm(PriceRegular~Date, data=Gas2)
ablineclip(sec3, col = "blue",lwd=3,x1=4936,x2=5111)
sec4<- lm(PriceRegular~Date, data=Gas3)
ablineclip(sec4, col = "orange",lwd=3,x1=5111,x2=5972)
sec5<- lm(PriceRegular~Date, data=Gas4)
ablineclip(sec5, col = "purple",lwd=3,x1=5972,x2=7120)
sec6<- lm(PriceRegular~Date, data=Gas5)
ablineclip(sec6, col = "yellow",lwd=3,x1=7120,x2=7722)
sec7<- lm(PriceRegular~Date, data=Gas6)
ablineclip(sec7, col = "green",lwd=3,x1=7722,x2=8548)
sec8<- lm(PriceRegular~Date, data=Gas8)
ablineclip(sec8, col = "red",lwd=3,x1=8548)

dataframe<- data.frame(x=as.numeric(Gas$Date)-9132,y=Gas$PriceRegular)
fit <- lm(y~x, data=dataframe)
segmented.fit <- segmented(fit, seg.Z = ~x, psi = c(3003,4936,5111,5972,7120,7722,8548))
summary(segmented.fit)
## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = fit, seg.Z = ~x, psi = c(3003, 4936, 5111, 
##     5972, 7120, 7722, 8548))
## 
## Estimated Break-Point(s):
##             Est. St.Err
## psi1.x 3146.779 33.834
## psi2.x 5023.143  4.505
## psi3.x 5071.675  4.578
## psi4.x 6046.010 25.679
## psi5.x 7077.593 23.998
## psi6.x 7687.075 17.068
## psi7.x 8609.209 30.580
## 
## Meaningful coefficients of the linear terms:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.049e+00  1.949e-02  53.844   <2e-16 ***
## x            1.360e-04  1.073e-05  12.673   <2e-16 ***
## U1.x         9.442e-04  2.570e-05  36.738       NA    
## U2.x        -3.477e-02  5.589e-03  -6.222       NA    
## U3.x         3.548e-02  5.589e-03   6.347       NA    
## U4.x        -1.905e-03  8.454e-05 -22.532       NA    
## U5.x        -2.338e-03  1.385e-04 -16.881       NA    
## U6.x         3.349e-03  1.436e-04  23.325       NA    
## U7.x        -1.673e-03  9.663e-05 -17.313       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.207 on 1345 degrees of freedom
## Multiple R-Squared: 0.9414,  Adjusted R-squared: 0.9407 
## 
## Boot restarting based on 6 samples. Last fit:
## Convergence attained in 4 iterations (rel. change 3.8081e-06)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
plot(segmented.fit, add=T, lwd=3, col = "royalblue1")
abline(v=c(3003,4936,5111,5972,7120,7722,8548),lty=2,col="darkgreen")

fit3<-smooth.spline(as.numeric(Gas$Date)-9132,Gas$PriceRegular, cv=TRUE, df=20)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
lines(fit3,col="red",lwd=3)

fit3$knots
## NULL
library(npreg)
#mod.ss <- with(Gas, ss(Date, PriceRegular),nknots=8)
mod.ss <- ss(GasPrices$Date, GasPrices$PriceRegular,nknots=14)
ss(x = GasPrices$Date, y = GasPrices$PriceRegular, nknots = 8)
## 
## Call:
## ss(x = GasPrices$Date, y = GasPrices$PriceRegular, nknots = 8)
## 
## Smoothing Parameter  spar = -0.1720181   lambda = 3.407981e-09
## Equivalent Degrees of Freedom (Df) 8.991439
## Penalized Criterion (RSS) 147.2724
## Generalized Cross-Validation (GCV) 0.1096531
summary(mod.ss)
## 
## Call:
## ss(x = GasPrices$Date, y = GasPrices$PriceRegular, nknots = 14)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.104584 -0.110088  0.005267  0.093147  1.332995 
## 
## Approx. Signif. of Parametric Effects:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.2257   0.007561  294.37        0 ***
## x             0.8531   0.076945   11.09        0 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Approx. Signif. of Nonparametric Effects:
##                Df Sum Sq  Mean Sq F value Pr(>F)    
## s(x)        12.58  408.7 32.48495   417.5      0 ***
## Residuals 1346.42  104.8  0.07781                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.2789 on 1346 degrees of freedom
## Multiple R-squared:  0.8934,    Adjusted R-squared:  0.8923
## F-statistic: 830.4 on 13.58 and 1346 DF,  p-value: <2e-16
plot(mod.ss)

gam_mod <- gam(PriceRegular ~ s(Date, k=3), data=GasPrices, method="REML")
coef(gam_mod)
## (Intercept)   s(Date).1   s(Date).2 
##   2.2251705   1.4988632   0.5868904
bs<-bs(GasPrices$Date, knots=c(3003,4936,5111,5972,7120,7722,8548), degree=3)
library(splines)

# Set up input values and knots
x <- seq(1, 9521, length.out = 1360)
knots <- c(3003, 4936, 5111, 5972, 7120, 7722, 8548)

# Compute B-spline basis functions
B <- bs(x, degree = 3, knots = knots)

# Plot basis functions
plot(x, B[,1], type = "l", lty = 1, col = 1, xlab = "x", ylab = "Basis functions", ylim = c(0, 1), xlim = c(0, 10000))
for(i in 2:dim(B)[2]) lines(x, B[,i], type = "l", lty = 1, col = i)

# Add legend
#legend("topright", legend = 1:dim(B)[2], col = 1:dim(B)[2], lty = 1, title = "Basis functions")
library(splines)

# Create a sequence of x values
x <- seq(1, 9521, length.out = 1361)

# Define the knot locations
knots <- c(3003, 4936, 5111, 5972, 7120, 7722, 8548)

# Create the b-spline basis functions
B <- bs(x, knots=knots, degree = 3, intercept = TRUE)

# Plot the basis functions
plot(x, B[,1], type = "l", lty = 1, col = 1, xlab = "x", ylab = "Basis functions", ylim = c(0, 1), xlim = c(0, 10000))
for(i in 2:dim(B)[2]) lines(x, B[,i], type = "l", lty = 1, col = i)

# Fit a linear regression model with the B-spline basis functions
model <- lm(PriceRegular ~ B - 1, data = GasPrices)

# Get the coefficients
coefficients <- coef(model)
# Create a sequence of x values
x <- seq(1, 9521, length.out = 1361)

# Define the knot locations
knots <- c(3003, 4936, 5111, 5972, 7120, 7722, 8548)

# Create the b-spline basis functions
B <- bs(GasPrices$Date, knots=knots, degree = 3, intercept = TRUE)

# Fit the regression spline
fit <- lm(PriceRegular ~ B - 1, data = GasPrices)

# Predict the values of the regression spline
yfit <- predict(fit, newdata = list(B = B))

# Plot the regression spline and basis functions
plot(GasPrices$Date, GasPrices$PriceRegular, xlab = "Days since 01/01/1995", ylab = "Gas Price", ylim=c(0,4.5))
lines(GasPrices$Date, yfit, type = "l", col = "red", lwd = 3)
for(i in 1:dim(B)[2]) lines(x, coef(fit)[i] * B[,i], type = "l", lty = 1, col = i+1)

# Create a sequence of x values
x <- seq(1, 9521, length.out = 1361)

# Define the knot locations
knots <- c(3003, 4936, 5111, 5972, 7120, 7722, 8548)

# Create the b-spline basis functions
B <- bs(GasPrices$Date, knots=knots, degree = 3, intercept = TRUE)

# Fit the regression spline
fit <- lm(PriceRegular ~ B - 1, data = GasPrices)

# Predict the values of the regression spline
yfit <- predict(fit, newdata = list(B = B))

# Plot the regression spline and basis functions
plot(GasPrices$Date, GasPrices$PriceRegular, xlab = "Days since 01/01/1995", ylab = "Gas Price", ylim=c(0,4.5))
lines(GasPrices$Date, yfit, type = "l", col = "red", lwd = 3)
for(i in 1:dim(B)[2]) lines(x, coef(fit)[i] * B[,i], type = "l", lty = 1, col = i+1)

x <- seq(1, 9521, length.out = 1361)

# Define the knot locations
knots <- c(3003, 4936, 5111, 5972, 7120, 7722, 8548)

# Create the b-spline basis functions
B <- bs(GasPrices$Date, df=6, degree = 3, intercept = TRUE)

# Fit the regression spline
fit <- lm(PriceRegular ~ B - 1, data = GasPrices)

# Predict the values of the regression spline
yfit <- predict(fit, newdata = list(B = B))

# Plot the regression spline and basis functions
plot(GasPrices$Date, GasPrices$PriceRegular, xlab = "Days since 01/01/1995", ylab = "Gas Price", ylim=c(0,4.5))
lines(GasPrices$Date, yfit, type = "l", col = "red", lwd = 3)
for(i in 1:dim(B)[2]) lines(x, coef(fit)[i] * B[,i], type = "l", lty = 1, col = i+1)

plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
Datelims<-range(Gas$Date)
Date.grid<-seq(from=Datelims[1], to = Datelims[2])
points(Date.grid,predict(fit2,newdata = list(Date=Date.grid)),col="royalblue1", lwd=3,type="l")
abline(v=c(3003,4936,5111,5972,7120,7722,8548),lty=2,col="darkgreen")
smooth<-smooth.spline(as.numeric(GasPrices$Date),GasPrices$PriceRegular, cv=TRUE)
plot(dataframe$x, dataframe$y, pch=20,xlab="Days since 01/01/1995", ylab="GasTesting Price")
lines(fit3,col="red",lwd=3)

fit3$df
## [1] 19.99711