R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

final_function <- function(file_path){
df=read.csv(file_path) %>% as_tibble()
df <- df %>% mutate(quantity = as.numeric(quantity))
df <- df %>% mutate(price = as.numeric(price))
market = df$Market[1]
df = df %>% group_by(Date) %>% summarise(quantity = sum(quantity), price = max(price))
df1=zoo(df$price,order.by=as.Date(df$Date,format="%d-%b-%y"))
df2=zoo(df$quantity,order.by=as.Date(df$Date,format="%d-%b-%y"))
df1=na.omit(df1)
df2=na.omit(df2)
x=dsa(df1)
pre_quantity=dsa(df2)
quantity_pre1=pre_quantity$sa_result$seas_adj
x6=x$sa_result$seas_adj
data=na.omit(abs(diff(x6,1)))
print(adf.test(data$seas_adj))
print(pp.test(data$seas_adj))
quantity=na.omit(abs(diff(quantity_pre1,1)))
print(adf.test(quantity$seas_adj))
print(pp.test(quantity$seas_adj))
p=ggplot(data,aes(x=seas_adj))+geom_density(n=131072)
v=ggplot_build(p)
final_data=cbind.data.frame(v$data[[1]]$x,v$data[[1]]$y,v$data[[1]]$density)
colnames(final_data)=c("X","Y","Density")
x_dif=diff(final_data$X,1)
y_dif=diff(final_data$Y,1)
if(round(max(final_data$Y),1)==0){
  round2=3
}else{round2=2}
maxy=which.max(final_data$Y)
miny=which(final_data$Y<1e-03)
miny=min(miny)
a1<-which.min(final_data$X[c(maxy:miny)])
a2<-which.max(final_data$X[c(maxy:miny)])
a1<-c(final_data$X[a1],final_data$Y[a1])
a2<-c(final_data$X[a2],final_data$Y[a2])
a0<-rbind(a1,a2)
a3<-lm(a0[,2]~a0[,1])$coef
a4<-c(-a3[2],1)
a4<-a4/sqrt(sum(a4**2))
a5<-which.min(cbind(final_data$X,final_data$Y)%*%a4)
final_data=final_data %>% mutate (sno=row_number())
slope=round(y_dif/x_dif,3)
final_data1=cbind.data.frame(x_dif,y_dif,slope)
final_data=final_data[-1,]
data_analysis=cbind.data.frame(final_data,final_data1)
round1=7
data_analysis=data_analysis %>% mutate(diffrentiator=case_when(slope>0~"NA",(slope==0 & Y<0.8*max(data_analysis$Y))~"B",TRUE~"A"))
data_analysis=data_analysis %>% mutate(area=Y*x_dif)
sumAB=(data_analysis %>% filter(diffrentiator!="NA")%>% summarise(sumAB=sum(area)))$sumAB
sumA=(data_analysis %>% filter(diffrentiator=="A")  %>% summarise(sumAB=sum(area)))$sumAB
ratio1=sumA/sumAB
sumAx=(data_analysis %>% filter(diffrentiator=="A") %>% summarise(sumAB=sum(X)))$sumAB
sumABx=(data_analysis %>% filter(diffrentiator!="NA") %>% summarise(sumAB=sum(X)))$sumAB
# meanA=sumA/count(data_analysis %>% filter(diffrentiator=="A") %>% select(Y))$n
# meanAB=sumAB/count(data_analysis %>% filter(diffrentiator!="NA") %>% select(Y))$n
# arrA=sum(((data_analysis %>% filter(diffrentiator=="A") %>% select(Y))-meanA)^2)
# arrAB=sum(((data_analysis %>% filter(diffrentiator!="NA") %>% select(Y))-meanAB)^2)
varA=(data_analysis %>% filter(diffrentiator=="A")  %>% summarise(sumAB=var(area)))$sumAB
varB=(data_analysis %>% filter(diffrentiator!="B") %>% summarise(sumAB=var(area)))$sumAB
varAplusB=varA + varB
varAB=(data_analysis %>% filter(diffrentiator!="NA") %>% summarise(sumAB=var(area)))$sumAB
collusion=(sumA/sumAB)*(varA/varAplusB)
A_df=data_analysis %>% filter(diffrentiator=="A") %>% filter(area>10^-round1)
AB_df=data_analysis %>% filter(diffrentiator!="NA") %>% filter(area>10^-round1)
A_df=A_df %>% mutate(xy=X*Y)
AB_df=AB_df %>% mutate(xy=X*Y)
meanA1=sum(A_df$xy)/sum(A_df$Y)
meanAB1=sum(AB_df$xy)/sum(AB_df$Y)
A_df=A_df %>% mutate(varn=Y*(X-meanA1)^2)
AB_df=AB_df %>% mutate(varn=Y*(X-meanA1)^2)
varA1=sum(A_df$varn)/sum(A_df$Y)
varAB1=sum(AB_df$varn)/sum(AB_df$Y)
ratio=sum(A_df$xy)/sum(AB_df$xy)
collusion1=ratio*min(varA1/varAB1,1)
fin_graph = ggplot(data_analysis,aes(x=X,y=Y,fill=diffrentiator))+geom_area(col="black")+scale_fill_manual(values=c("red", "yellow", "azure2"))+theme_classic(base_size=10)+
  ggtitle(paste0(market))
print(fin_graph)
ggplotly(fin_graph)
ratio3=varA/varAplusB
output_df=tibble(sumA,sumAB,collusionmeasure1=ratio1,varA=format(varA,format="e",digits=2),varB=format(varB,format="e",digits=2),varAplusB=format(varAplusB,format="e",digits=2),varratio=format(ratio3,format="e",digits=2),varAB=format(varAB,format="e",digits=2),collusionmeasure2=collusion)
print(output_df)
(datatable(output_df))
print(summary(lm(quantity$seas_adj~data$seas_adj)))
print(summary(lm(log(quantity$seas_adj)~data$seas_adj)))
quantity_diff=diff(log(quantity$seas_adj))
price_diff=diff(log(x6))
new_df=cbind.data.frame(price_diff[1:length(quantity_diff)],quantity_diff)
colnames(new_df)=c("price","quantity")
models <- auto_ardl(quantity~price, data = new_df, max_order = 10)
print(models)
ardl=models$best_model
print(summary(ardl))
print(pacf(na.omit(new_df$quantity),title="Quantity"))
print(acf(na.omit(new_df$quantity),title="Quantity"))
ar=arima(na.omit(new_df$quantity),order=c(5,0,0))
print(summary(ar))
print(ArchTest(quantity_diff,lags=5,demean=T))
ehatsq=ar$residuals^2
arch1=dynlm(ehatsq~L(ehatsq)+L(ehatsq,k=-2)+L(ehatsq,k=-3)+L(ehatsq,k=-4)+L(ehatsq,k=-5),data=ehatsq)
print(summary(arch1))
colnames(new_df)=c("log(diff(price))","log(diff(quantity))")
plot.ts(new_df)}
final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Market1.csv")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `quantity = as.numeric(quantity)`.
## Caused by warning:
## ! NAs introduced by coercion
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -9.8967, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -4281.8, Truncation lag parameter = 10,
## p-value = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -8.4166, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -3807.6, Truncation lag parameter = 10,
## p-value = 0.01
## alternative hypothesis: stationary

## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA    varB    varAplusB varratio varAB  
##   <dbl> <dbl>             <dbl> <chr>   <chr>   <chr>     <chr>    <chr>  
## 1 0.587 0.667             0.879 3.4e-08 3.6e-08 7e-08     0.49     2.6e-09
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.059  -1.118  -0.983  -0.575  85.592 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.131310   0.062911   17.98   <2e-16 ***
## data$seas_adj 0.041640   0.002571   16.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.761 on 3946 degrees of freedom
## Multiple R-squared:  0.06234,    Adjusted R-squared:  0.06211 
## F-statistic: 262.4 on 1 and 3946 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9862 -1.0153 -0.0476  0.9090  5.4931 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.420589   0.029037  -48.92   <2e-16 ***
## data$seas_adj  0.022771   0.001187   19.19   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.736 on 3946 degrees of freedom
## Multiple R-squared:  0.08536,    Adjusted R-squared:  0.08513 
## F-statistic: 368.3 on 1 and 3946 DF,  p-value: < 2.2e-16
## 
## $best_model
## 
## Time series regression with "ts" data:
## Start = 12, End = 3948
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##     (Intercept)   L(quantity, 1)   L(quantity, 2)   L(quantity, 3)  
##       -0.002425        -0.836548        -0.698725        -0.577070  
##  L(quantity, 4)   L(quantity, 5)   L(quantity, 6)   L(quantity, 7)  
##       -0.515711        -0.470522        -0.318283        -0.114941  
##  L(quantity, 8)   L(quantity, 9)  L(quantity, 10)            price  
##       -0.105644        -0.112337        -0.058034         0.644118  
##     L(price, 1)      L(price, 2)      L(price, 3)      L(price, 4)  
##       -0.055423         2.290050        -0.823813         0.548551  
##     L(price, 5)      L(price, 6)      L(price, 7)      L(price, 8)  
##       -0.589573         1.817967        -0.732613         0.229483  
##     L(price, 9)     L(price, 10)  
##       -0.179546        -0.751213  
## 
## 
## $best_order
## quantity    price 
##       10       10 
## 
## $top_orders
##    quantity price      AIC
## 1        10    10 14109.36
## 2         9     6 14117.12
## 3         9     7 14118.37
## 4         9     8 14120.34
## 5         9     9 14122.34
## 6         7    10 14133.37
## 7         6    10 14134.30
## 8         8     6 14134.38
## 9         7     9 14135.10
## 10        8     7 14135.25
## 11        7     8 14135.79
## 12        6     9 14135.82
## 13        6     8 14136.50
## 14        7     7 14137.19
## 15        8     8 14137.21
## 16        6     7 14137.93
## 17        6     6 14140.03
## 18        5    10 14332.20
## 19        5     9 14334.50
## 20        5     8 14336.60
## 
## 
## Time series regression with "ts" data:
## Start = 12, End = 3948
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.4785 -0.7665  0.1517  0.9302  5.7290 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.002425   0.023075  -0.105 0.916300    
## L(quantity, 1)  -0.836548   0.015960 -52.416  < 2e-16 ***
## L(quantity, 2)  -0.698725   0.020766 -33.648  < 2e-16 ***
## L(quantity, 3)  -0.577070   0.023528 -24.527  < 2e-16 ***
## L(quantity, 4)  -0.515711   0.025215 -20.453  < 2e-16 ***
## L(quantity, 5)  -0.470522   0.026036 -18.072  < 2e-16 ***
## L(quantity, 6)  -0.318283   0.026035 -12.225  < 2e-16 ***
## L(quantity, 7)  -0.114941   0.025201  -4.561 5.25e-06 ***
## L(quantity, 8)  -0.105644   0.023497  -4.496 7.12e-06 ***
## L(quantity, 9)  -0.112337   0.020736  -5.418 6.41e-08 ***
## L(quantity, 10) -0.058034   0.015937  -3.641 0.000275 ***
## price            0.644118   0.878007   0.734 0.463228    
## L(price, 1)     -0.055423   0.893259  -0.062 0.950530    
## L(price, 2)      2.290050   0.892328   2.566 0.010314 *  
## L(price, 3)     -0.823813   0.894470  -0.921 0.357104    
## L(price, 4)      0.548551   0.895751   0.612 0.540313    
## L(price, 5)     -0.589573   0.892382  -0.661 0.508861    
## L(price, 6)      1.817967   0.894382   2.033 0.042155 *  
## L(price, 7)     -0.732613   0.893287  -0.820 0.412191    
## L(price, 8)      0.229483   0.892066   0.257 0.797000    
## L(price, 9)     -0.179546   0.888047  -0.202 0.839786    
## L(price, 10)    -0.751213   0.873004  -0.860 0.389571    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.448 on 3915 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4309, Adjusted R-squared:  0.4279 
## F-statistic: 141.2 on 21 and 3915 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.484 -0.328 -0.216 -0.154 -0.217 -0.223 -0.028 -0.010 -0.066 -0.057 -0.020 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.071 -0.088  0.017 -0.017 -0.023  0.007 -0.060 -0.012 -0.075 -0.009  0.030 
##     23     24     25     26     27     28     29     30     31     32     33 
##  0.029 -0.001 -0.011 -0.023 -0.078 -0.026  0.043  0.017 -0.054  0.034 -0.004 
##     34     35 
## -0.033  0.000
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.484 -0.017  0.022 -0.004 -0.067  0.020  0.119 -0.071 -0.039  0.033 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.007 -0.056  0.027  0.080 -0.081  0.005  0.034 -0.058  0.038 -0.034  0.074 
##     22     23     24     25     26     27     28     29     30     31     32 
## -0.028 -0.012 -0.008 -0.005 -0.007 -0.016  0.067  0.004 -0.044 -0.029  0.074 
##     33     34     35 
## -0.062  0.000  0.062 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.7797  -0.6087  -0.4422  -0.3156  -0.2166    -0.0003
## s.e.   0.0155   0.0192   0.0204   0.0193   0.0156     0.0071
## 
## sigma^2 estimated as 2.223:  log likelihood = -7177.88,  aic = 14369.76
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 503.17, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 3942
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.553 -1.840 -1.265  0.257 60.330 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.70339    0.09443  18.039  < 2e-16 ***
## L(ehatsq)          0.05316    0.01589   3.346 0.000828 ***
## L(ehatsq, k = -2)  0.07848    0.01593   4.928 8.66e-07 ***
## L(ehatsq, k = -3)  0.01692    0.01597   1.059 0.289508    
## L(ehatsq, k = -4)  0.06461    0.01594   4.052 5.17e-05 ***
## L(ehatsq, k = -5)  0.02144    0.01594   1.345 0.178704    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.922 on 3935 degrees of freedom
## Multiple R-squared:  0.01614,    Adjusted R-squared:  0.01489 
## F-statistic: 12.91 on 5 and 3935 DF,  p-value: 1.77e-12

final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Market2.csv")
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -7.3639, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -4721.8, Truncation lag parameter = 9, p-value
## = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -4.7663, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -4014.5, Truncation lag parameter = 9, p-value
## = 0.01
## alternative hypothesis: stationary
## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA    varB    varAplusB varratio varAB  
##   <dbl> <dbl>             <dbl> <chr>   <chr>   <chr>     <chr>    <chr>  
## 1 0.492 0.758             0.649 1.3e-06 1.5e-06 2.8e-06   0.45     9.6e-09
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -286.7  -285.7  -282.1  -245.4 21429.4 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   286.72823   22.04269  13.008   <2e-16 ***
## data$seas_adj  -0.04797    0.02778  -1.727   0.0843 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1339 on 3799 degrees of freedom
## Multiple R-squared:  0.0007843,  Adjusted R-squared:  0.0005213 
## F-statistic: 2.982 on 1 and 3799 DF,  p-value: 0.08429
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.6312  -2.1167  -0.7795   1.9444   8.6990 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.287e+00  4.973e-02  25.876  < 2e-16 ***
## data$seas_adj -2.573e-04  6.267e-05  -4.106 4.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.021 on 3799 degrees of freedom
## Multiple R-squared:  0.004418,   Adjusted R-squared:  0.004155 
## F-statistic: 16.86 on 1 and 3799 DF,  p-value: 4.116e-05
## Warning in log(x6): NaNs produced

## $best_model
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3801
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##     (Intercept)   L(quantity, 1)   L(quantity, 2)   L(quantity, 3)  
##        0.003893        -0.679812        -0.616234        -0.528485  
##  L(quantity, 4)   L(quantity, 5)   L(quantity, 6)   L(quantity, 7)  
##       -0.441802        -0.417166        -0.351064        -0.126048  
##  L(quantity, 8)   L(quantity, 9)  L(quantity, 10)            price  
##       -0.062333        -0.031608        -0.043994         0.052330  
##     L(price, 1)      L(price, 2)      L(price, 3)      L(price, 4)  
##       -0.046963         0.115404         0.064758         0.030704  
##     L(price, 5)      L(price, 6)      L(price, 7)      L(price, 8)  
##        0.088509         0.418879         0.167692        -0.044838  
##     L(price, 9)     L(price, 10)  
##       -0.292452        -0.267082  
## 
## 
## $best_order
## quantity    price 
##       10       10 
## 
## $top_orders
##    quantity price      AIC
## 1        10    10 13432.49
## 2         8    10 13435.21
## 3         9    10 13437.21
## 4         7    10 13437.22
## 5         6    10 13456.64
## 6         8     9 13467.66
## 7         9     9 13469.65
## 8         7     9 13469.75
## 9         8     8 13487.63
## 10        7     8 13490.02
## 11        6     9 13490.85
## 12        7     7 13509.11
## 13        6     8 13511.31
## 14        6     7 13530.27
## 15        6     6 13553.41
## 16        5    10 13693.83
## 17        5     9 13726.43
## 18        5     8 13745.60
## 19        5     7 13766.24
## 20        5     6 13787.93
## 
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3801
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7608 -0.9402  0.1198  1.0010  9.1573 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.003893   0.029246   0.133  0.89410    
## L(quantity, 1)  -0.679812   0.017084 -39.792  < 2e-16 ***
## L(quantity, 2)  -0.616234   0.020655 -29.835  < 2e-16 ***
## L(quantity, 3)  -0.528485   0.023169 -22.810  < 2e-16 ***
## L(quantity, 4)  -0.441802   0.024766 -17.839  < 2e-16 ***
## L(quantity, 5)  -0.417166   0.025188 -16.562  < 2e-16 ***
## L(quantity, 6)  -0.351064   0.025197 -13.933  < 2e-16 ***
## L(quantity, 7)  -0.126048   0.024799  -5.083 3.92e-07 ***
## L(quantity, 8)  -0.062333   0.023155  -2.692  0.00714 ** 
## L(quantity, 9)  -0.031608   0.020629  -1.532  0.12556    
## L(quantity, 10) -0.043994   0.017029  -2.584  0.00982 ** 
## price            0.052330   0.154699   0.338  0.73518    
## L(price, 1)     -0.046963   0.179759  -0.261  0.79391    
## L(price, 2)      0.115404   0.182958   0.631  0.52823    
## L(price, 3)      0.064758   0.182900   0.354  0.72331    
## L(price, 4)      0.030704   0.172737   0.178  0.85893    
## L(price, 5)      0.088509   0.166072   0.533  0.59410    
## L(price, 6)      0.418879   0.167478   2.501  0.01243 *  
## L(price, 7)      0.167692   0.173518   0.966  0.33390    
## L(price, 8)     -0.044838   0.174825  -0.256  0.79760    
## L(price, 9)     -0.292452   0.173191  -1.689  0.09139 .  
## L(price, 10)    -0.267082   0.165853  -1.610  0.10741    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.71 on 3407 degrees of freedom
##   (362 observations deleted due to missingness)
## Multiple R-squared:   0.35,  Adjusted R-squared:  0.346 
## F-statistic: 87.37 on 21 and 3407 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.377 -0.290 -0.223 -0.134 -0.165 -0.274 -0.063 -0.038  0.002 -0.051 -0.035 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.078 -0.123  0.001 -0.005  0.007 -0.031 -0.054 -0.078 -0.068 -0.009 -0.006 
##     23     24     25     26     27     28     29     30     31     32     33 
## -0.004 -0.026 -0.052 -0.061 -0.037  0.000  0.010  0.012 -0.045 -0.026 -0.017 
##     34     35 
## -0.071  0.054
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.377 -0.107 -0.014  0.035 -0.053 -0.089  0.173 -0.013 -0.011 -0.058 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.029 -0.038 -0.044  0.121 -0.016 -0.005 -0.047  0.004 -0.022 -0.002  0.070 
##     22     23     24     25     26     27     28     29     30     31     32 
## -0.004 -0.004 -0.037 -0.010 -0.014  0.018  0.048  0.002 -0.005 -0.057  0.018 
##     33     34     35 
## -0.006 -0.031  0.114 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.6044  -0.5025  -0.3728  -0.2306  -0.1656     0.0011
## s.e.   0.0160   0.0184   0.0192   0.0184   0.0160     0.0101
## 
## sigma^2 estimated as 3.21:  log likelihood = -7608.12,  aic = 15230.25
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 179.68, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 3795
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.860  -2.430  -1.645   0.252  78.786 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.88784    0.13945  13.538  < 2e-16 ***
## L(ehatsq)          0.21674    0.01583  13.695  < 2e-16 ***
## L(ehatsq, k = -2)  0.10286    0.01619   6.355 2.33e-10 ***
## L(ehatsq, k = -3)  0.02278    0.01653   1.378   0.1682    
## L(ehatsq, k = -4)  0.03370    0.01653   2.039   0.0415 *  
## L(ehatsq, k = -5)  0.03587    0.01623   2.210   0.0272 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.529 on 3788 degrees of freedom
## Multiple R-squared:  0.07135,    Adjusted R-squared:  0.07012 
## F-statistic: 58.21 on 5 and 3788 DF,  p-value: < 2.2e-16

final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Market3.csv")
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -6.4331, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -5110.6, Truncation lag parameter = 10,
## p-value = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -5.9874, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -2972.2, Truncation lag parameter = 10,
## p-value = 0.01
## alternative hypothesis: stationary
## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA    varB    varAplusB varratio varAB  
##   <dbl> <dbl>             <dbl> <chr>   <chr>   <chr>     <chr>    <chr>  
## 1 0.250 0.752             0.333 0.00015 0.00015 3e-04     0.5      3.3e-07
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4034.6  -118.2  -118.0  -116.6 19764.3 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.182e+02  1.528e+01   7.737 1.28e-14 ***
## data$seas_adj 3.333e-03  3.293e-04  10.121  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 961.5 on 3997 degrees of freedom
## Multiple R-squared:  0.02499,    Adjusted R-squared:  0.02474 
## F-statistic: 102.4 on 1 and 3997 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.7650  -1.9035  -0.7541   1.8221  10.3044 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -4.238e-01  4.755e-02  -8.913   <2e-16 ***
## data$seas_adj  1.382e-05  1.025e-06  13.482   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.992 on 3997 degrees of freedom
## Multiple R-squared:  0.04349,    Adjusted R-squared:  0.04326 
## F-statistic: 181.8 on 1 and 3997 DF,  p-value: < 2.2e-16
## Warning in log(x6): NaNs produced

## $best_model
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3999
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##    (Intercept)  L(quantity, 1)  L(quantity, 2)  L(quantity, 3)  L(quantity, 4)  
##       0.005564       -0.848167       -0.756864       -0.703124       -0.619933  
## L(quantity, 5)  L(quantity, 6)  L(quantity, 7)  L(quantity, 8)           price  
##      -0.532733       -0.459135       -0.109162       -0.050878        0.115676  
##    L(price, 1)     L(price, 2)     L(price, 3)     L(price, 4)     L(price, 5)  
##      -0.051263        0.069627       -0.019978        0.067260        0.109446  
##    L(price, 6)     L(price, 7)     L(price, 8)     L(price, 9)    L(price, 10)  
##       0.042168       -0.053180       -0.081089        0.081367        0.082238  
## 
## 
## $best_order
## quantity    price 
##        8       10 
## 
## $top_orders
##    quantity price      AIC
## 1         8    10 12983.62
## 2         9    10 12984.17
## 3        10    10 12984.72
## 4         7    10 12990.95
## 5         8     9 13001.46
## 6         9     9 13001.89
## 7         6    10 13004.76
## 8         7     9 13008.86
## 9         8     8 13020.63
## 10        6     9 13022.62
## 11        7     8 13028.03
## 12        6     8 13041.59
## 13        7     7 13049.08
## 14        6     7 13062.69
## 15        6     6 13084.33
## 16        5    10 13526.08
## 17        5     9 13545.47
## 18        5     8 13565.72
## 19        5     7 13588.53
## 20        5     6 13610.64
## 
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3999
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.2619 -0.7338  0.1274  0.8898  5.6217 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.005564   0.024382   0.228  0.81951    
## L(quantity, 1) -0.848167   0.016681 -50.846  < 2e-16 ***
## L(quantity, 2) -0.756864   0.021807 -34.707  < 2e-16 ***
## L(quantity, 3) -0.703124   0.024010 -29.285  < 2e-16 ***
## L(quantity, 4) -0.619933   0.025198 -24.602  < 2e-16 ***
## L(quantity, 5) -0.532733   0.025191 -21.147  < 2e-16 ***
## L(quantity, 6) -0.459135   0.024009 -19.124  < 2e-16 ***
## L(quantity, 7) -0.109162   0.021820  -5.003 5.92e-07 ***
## L(quantity, 8) -0.050878   0.016688  -3.049  0.00231 ** 
## price           0.115676   0.132677   0.872  0.38334    
## L(price, 1)    -0.051263   0.137861  -0.372  0.71003    
## L(price, 2)     0.069627   0.138441   0.503  0.61504    
## L(price, 3)    -0.019978   0.138103  -0.145  0.88499    
## L(price, 4)     0.067260   0.147637   0.456  0.64872    
## L(price, 5)     0.109446   0.150270   0.728  0.46646    
## L(price, 6)     0.042168   0.152643   0.276  0.78237    
## L(price, 7)    -0.053180   0.159776  -0.333  0.73928    
## L(price, 8)    -0.081089   0.160879  -0.504  0.61427    
## L(price, 9)     0.081367   0.161863   0.503  0.61521    
## L(price, 10)    0.082238   0.153037   0.537  0.59104    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.462 on 3583 degrees of freedom
##   (325 observations deleted due to missingness)
## Multiple R-squared:  0.4621, Adjusted R-squared:  0.4593 
## F-statistic:   162 on 19 and 3583 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.463 -0.300 -0.243 -0.190 -0.174 -0.407 -0.047 -0.038 -0.025 -0.014 -0.020 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.045 -0.184  0.004 -0.024  0.004  0.040 -0.032  0.005 -0.132  0.041 -0.009 
##     23     24     25     26     27     28     29     30     31     32     33 
##  0.012 -0.010 -0.040  0.016 -0.087  0.024 -0.016 -0.006  0.015  0.012 -0.025 
##     34     35     36 
## -0.047  0.025  0.001
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.463 -0.021 -0.023  0.010 -0.012 -0.154  0.316 -0.126 -0.008 -0.016 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.000 -0.017 -0.114  0.250 -0.111  0.017 -0.006 -0.046  0.032 -0.136  0.255 
##     22     23     24     25     26     27     28     29     30     31     32 
## -0.130  0.033 -0.026 -0.026  0.047 -0.142  0.226 -0.119  0.037 -0.012 -0.024 
##     33     34     35     36 
##  0.004 -0.084  0.186 -0.104 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.7538  -0.5958  -0.4643  -0.3159  -0.1746    -0.0005
## s.e.   0.0156   0.0190   0.0199   0.0190   0.0156     0.0075
## 
## sigma^2 estimated as 2.447:  log likelihood = -7462.15,  aic = 14938.3
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 483.69, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 3993
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.710  -1.991  -1.428   0.165  96.918 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.86022    0.10925  17.027  < 2e-16 ***
## L(ehatsq)          0.08640    0.01579   5.470 4.77e-08 ***
## L(ehatsq, k = -2)  0.03542    0.01587   2.232   0.0257 *  
## L(ehatsq, k = -3)  0.07355    0.01585   4.641 3.58e-06 ***
## L(ehatsq, k = -4)  0.01887    0.01585   1.191   0.2339    
## L(ehatsq, k = -5)  0.02656    0.01586   1.675   0.0940 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.958 on 3986 degrees of freedom
## Multiple R-squared:  0.01786,    Adjusted R-squared:  0.01663 
## F-statistic:  14.5 on 5 and 3986 DF,  p-value: 4.187e-14

final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Market4.csv")
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -13.763, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -2557.8, Truncation lag parameter = 9, p-value
## = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -9.6439, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -2627.7, Truncation lag parameter = 9, p-value
## = 0.01
## alternative hypothesis: stationary
## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA    varB    varAplusB varratio varAB  
##   <dbl> <dbl>             <dbl> <chr>   <chr>   <chr>     <chr>    <chr>  
## 1 0.255 0.826             0.309 0.00037 0.00037 0.00074   0.5      4.2e-07
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##   -6.05   -1.38   -1.26   -0.88 1256.70 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   1.442e+00  5.188e-01   2.779  0.00549 **
## data$seas_adj 3.954e-06  1.071e-05   0.369  0.71215   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.28 on 2574 degrees of freedom
## Multiple R-squared:  5.29e-05,   Adjusted R-squared:  -0.0003356 
## F-statistic: 0.1362 on 1 and 2574 DF,  p-value: 0.7122
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8159 -1.0321  0.0355  1.1678  8.8222 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.722e+00  3.540e-02 -48.642   <2e-16 ***
## data$seas_adj  1.681e-06  7.312e-07   2.299   0.0216 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.794 on 2574 degrees of freedom
## Multiple R-squared:  0.002048,   Adjusted R-squared:  0.001661 
## F-statistic: 5.283 on 1 and 2574 DF,  p-value: 0.02161
## Warning in log(x6): NaNs produced

## $best_model
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 2576
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##     (Intercept)   L(quantity, 1)   L(quantity, 2)   L(quantity, 3)  
##        0.007917        -0.788530        -0.670571        -0.636377  
##  L(quantity, 4)   L(quantity, 5)   L(quantity, 6)   L(quantity, 7)  
##       -0.569696        -0.498212        -0.459766        -0.246723  
##  L(quantity, 8)   L(quantity, 9)  L(quantity, 10)            price  
##       -0.145027        -0.122145        -0.077508        -0.039707  
##     L(price, 1)      L(price, 2)      L(price, 3)      L(price, 4)  
##       -0.110876         0.005154         0.098724        -0.025557  
##     L(price, 5)      L(price, 6)      L(price, 7)      L(price, 8)  
##        0.044349         0.182202        -0.083089        -0.149910  
##     L(price, 9)     L(price, 10)  
##       -0.089234        -0.065360  
## 
## 
## $best_order
## quantity    price 
##       10       10 
## 
## $top_orders
##    quantity price      AIC
## 1        10    10 9253.459
## 2         9    10 9266.971
## 3         8    10 9274.667
## 4         7    10 9278.092
## 5         9     9 9291.753
## 6         8     9 9298.525
## 7         7     9 9302.019
## 8         6    10 9314.545
## 9         8     8 9314.801
## 10        7     8 9318.023
## 11        6     9 9337.221
## 12        7     7 9339.637
## 13        6     8 9353.387
## 14        6     7 9373.293
## 15        6     6 9402.344
## 16        5    10 9491.684
## 17        5     9 9514.715
## 18        5     8 9530.462
## 19        4    10 9545.357
## 20        5     7 9551.487
## 
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 2576
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.5403 -0.7876  0.2310  0.9990  5.3464 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.007917   0.030181   0.262    0.793    
## L(quantity, 1)  -0.788530   0.019912 -39.600  < 2e-16 ***
## L(quantity, 2)  -0.670571   0.025225 -26.583  < 2e-16 ***
## L(quantity, 3)  -0.636377   0.028328 -22.465  < 2e-16 ***
## L(quantity, 4)  -0.569696   0.030674 -18.573  < 2e-16 ***
## L(quantity, 5)  -0.498212   0.031355 -15.890  < 2e-16 ***
## L(quantity, 6)  -0.459766   0.031367 -14.658  < 2e-16 ***
## L(quantity, 7)  -0.246723   0.030621  -8.057 1.20e-15 ***
## L(quantity, 8)  -0.145027   0.028222  -5.139 2.98e-07 ***
## L(quantity, 9)  -0.122145   0.025082  -4.870 1.19e-06 ***
## L(quantity, 10) -0.077508   0.019735  -3.927 8.82e-05 ***
## price           -0.039707   0.100597  -0.395    0.693    
## L(price, 1)     -0.110876   0.100542  -1.103    0.270    
## L(price, 2)      0.005154   0.100565   0.051    0.959    
## L(price, 3)      0.098724   0.100539   0.982    0.326    
## L(price, 4)     -0.025557   0.100505  -0.254    0.799    
## L(price, 5)      0.044349   0.100512   0.441    0.659    
## L(price, 6)      0.182202   0.100502   1.813    0.070 .  
## L(price, 7)     -0.083089   0.100567  -0.826    0.409    
## L(price, 8)     -0.149910   0.100525  -1.491    0.136    
## L(price, 9)     -0.089234   0.100569  -0.887    0.375    
## L(price, 10)    -0.065360   0.098840  -0.661    0.509    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.514 on 2495 degrees of freedom
##   (49 observations deleted due to missingness)
## Multiple R-squared:  0.4044, Adjusted R-squared:  0.3994 
## F-statistic: 80.67 on 21 and 2495 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.451 -0.252 -0.224 -0.183 -0.139 -0.256 -0.107 -0.033 -0.048 -0.067 -0.003 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.045 -0.067 -0.081 -0.036  0.003 -0.007 -0.042 -0.036 -0.033 -0.057  0.059 
##     23     24     25     26     27     28     29     30     31     32     33 
##  0.018  0.025  0.011  0.028 -0.024  0.047 -0.085 -0.026  0.017 -0.019 -0.025 
##     34 
## -0.026
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.451  0.002 -0.054  0.004  0.008 -0.096  0.132  0.000 -0.029 -0.023 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.040 -0.041 -0.018  0.007  0.046  0.007 -0.030 -0.011  0.005 -0.006 -0.016 
##     22     23     24     25     26     27     28     29     30     31     32 
##  0.090 -0.056  0.011 -0.027  0.022 -0.038  0.046 -0.076  0.078  0.003 -0.035 
##     33     34 
##  0.009 -0.022 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.7035  -0.5177  -0.4228  -0.2919  -0.1468     0.0001
## s.e.   0.0196   0.0234   0.0242   0.0234   0.0196     0.0103
## 
## sigma^2 estimated as 2.575:  log likelihood = -4871.88,  aic = 9757.75
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 337.86, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 2570
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.286 -2.116 -1.544  0.164 64.584 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.94010    0.13888  13.969  < 2e-16 ***
## L(ehatsq)          0.07226    0.01938   3.728 0.000197 ***
## L(ehatsq, k = -2)  0.02951    0.01982   1.489 0.136731    
## L(ehatsq, k = -3)  0.05922    0.01981   2.989 0.002830 ** 
## L(ehatsq, k = -4)  0.04138    0.01982   2.088 0.036926 *  
## L(ehatsq, k = -5)  0.04039    0.01984   2.036 0.041862 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.869 on 2563 degrees of freedom
## Multiple R-squared:  0.01582,    Adjusted R-squared:  0.0139 
## F-statistic:  8.24 on 5 and 2563 DF,  p-value: 9.857e-08

final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Market5.csv")
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -8.3133, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -2683, Truncation lag parameter = 10, p-value
## = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -8.4397, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -2845.7, Truncation lag parameter = 10,
## p-value = 0.01
## alternative hypothesis: stationary
## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA    varB    varAplusB varratio varAB  
##   <dbl> <dbl>             <dbl> <chr>   <chr>   <chr>     <chr>    <chr>  
## 1 0.242 0.786             0.308 1.5e-07 1.5e-07 3e-07     0.5      1.7e-08
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.088 -2.888 -2.108  0.322 73.771 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.344e+00  9.357e-02  35.736   <2e-16 ***
## data$seas_adj 1.090e-06  1.224e-06   0.891    0.373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.818 on 3983 degrees of freedom
## Multiple R-squared:  0.0001991,  Adjusted R-squared:  -5.196e-05 
## F-statistic: 0.793 on 1 and 3983 DF,  p-value: 0.3732
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.8122  -0.9548   0.0336   1.1126   4.1589 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.863e-01  2.556e-02   7.290 3.71e-13 ***
## data$seas_adj 3.383e-07  3.343e-07   1.012    0.312    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.589 on 3983 degrees of freedom
## Multiple R-squared:  0.0002571,  Adjusted R-squared:  6.141e-06 
## F-statistic: 1.024 on 1 and 3983 DF,  p-value: 0.3115
## Warning in log(x6): NaNs produced

## $best_model
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3975
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##     (Intercept)   L(quantity, 1)   L(quantity, 2)   L(quantity, 3)  
##       -0.006668        -0.790404        -0.643407        -0.535715  
##  L(quantity, 4)   L(quantity, 5)   L(quantity, 6)   L(quantity, 7)  
##       -0.471064        -0.401057        -0.352358        -0.281675  
##  L(quantity, 8)   L(quantity, 9)  L(quantity, 10)            price  
##       -0.220669        -0.181192        -0.091519        -0.037319  
##     L(price, 1)      L(price, 2)      L(price, 3)      L(price, 4)  
##       -0.066410         0.262314         0.332898        -0.016295  
##     L(price, 5)      L(price, 6)      L(price, 7)      L(price, 8)  
##       -0.363815        -0.331858         0.158690         0.283942  
##     L(price, 9)     L(price, 10)  
##        0.123771        -0.053904  
## 
## 
## $best_order
## quantity    price 
##       10       10 
## 
## $top_orders
##    quantity price      AIC
## 1        10    10 9362.662
## 2         9    10 9383.068
## 3         9     9 9398.048
## 4         8    10 9413.465
## 5         7    10 9428.205
## 6         8     9 9428.333
## 7         7     9 9443.183
## 8         8     8 9447.952
## 9         6    10 9457.380
## 10        7     8 9462.637
## 11        6     9 9472.649
## 12        7     7 9480.985
## 13        6     8 9492.357
## 14        5    10 9501.099
## 15        6     7 9510.515
## 16        5     9 9517.129
## 17        6     6 9532.264
## 18        5     8 9536.832
## 19        4    10 9543.410
## 20        5     7 9554.949
## 
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3975
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.7411 -0.7577  0.1585  0.9511  4.0302 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.006668   0.026991  -0.247   0.8049    
## L(quantity, 1)  -0.790404   0.019373 -40.800  < 2e-16 ***
## L(quantity, 2)  -0.643407   0.024503 -26.258  < 2e-16 ***
## L(quantity, 3)  -0.535715   0.027206 -19.691  < 2e-16 ***
## L(quantity, 4)  -0.471064   0.028618 -16.461  < 2e-16 ***
## L(quantity, 5)  -0.401057   0.029255 -13.709  < 2e-16 ***
## L(quantity, 6)  -0.352358   0.029225 -12.057  < 2e-16 ***
## L(quantity, 7)  -0.281675   0.028509  -9.880  < 2e-16 ***
## L(quantity, 8)  -0.220669   0.027077  -8.150 5.57e-16 ***
## L(quantity, 9)  -0.181192   0.024460  -7.408 1.72e-13 ***
## L(quantity, 10) -0.091519   0.019374  -4.724 2.44e-06 ***
## price           -0.037319   0.151221  -0.247   0.8051    
## L(price, 1)     -0.066410   0.155590  -0.427   0.6695    
## L(price, 2)      0.262314   0.157130   1.669   0.0952 .  
## L(price, 3)      0.332898   0.156241   2.131   0.0332 *  
## L(price, 4)     -0.016295   0.164433  -0.099   0.9211    
## L(price, 5)     -0.363815   0.163950  -2.219   0.0266 *  
## L(price, 6)     -0.331858   0.169897  -1.953   0.0509 .  
## L(price, 7)      0.158690   0.188120   0.844   0.3990    
## L(price, 8)      0.283942   0.190133   1.493   0.1355    
## L(price, 9)      0.123771   0.191979   0.645   0.5192    
## L(price, 10)    -0.053904   0.185840  -0.290   0.7718    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.392 on 2647 degrees of freedom
##   (1108 observations deleted due to missingness)
## Multiple R-squared:  0.3911, Adjusted R-squared:  0.3863 
## F-statistic: 80.96 on 21 and 2647 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.472 -0.296 -0.209 -0.183 -0.127 -0.131 -0.111 -0.084 -0.090 -0.099 -0.078 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.067 -0.056 -0.088 -0.007 -0.043 -0.056 -0.042 -0.030 -0.038 -0.048 -0.037 
##     23     24     25     26     27     28     29     30     31     32     33 
##  0.012 -0.008 -0.027 -0.051 -0.004 -0.016 -0.037 -0.016  0.012  0.008  0.002 
##     34     35     36 
##  0.003  0.002 -0.006
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.472 -0.007 -0.004 -0.020  0.022 -0.026  0.010  0.008 -0.015 -0.005 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.014 -0.002  0.003 -0.025  0.060 -0.052  0.003  0.014  0.000 -0.006 -0.007 
##     22     23     24     25     26     27     28     29     30     31     32 
##  0.012  0.026 -0.033 -0.004 -0.006  0.035 -0.023 -0.011  0.025  0.003 -0.012 
##     33     34     35     36 
## -0.003  0.003 -0.002 -0.006 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.7357  -0.5440  -0.3958  -0.2736  -0.1267    -0.0004
## s.e.   0.0157   0.0191   0.0200   0.0191   0.0157     0.0074
## 
## sigma^2 estimated as 2.044:  log likelihood = -7077.72,  aic = 14169.45
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 690.73, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 3979
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.825 -1.795 -1.214  0.280 90.821 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.7948675  0.0961361  18.670  < 2e-16 ***
## L(ehatsq)         0.0683858  0.0158264   4.321 1.59e-05 ***
## L(ehatsq, k = -2) 0.0324312  0.0158670   2.044    0.041 *  
## L(ehatsq, k = -3) 0.0004644  0.0159046   0.029    0.977    
## L(ehatsq, k = -4) 0.0164164  0.0159023   1.032    0.302    
## L(ehatsq, k = -5) 0.0044875  0.0158718   0.283    0.777    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.288 on 3972 degrees of freedom
## Multiple R-squared:  0.006125,   Adjusted R-squared:  0.004874 
## F-statistic: 4.895 on 5 and 3972 DF,  p-value: 0.000181

final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Market6.csv")
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -5.1694, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -4175.7, Truncation lag parameter = 9, p-value
## = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -11.253, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -4162.5, Truncation lag parameter = 9, p-value
## = 0.01
## alternative hypothesis: stationary
## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA    varB    varAplusB varratio varAB  
##   <dbl> <dbl>             <dbl> <chr>   <chr>   <chr>     <chr>    <chr>  
## 1 0.270 0.800             0.338 5.6e-06 5.6e-06 1.1e-05   0.5      1.3e-07
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.0207 -0.0201 -0.0181 -0.0125  3.2272 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    2.073e-02  1.980e-03  10.470   <2e-16 ***
## data$seas_adj -5.851e-09  5.477e-08  -0.107    0.915    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1177 on 3625 degrees of freedom
## Multiple R-squared:  3.149e-06,  Adjusted R-squared:  -0.0002727 
## F-statistic: 0.01141 on 1 and 3625 DF,  p-value: 0.9149
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.0606  -1.2968   0.0742   1.2290   7.2264 
## 
## Coefficients:
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)   -6.048e+00  3.289e-02 -183.885   <2e-16 ***
## data$seas_adj  1.626e-06  9.098e-07    1.787    0.074 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.955 on 3625 degrees of freedom
## Multiple R-squared:  0.0008801,  Adjusted R-squared:  0.0006045 
## F-statistic: 3.193 on 1 and 3625 DF,  p-value: 0.07403
## Warning in log(x6): NaNs produced

## $best_model
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3627
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##     (Intercept)   L(quantity, 1)   L(quantity, 2)   L(quantity, 3)  
##      -0.0002696       -0.8344050       -0.7705502       -0.6843870  
##  L(quantity, 4)   L(quantity, 5)   L(quantity, 6)   L(quantity, 7)  
##      -0.5783322       -0.4792760       -0.3597994       -0.1940630  
##  L(quantity, 8)   L(quantity, 9)  L(quantity, 10)            price  
##      -0.1114287       -0.0673050       -0.0934059       -0.1698720  
##     L(price, 1)      L(price, 2)      L(price, 3)      L(price, 4)  
##      -0.0405283       -0.1301565       -0.1083398       -0.0460961  
##     L(price, 5)      L(price, 6)      L(price, 7)      L(price, 8)  
##      -0.1400410       -0.0746170        0.1440679        0.1975439  
##     L(price, 9)     L(price, 10)  
##       0.0580031        0.1453591  
## 
## 
## $best_order
## quantity    price 
##       10       10 
## 
## $top_orders
##    quantity price      AIC
## 1        10    10 8631.466
## 2         8    10 8646.680
## 3         9    10 8648.386
## 4         7    10 8649.777
## 5         6    10 8668.466
## 6         8     9 8687.697
## 7         9     9 8689.415
## 8         7     9 8691.014
## 9         6     9 8709.907
## 10        8     8 8723.370
## 11        7     8 8727.147
## 12        6     8 8745.948
## 13        5    10 8753.312
## 14        7     7 8782.401
## 15        5     9 8794.630
## 16        6     7 8800.948
## 17        4    10 8829.792
## 18        5     8 8831.479
## 19        6     6 8847.163
## 20        4     9 8871.961
## 
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3627
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.8320 -1.0450 -0.0196  1.1135  7.6989 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.0002696  0.0380568  -0.007 0.994348    
## L(quantity, 1)  -0.8344050  0.0215393 -38.739  < 2e-16 ***
## L(quantity, 2)  -0.7705502  0.0280993 -27.422  < 2e-16 ***
## L(quantity, 3)  -0.6843870  0.0325738 -21.010  < 2e-16 ***
## L(quantity, 4)  -0.5783322  0.0355502 -16.268  < 2e-16 ***
## L(quantity, 5)  -0.4792760  0.0370221 -12.946  < 2e-16 ***
## L(quantity, 6)  -0.3597994  0.0371405  -9.688  < 2e-16 ***
## L(quantity, 7)  -0.1940630  0.0358069  -5.420 6.64e-08 ***
## L(quantity, 8)  -0.1114287  0.0327648  -3.401 0.000684 ***
## L(quantity, 9)  -0.0673050  0.0282267  -2.384 0.017191 *  
## L(quantity, 10) -0.0934059  0.0215372  -4.337 1.51e-05 ***
## price           -0.1698720  0.0628119  -2.704 0.006896 ** 
## L(price, 1)     -0.0405283  0.0688629  -0.589 0.556234    
## L(price, 2)     -0.1301565  0.0805078  -1.617 0.106092    
## L(price, 3)     -0.1083398  0.0842780  -1.286 0.198755    
## L(price, 4)     -0.0460961  0.0877739  -0.525 0.599521    
## L(price, 5)     -0.1400410  0.0870334  -1.609 0.107753    
## L(price, 6)     -0.0746170  0.0858024  -0.870 0.384596    
## L(price, 7)      0.1440679  0.0825351   1.746 0.081035 .  
## L(price, 8)      0.1975439  0.0785671   2.514 0.011999 *  
## L(price, 9)      0.0580031  0.0705444   0.822 0.411043    
## L(price, 10)     0.1453591  0.0630931   2.304 0.021324 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.77 on 2141 degrees of freedom
##   (747 observations deleted due to missingness)
## Multiple R-squared:  0.4299, Adjusted R-squared:  0.4244 
## F-statistic: 76.89 on 21 and 2141 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.462 -0.333 -0.258 -0.201 -0.178 -0.194 -0.082 -0.062 -0.002 -0.108 -0.076 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.077 -0.075 -0.049 -0.064 -0.095 -0.019 -0.007 -0.021 -0.069 -0.106 -0.036 
##     23     24     25     26     27     28     29     30     31     32     33 
## -0.010 -0.023  0.014 -0.047 -0.073 -0.055 -0.014 -0.015 -0.017 -0.009 -0.046 
##     34     35 
## -0.041 -0.035
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.462 -0.049  0.003  0.009 -0.008 -0.021  0.073 -0.032  0.018 -0.086 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.060 -0.006 -0.002  0.018 -0.021 -0.013  0.045 -0.014 -0.021 -0.013 -0.002 
##     22     23     24     25     26     27     28     29     30     31     32 
##  0.055 -0.016 -0.018  0.025 -0.050 -0.002  0.034  0.027 -0.023 -0.007 -0.002 
##     33     34     35 
## -0.026  0.026  0.005 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.7883  -0.6608  -0.5034  -0.3352  -0.1781     0.0007
## s.e.   0.0163   0.0202   0.0214   0.0202   0.0164     0.0087
## 
## sigma^2 estimated as 3.289:  log likelihood = -7304.24,  aic = 14622.48
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 402.65, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 3621
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -7.677  -2.810  -2.038   0.567 153.113 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.682855   0.151256  17.737  < 2e-16 ***
## L(ehatsq)         0.054049   0.016616   3.253 0.001153 ** 
## L(ehatsq, k = -2) 0.058132   0.016613   3.499 0.000472 ***
## L(ehatsq, k = -3) 0.008744   0.016651   0.525 0.599515    
## L(ehatsq, k = -4) 0.042333   0.016640   2.544 0.010998 *  
## L(ehatsq, k = -5) 0.022488   0.016650   1.351 0.176893    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.153 on 3614 degrees of freedom
## Multiple R-squared:  0.009739,   Adjusted R-squared:  0.008369 
## F-statistic: 7.109 on 5 and 3614 DF,  p-value: 1.26e-06

final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Market7.csv")
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -7.6501, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -4113.5, Truncation lag parameter = 9, p-value
## = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -15.496, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -3871.1, Truncation lag parameter = 9, p-value
## = 0.01
## alternative hypothesis: stationary
## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA  varB  varAplusB varratio varAB  
##   <dbl> <dbl>             <dbl> <chr> <chr> <chr>     <chr>    <chr>  
## 1 0.203 0.699             0.291 2e-05 2e-05 4e-05     0.5      1.2e-07
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -405.20    0.82    0.85    0.85 2829.97 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.498e-01  7.736e-01  -1.099    0.272    
## data$seas_adj  2.265e-04  1.151e-05  19.681   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.75 on 3869 degrees of freedom
## Multiple R-squared:  0.09101,    Adjusted R-squared:  0.09077 
## F-statistic: 387.4 on 1 and 3869 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7662 -1.9218 -0.6505  1.7633  9.1490 
## 
## Coefficients:
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)   -5.544e+00  4.463e-02 -124.226  < 2e-16 ***
## data$seas_adj  4.305e-06  6.639e-07    6.484 1.01e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.755 on 3869 degrees of freedom
## Multiple R-squared:  0.01075,    Adjusted R-squared:  0.01049 
## F-statistic: 42.04 on 1 and 3869 DF,  p-value: 1.006e-10
## Warning in log(x6): NaNs produced

## $best_model
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3862
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##    (Intercept)  L(quantity, 1)  L(quantity, 2)  L(quantity, 3)  L(quantity, 4)  
##      -0.008818       -0.843245       -0.703392       -0.653581       -0.633469  
## L(quantity, 5)  L(quantity, 6)  L(quantity, 7)  L(quantity, 8)  L(quantity, 9)  
##      -0.585256       -0.440221       -0.013479        0.050573       -0.039564  
##          price     L(price, 1)     L(price, 2)     L(price, 3)     L(price, 4)  
##      -0.064244        0.037691       -0.244031       -0.157243       -0.078888  
##    L(price, 5)     L(price, 6)     L(price, 7)     L(price, 8)     L(price, 9)  
##       0.021942        0.190064        0.195730        0.111009       -0.040175  
##   L(price, 10)  
##       0.127802  
## 
## 
## $best_order
## quantity    price 
##        9       10 
## 
## $top_orders
##    quantity price      AIC
## 1         9    10 13375.14
## 2        10    10 13377.14
## 3         8    10 13378.29
## 4         7    10 13399.76
## 5         6    10 13408.64
## 6         9     9 13413.75
## 7         8     9 13416.99
## 8         7     9 13438.53
## 9         8     8 13442.15
## 10        6     9 13448.56
## 11        7     8 13463.64
## 12        6     8 13473.64
## 13        7     7 13493.17
## 14        6     7 13503.09
## 15        6     6 13530.45
## 16        5    10 14074.19
## 17        5     9 14103.70
## 18        5     8 14138.47
## 19        5     7 14165.96
## 20        5     6 14195.55
## 
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 3862
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.3034 -1.0043 -0.0366  0.8526  8.6004 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -0.008818   0.032940  -0.268   0.7889    
## L(quantity, 1) -0.843245   0.017529 -48.106   <2e-16 ***
## L(quantity, 2) -0.703392   0.022911 -30.701   <2e-16 ***
## L(quantity, 3) -0.653581   0.026002 -25.136   <2e-16 ***
## L(quantity, 4) -0.633469   0.027347 -23.164   <2e-16 ***
## L(quantity, 5) -0.585256   0.027702 -21.127   <2e-16 ***
## L(quantity, 6) -0.440221   0.027384 -16.076   <2e-16 ***
## L(quantity, 7) -0.013479   0.026057  -0.517   0.6050    
## L(quantity, 8)  0.050573   0.022903   2.208   0.0273 *  
## L(quantity, 9) -0.039564   0.017477  -2.264   0.0237 *  
## price          -0.064244   0.152833  -0.420   0.6743    
## L(price, 1)     0.037691   0.156704   0.241   0.8099    
## L(price, 2)    -0.244031   0.157336  -1.551   0.1210    
## L(price, 3)    -0.157243   0.171537  -0.917   0.3594    
## L(price, 4)    -0.078888   0.177166  -0.445   0.6562    
## L(price, 5)     0.021942   0.176354   0.124   0.9010    
## L(price, 6)     0.190064   0.170092   1.117   0.2639    
## L(price, 7)     0.195730   0.153787   1.273   0.2032    
## L(price, 8)     0.111009   0.152834   0.726   0.4677    
## L(price, 9)    -0.040175   0.152396  -0.264   0.7921    
## L(price, 10)    0.127802   0.151170   0.845   0.3979    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.878 on 3237 degrees of freedom
##   (344 observations deleted due to missingness)
## Multiple R-squared:  0.4898, Adjusted R-squared:  0.4866 
## F-statistic: 155.4 on 20 and 3237 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.429 -0.261 -0.196 -0.199 -0.267 -0.418 -0.034  0.100 -0.047 -0.021 -0.090 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.165 -0.253 -0.030  0.026  0.002 -0.025  0.063 -0.033 -0.047  0.008 -0.005 
##     23     24     25     26     27     28     29     30     31     32     33 
## -0.004  0.013 -0.060 -0.047 -0.033 -0.035  0.027  0.001 -0.081  0.095 -0.029 
##     34     35 
## -0.029  0.012
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.429 -0.029 -0.022 -0.038 -0.059 -0.077  0.315 -0.046 -0.132  0.048 
##     11     12     13     14     15     16     17     18     19     20     21 
## -0.078 -0.053 -0.022  0.267 -0.043 -0.093  0.004  0.004 -0.128  0.049  0.166 
##     22     23     24     25     26     27     28     29     30     31     32 
## -0.028 -0.066  0.009 -0.062 -0.041  0.035  0.128  0.010 -0.079 -0.027  0.070 
##     33     34     35 
## -0.173  0.098  0.098 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.6836  -0.5233  -0.4311  -0.3679  -0.2672     0.0015
## s.e.   0.0155   0.0181   0.0187   0.0181   0.0155     0.0101
## 
## sigma^2 estimated as 4.26:  log likelihood = -8296.35,  aic = 16606.7
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 411.55, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 3865
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.518  -3.318  -2.186   0.810 159.327 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.89458    0.17031  16.996  < 2e-16 ***
## L(ehatsq)          0.13318    0.01605   8.298  < 2e-16 ***
## L(ehatsq, k = -2)  0.10326    0.01607   6.424 1.49e-10 ***
## L(ehatsq, k = -3)  0.03251    0.01620   2.007   0.0448 *  
## L(ehatsq, k = -4)  0.01124    0.01622   0.693   0.4881    
## L(ehatsq, k = -5)  0.04156    0.01626   2.556   0.0106 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.141 on 3858 degrees of freedom
## Multiple R-squared:  0.03776,    Adjusted R-squared:  0.03652 
## F-statistic: 30.28 on 5 and 3858 DF,  p-value: < 2.2e-16

final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Market9.csv")
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -12.185, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -2314.2, Truncation lag parameter = 8, p-value
## = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -10.863, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -2961.4, Truncation lag parameter = 8, p-value
## = 0.01
## alternative hypothesis: stationary
## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA    varB    varAplusB varratio varAB
##   <dbl> <dbl>             <dbl> <chr>   <chr>   <chr>     <chr>    <chr>
## 1 0.259 0.779             0.332 1.4e-06 1.4e-06 2.7e-06   0.5      5e-08
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14361 -0.01644 -0.01597 -0.01397  1.45242 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.661e-02  1.623e-03  10.234  < 2e-16 ***
## data$seas_adj 2.119e-07  3.543e-08   5.978 2.57e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08083 on 2513 degrees of freedom
## Multiple R-squared:  0.01402,    Adjusted R-squared:  0.01363 
## F-statistic: 35.74 on 1 and 2513 DF,  p-value: 2.572e-09
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1412 -1.2812 -0.1547  1.0865  7.2997 
## 
## Coefficients:
##                 Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)   -6.915e+00  4.201e-02 -164.601  < 2e-16 ***
## data$seas_adj  6.258e-06  9.174e-07    6.821 1.13e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.092 on 2513 degrees of freedom
## Multiple R-squared:  0.01818,    Adjusted R-squared:  0.01779 
## F-statistic: 46.53 on 1 and 2513 DF,  p-value: 1.125e-11
## Warning in log(x6): NaNs produced

## $best_model
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 2515
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##    (Intercept)  L(quantity, 1)  L(quantity, 2)  L(quantity, 3)  L(quantity, 4)  
##      0.0004413      -0.7657515      -0.5947707      -0.4952793      -0.3632658  
## L(quantity, 5)  L(quantity, 6)  L(quantity, 7)  L(quantity, 8)  L(quantity, 9)  
##     -0.3190098      -0.2612074      -0.1545968      -0.1271984      -0.0324305  
##          price     L(price, 1)     L(price, 2)     L(price, 3)     L(price, 4)  
##     -0.0077796      -0.1948654      -0.3257191      -0.1010545       0.1860741  
##    L(price, 5)     L(price, 6)     L(price, 7)     L(price, 8)     L(price, 9)  
##      0.0101725       0.2660725       0.2328208       0.2640935       0.2945873  
##   L(price, 10)  
##      0.2847967  
## 
## 
## $best_order
## quantity    price 
##        9       10 
## 
## $top_orders
##    quantity price      AIC
## 1         9    10 9504.339
## 2         8    10 9504.856
## 3        10    10 9505.495
## 4         7    10 9527.720
## 5         9     9 9528.162
## 6         8     9 9529.141
## 7         6    10 9533.600
## 8         8     8 9552.637
## 9         7     9 9554.673
## 10        6     9 9558.692
## 11        7     8 9577.502
## 12        5    10 9580.392
## 13        6     8 9581.272
## 14        7     7 9590.529
## 15        6     7 9594.267
## 16        5     9 9603.360
## 17        6     6 9607.166
## 18        4    10 9614.711
## 19        5     8 9626.259
## 20        5     7 9639.753
## 
## 
## Time series regression with "zooreg" data:
## Start = 12, End = 2515
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.4156  -0.9626   0.0844   0.8734   8.2934 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.0004413  0.0388967   0.011 0.990949    
## L(quantity, 1) -0.7657515  0.0208173 -36.784  < 2e-16 ***
## L(quantity, 2) -0.5947707  0.0260205 -22.858  < 2e-16 ***
## L(quantity, 3) -0.4952793  0.0285752 -17.332  < 2e-16 ***
## L(quantity, 4) -0.3632658  0.0298746 -12.160  < 2e-16 ***
## L(quantity, 5) -0.3190098  0.0300256 -10.625  < 2e-16 ***
## L(quantity, 6) -0.2612074  0.0298395  -8.754  < 2e-16 ***
## L(quantity, 7) -0.1545968  0.0285384  -5.417 6.69e-08 ***
## L(quantity, 8) -0.1271984  0.0259191  -4.908 9.87e-07 ***
## L(quantity, 9) -0.0324305  0.0205299  -1.580 0.114319    
## price          -0.0077796  0.0814788  -0.095 0.923942    
## L(price, 1)    -0.1948654  0.0922513  -2.112 0.034766 *  
## L(price, 2)    -0.3257191  0.0956500  -3.405 0.000672 ***
## L(price, 3)    -0.1010545  0.0964779  -1.047 0.295009    
## L(price, 4)     0.1860741  0.0933607   1.993 0.046372 *  
## L(price, 5)     0.0101725  0.0935065   0.109 0.913380    
## L(price, 6)     0.2660725  0.0944174   2.818 0.004873 ** 
## L(price, 7)     0.2328208  0.0980850   2.374 0.017694 *  
## L(price, 8)     0.2640935  0.0977485   2.702 0.006948 ** 
## L(price, 9)     0.2945873  0.0948351   3.106 0.001918 ** 
## L(price, 10)    0.2847967  0.0842907   3.379 0.000740 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.872 on 2296 degrees of freedom
##   (188 observations deleted due to missingness)
## Multiple R-squared:  0.3885, Adjusted R-squared:  0.3832 
## F-statistic: 72.94 on 20 and 2296 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.476 -0.290 -0.231 -0.120 -0.126 -0.143 -0.040 -0.096 -0.039 -0.025 -0.004 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.067 -0.111 -0.013  0.014 -0.076 -0.065 -0.031 -0.022 -0.122  0.007 -0.004 
##     23     24     25     26     27     28     29     30     31     32     33 
## -0.031  0.027 -0.048 -0.034  0.008  0.006 -0.034 -0.043 -0.017  0.007 -0.026 
##     34 
## -0.019
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.476  0.003 -0.027  0.047 -0.047 -0.014  0.066 -0.071  0.050 -0.014 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.014 -0.057 -0.001  0.074 -0.022 -0.061  0.034  0.024 -0.018 -0.064  0.121 
##     22     23     24     25     26     27     28     29     30     31     32 
## -0.061 -0.010  0.046 -0.059  0.017  0.020  0.015 -0.053  0.015  0.024  0.007 
##     33     34 
## -0.037  0.009 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.7278  -0.5303  -0.3802  -0.2137  -0.1297     0.0018
## s.e.   0.0198   0.0243   0.0254   0.0243   0.0199     0.0130
## 
## sigma^2 estimated as 3.797:  log likelihood = -5244.53,  aic = 10503.07
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 447.61, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 2509
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.281  -2.803  -2.133  -0.440  82.064 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.315974   0.210114  11.022  < 2e-16 ***
## L(ehatsq)          0.207889   0.019408  10.712  < 2e-16 ***
## L(ehatsq, k = -2)  0.107728   0.020288   5.310 1.19e-07 ***
## L(ehatsq, k = -3)  0.062018   0.020597   3.011  0.00263 ** 
## L(ehatsq, k = -4)  0.016905   0.020630   0.819  0.41262    
## L(ehatsq, k = -5) -0.002132   0.020252  -0.105  0.91617    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.056 on 2502 degrees of freedom
## Multiple R-squared:  0.06811,    Adjusted R-squared:  0.06624 
## F-statistic: 36.57 on 5 and 2502 DF,  p-value: < 2.2e-16

final_function("D:/DipankarSir/DipankarSir/dataanalysis_/Marketwb.csv")
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Missing values detected. To allow DSA to work properly, missing values are imputed using the 'last observation carried forward' algorithm (zoo::na.locf). Leading NAs are removed.
## You might want to impute the series outside of this function.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================================| 100%
## Warning in adf.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$seas_adj
## Dickey-Fuller = -11.331, Lag order = 16, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(data$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  data$seas_adj
## Dickey-Fuller Z(alpha) = -2733.4, Truncation lag parameter = 10,
## p-value = 0.01
## alternative hypothesis: stationary
## Warning in adf.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller = -6.5987, Lag order = 16, p-value = 0.01
## alternative hypothesis: stationary
## Warning in pp.test(quantity$seas_adj): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  quantity$seas_adj
## Dickey-Fuller Z(alpha) = -2907.3, Truncation lag parameter = 10,
## p-value = 0.01
## alternative hypothesis: stationary

## # A tibble: 1 × 9
##    sumA sumAB collusionmeasure1 varA    varB  varAplusB varratio varAB
##   <dbl> <dbl>             <dbl> <chr>   <chr> <chr>     <chr>    <chr>
## 1 0.611 0.787             0.776 3.4e-08 4e-08 7.4e-08   0.46     2e-09
## # ℹ 1 more variable: collusionmeasure2 <dbl>
## 
## Call:
## lm(formula = quantity$seas_adj ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -6.676  -3.477  -2.740  -1.015 155.145 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.882558   0.148631  26.122  < 2e-16 ***
## data$seas_adj 0.008930   0.002667   3.348  0.00082 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.934 on 4211 degrees of freedom
## Multiple R-squared:  0.002655,   Adjusted R-squared:  0.002418 
## F-statistic: 11.21 on 1 and 4211 DF,  p-value: 0.0008202
## 
## 
## Call:
## lm(formula = log(quantity$seas_adj) ~ data$seas_adj)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1325 -0.8744 -0.0079  0.8408  4.8014 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.2259081  0.0254403   8.880  < 2e-16 ***
## data$seas_adj 0.0020731  0.0004565   4.541 5.75e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.529 on 4211 degrees of freedom
## Multiple R-squared:  0.004874,   Adjusted R-squared:  0.004637 
## F-statistic: 20.62 on 1 and 4211 DF,  p-value: 5.745e-06
## 
## $best_model
## 
## Time series regression with "ts" data:
## Start = 12, End = 4213
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Coefficients:
##     (Intercept)   L(quantity, 1)   L(quantity, 2)   L(quantity, 3)  
##        -0.00357         -0.76453         -0.61442         -0.49434  
##  L(quantity, 4)   L(quantity, 5)   L(quantity, 6)   L(quantity, 7)  
##        -0.41705         -0.36631         -0.33225         -0.28288  
##  L(quantity, 8)   L(quantity, 9)  L(quantity, 10)            price  
##        -0.22535         -0.13269         -0.07577          0.07466  
##     L(price, 1)      L(price, 2)      L(price, 3)      L(price, 4)  
##        -0.08096         -0.82585          0.40747         -0.52027  
##     L(price, 5)      L(price, 6)      L(price, 7)      L(price, 8)  
##        -0.58606         -0.34992         -0.05045         -0.15107  
##     L(price, 9)     L(price, 10)  
##         0.42956          0.61028  
## 
## 
## $best_order
## quantity    price 
##       10       10 
## 
## $top_orders
##    quantity price      AIC
## 1        10    10 14128.28
## 2         9    10 14150.48
## 3         9     9 14153.55
## 4         8     2 14167.58
## 5         8     1 14168.12
## 6         8     3 14168.62
## 7         8     4 14170.03
## 8         8     5 14171.37
## 9         8     6 14173.09
## 10        8     7 14175.09
## 11        8     8 14176.98
## 12        7     2 14233.26
## 13        7     1 14233.90
## 14        7     3 14234.26
## 15        7     4 14235.96
## 16        7     5 14237.15
## 17        7     6 14238.93
## 18        7     7 14240.92
## 19        6    10 14286.16
## 20        6     9 14289.72
## 
## 
## Time series regression with "ts" data:
## Start = 12, End = 4213
## 
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start, 
##     end = end)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.9668 -0.6233  0.2033  0.8309  4.6228 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.00357    0.01999  -0.179    0.858    
## L(quantity, 1)  -0.76453    0.01542 -49.587  < 2e-16 ***
## L(quantity, 2)  -0.61442    0.01931 -31.811  < 2e-16 ***
## L(quantity, 3)  -0.49434    0.02124 -23.275  < 2e-16 ***
## L(quantity, 4)  -0.41705    0.02214 -18.835  < 2e-16 ***
## L(quantity, 5)  -0.36631    0.02248 -16.293  < 2e-16 ***
## L(quantity, 6)  -0.33225    0.02248 -14.779  < 2e-16 ***
## L(quantity, 7)  -0.28288    0.02214 -12.777  < 2e-16 ***
## L(quantity, 8)  -0.22535    0.02123 -10.616  < 2e-16 ***
## L(quantity, 9)  -0.13269    0.01931  -6.872 7.27e-12 ***
## L(quantity, 10) -0.07577    0.01542  -4.914 9.26e-07 ***
## price            0.07466    0.53333   0.140    0.889    
## L(price, 1)     -0.08096    0.53866  -0.150    0.881    
## L(price, 2)     -0.82585    0.53835  -1.534    0.125    
## L(price, 3)      0.40747    0.53852   0.757    0.449    
## L(price, 4)     -0.52027    0.53860  -0.966    0.334    
## L(price, 5)     -0.58606    0.53856  -1.088    0.277    
## L(price, 6)     -0.34992    0.53864  -0.650    0.516    
## L(price, 7)     -0.05044    0.53835  -0.094    0.925    
## L(price, 8)     -0.15107    0.53821  -0.281    0.779    
## L(price, 9)      0.42956    0.53821   0.798    0.425    
## L(price, 10)     0.61028    0.53108   1.149    0.251    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.296 on 4180 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3734, Adjusted R-squared:  0.3703 
## F-statistic: 118.6 on 21 and 4180 DF,  p-value: < 2.2e-16
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter

## 
## Partial autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## -0.466 -0.297 -0.199 -0.145 -0.117 -0.117 -0.112 -0.124 -0.075 -0.075 -0.070 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.090 -0.081 -0.031 -0.060 -0.078 -0.039 -0.020 -0.046 -0.016 -0.024  0.010 
##     23     24     25     26     27     28     29     30     31     32     33 
## -0.030 -0.070 -0.077 -0.070 -0.079 -0.071 -0.092 -0.046 -0.060 -0.029  0.004 
##     34     35     36 
## -0.004  0.023  0.007
## Warning in plot.window(...): "title" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "title" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "title" is not a
## graphical parameter
## Warning in box(...): "title" is not a graphical parameter
## Warning in title(...): "title" is not a graphical parameter
## 
## Autocorrelations of series 'na.omit(new_df$quantity)', by lag
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##  1.000 -0.466 -0.016  0.006 -0.004 -0.007 -0.015 -0.002 -0.012  0.032 -0.019 
##     11     12     13     14     15     16     17     18     19     20     21 
##  0.002 -0.014  0.011  0.028 -0.037 -0.002  0.031 -0.002 -0.025  0.027 -0.017 
##     22     23     24     25     26     27     28     29     30     31     32 
##  0.023 -0.036 -0.014  0.011  0.008 -0.006  0.008 -0.013  0.037 -0.021  0.017 
##     33     34     35     36 
##  0.007 -0.016  0.017 -0.023 
## 
## Call:
## arima(x = na.omit(new_df$quantity), order = c(5, 0, 0))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5  intercept
##       -0.7085  -0.5120  -0.3505  -0.2254  -0.1170     -9e-04
## s.e.   0.0153   0.0185   0.0194   0.0185   0.0153      7e-03
## 
## sigma^2 estimated as 1.766:  log likelihood = -7174.58,  aic = 14363.17
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample

##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  quantity_diff
## Chi-squared = 557.7, df = 5, p-value < 2.2e-16
## 
## 
## Time series regression with "ts" data:
## Start = 2, End = 4207
## 
## Call:
## dynlm(formula = ehatsq ~ L(ehatsq) + L(ehatsq, k = -2) + L(ehatsq, 
##     k = -3) + L(ehatsq, k = -4) + L(ehatsq, k = -5), data = ehatsq)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.516 -1.534 -1.122  0.036 60.606 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.485611   0.080822  18.381  < 2e-16 ***
## L(ehatsq)         0.048064   0.015400   3.121  0.00181 ** 
## L(ehatsq, k = -2) 0.041418   0.015425   2.685  0.00728 ** 
## L(ehatsq, k = -3) 0.026571   0.015431   1.722  0.08517 .  
## L(ehatsq, k = -4) 0.007893   0.015441   0.511  0.60928    
## L(ehatsq, k = -5) 0.035668   0.015421   2.313  0.02077 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.738 on 4200 degrees of freedom
## Multiple R-squared:  0.006697,   Adjusted R-squared:  0.005515 
## F-statistic: 5.664 on 5 and 4200 DF,  p-value: 3.277e-05