WindSpeed Data Analysis

r = getOption("repos")
r["CRAN"] = "http://cran.us.r-project.org"
options(repos = r)

install.packages('dgof')

## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages

library(dgof)

## 
## Attaching package: 'dgof'

## The following object is masked from 'package:stats':
## 
##     ks.test

library(sjPlot)

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

library('dplyr')

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library('mlbench')
library('caret')

## Loading required package: lattice

## Loading required package: ggplot2

library('corrplot')

## corrplot 0.84 loaded

library('ggplot2')
library('GGally')

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library('BAS')
library('MASS')

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library('car')

## Loading required package: carData

## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library('readr')
install.packages('arm')

## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages

library('arm')

## Loading required package: Matrix

## Loading required package: lme4

## 
## arm (Version 1.11-2, built: 2020-7-27)

## Working directory is /Users/vincenttanoe/Desktop/statistics_Engineering

## 
## Attaching package: 'arm'

## The following object is masked from 'package:car':
## 
##     logit

## The following object is masked from 'package:BAS':
## 
##     bayesglm.fit

## The following object is masked from 'package:corrplot':
## 
##     corrplot

install.packages('MCMCpack')

## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages

library('MCMCpack')

## Loading required package: coda

## 
## Attaching package: 'coda'

## The following object is masked from 'package:arm':
## 
##     traceplot

## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)

## ## Copyright (C) 2003-2021 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park

## ##
## ## Support provided by the U.S. National Science Foundation

## ## (Grants SES-0350646 and SES-0350613)
## ##

#ws=read_csv('ws.csv')
#attach(ws)

wind=read_csv('winds.csv')

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `Bear Creek` = col_double(),
##   `Frey Farm` = col_double(),
##   `Criterion Wind Park` = col_double(),
##   NedPower = col_double(),
##   Humboldt = col_double(),
##   LocustRidge = col_double(),
##   `Roth Rock` = col_double(),
##   Talbot = col_double(),
##   Mountaineer = col_double(),
##   BuffaloMountain = col_double(),
##   BitWorks = col_double(),
##   MtPeakUtility = col_double(),
##   Anacacho = col_double(),
##   DryLake = col_double(),
##   Kingman = col_double()
## )

med=read_csv('medium.csv')

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   BearCreek = col_double(),
##   FreyFarm = col_double(),
##   CriterionWP = col_double(),
##   NedPower = col_double(),
##   Humboldt = col_double(),
##   LocustRidge = col_double(),
##   RothRock = col_double(),
##   Talbot = col_double(),
##   Mountaineer = col_double(),
##   BuffaloMountain = col_double(),
##   BitWorks = col_double(),
##   MtPeakUtility = col_double(),
##   Anacacho = col_double(),
##   DryLake = col_double(),
##   Kingman = col_double()
## )

small=read_csv('small.csv')

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   BearCreek = col_double(),
##   FreyFarm = col_double(),
##   CreterionWP = col_double(),
##   NedPower = col_double(),
##   Humboldt = col_double(),
##   LocustRidge = col_double(),
##   RothRock = col_double(),
##   Talbot = col_double(),
##   Mountaineer = col_double(),
##   BuffaloMountain = col_double(),
##   BitWorks = col_double(),
##   MtPeakUtility = col_double(),
##   Anacacho = col_double(),
##   DryLake = col_double(),
##   Kingman = col_double()
## )

attach(wind)
names(wind)

##  [1] "Bear Creek"          "Frey Farm"           "Criterion Wind Park"
##  [4] "NedPower"            "Humboldt"            "LocustRidge"        
##  [7] "Roth Rock"           "Talbot"              "Mountaineer"        
## [10] "BuffaloMountain"     "BitWorks"            "MtPeakUtility"      
## [13] "Anacacho"            "DryLake"             "Kingman"

attach(med)

## The following objects are masked from wind:
## 
##     Anacacho, BitWorks, BuffaloMountain, DryLake, Humboldt, Kingman,
##     LocustRidge, Mountaineer, MtPeakUtility, NedPower, Talbot

attach(small)

## The following objects are masked from med:
## 
##     Anacacho, BearCreek, BitWorks, BuffaloMountain, DryLake, FreyFarm,
##     Humboldt, Kingman, LocustRidge, Mountaineer, MtPeakUtility,
##     NedPower, RothRock, Talbot

## The following objects are masked from wind:
## 
##     Anacacho, BitWorks, BuffaloMountain, DryLake, Humboldt, Kingman,
##     LocustRidge, Mountaineer, MtPeakUtility, NedPower, Talbot

names(med)

##  [1] "BearCreek"       "FreyFarm"        "CriterionWP"     "NedPower"       
##  [5] "Humboldt"        "LocustRidge"     "RothRock"        "Talbot"         
##  [9] "Mountaineer"     "BuffaloMountain" "BitWorks"        "MtPeakUtility"  
## [13] "Anacacho"        "DryLake"         "Kingman"

names(small)

##  [1] "BearCreek"       "FreyFarm"        "CreterionWP"     "NedPower"       
##  [5] "Humboldt"        "LocustRidge"     "RothRock"        "Talbot"         
##  [9] "Mountaineer"     "BuffaloMountain" "BitWorks"        "MtPeakUtility"  
## [13] "Anacacho"        "DryLake"         "Kingman"

str(med)

## tibble [365 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ BearCreek      : num [1:365] 9.1 11.28 11.73 7.87 7.64 ...
##  $ FreyFarm       : num [1:365] 7.94 10.36 10.26 5.26 5.86 ...
##  $ CriterionWP    : num [1:365] 15.5 14.4 14.1 12.7 12.7 ...
##  $ NedPower       : num [1:365] 12.9 13.5 13.1 11.8 12.8 ...
##  $ Humboldt       : num [1:365] 9.8 12.13 11.18 7.77 8.63 ...
##  $ LocustRidge    : num [1:365] 5.41 10.08 9.74 3.89 5.1 ...
##  $ RothRock       : num [1:365] 12.8 12.1 11 10.2 10.8 ...
##  $ Talbot         : num [1:365] 7.68 11.22 10.48 6.16 6.64 ...
##  $ Mountaineer    : num [1:365] 12.2 14.6 18 14.8 17.1 ...
##  $ BuffaloMountain: num [1:365] 14.59 13.2 9.72 9.72 8.35 ...
##  $ BitWorks       : num [1:365] 9.95 8.94 8.02 6.23 6.74 9.71 6.98 5.36 5.07 4.25 ...
##  $ MtPeakUtility  : num [1:365] 10.6 6.85 8.24 7.89 3.96 11 7.34 4.89 9.24 8.39 ...
##  $ Anacacho       : num [1:365] 9.2 4.97 6.3 5.7 4.33 ...
##  $ DryLake        : num [1:365] 2.76 3.12 2.51 4.14 1.34 1.98 9.2 9.68 5.24 2.12 ...
##  $ Kingman        : num [1:365] 11.73 9.26 3.97 10.44 2.76 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   BearCreek = col_double(),
##   ..   FreyFarm = col_double(),
##   ..   CriterionWP = col_double(),
##   ..   NedPower = col_double(),
##   ..   Humboldt = col_double(),
##   ..   LocustRidge = col_double(),
##   ..   RothRock = col_double(),
##   ..   Talbot = col_double(),
##   ..   Mountaineer = col_double(),
##   ..   BuffaloMountain = col_double(),
##   ..   BitWorks = col_double(),
##   ..   MtPeakUtility = col_double(),
##   ..   Anacacho = col_double(),
##   ..   DryLake = col_double(),
##   ..   Kingman = col_double()
##   .. )

Features selections

set.seed(7)
correlationMatrix <- cor(wind)
highlyCorrelated <- findCorrelation(correlationMatrix,cutoff = 0.5)
print(highlyCorrelated)

## [1]  5  4  3  7  2  1  9  6 12

Correlation plot

This is a scatter plot

data1=dplyr::select(wind,c(5,4,3,7,2,1,9,6,12))
med1=dplyr::select(med,c(5,4,3,7,2,1,9,6,12))
small1=dplyr::select(small,c(5,4,3,7,2,1,9,6,12))
summary(med1)

##     Humboldt         NedPower       CriterionWP       RothRock     
##  Min.   : 1.820   Min.   : 1.960   Min.   : 2.26   Min.   : 1.600  
##  1st Qu.: 5.630   1st Qu.: 5.130   1st Qu.: 5.40   1st Qu.: 4.720  
##  Median : 7.300   Median : 7.100   Median : 7.46   Median : 6.490  
##  Mean   : 7.656   Mean   : 7.664   Mean   : 8.23   Mean   : 7.046  
##  3rd Qu.: 9.300   3rd Qu.: 9.790   3rd Qu.:10.72   3rd Qu.: 8.940  
##  Max.   :17.500   Max.   :17.900   Max.   :21.31   Max.   :16.490  
##     FreyFarm        BearCreek       Mountaineer      LocustRidge    
##  Min.   : 1.000   Min.   : 2.140   Min.   : 2.170   Min.   : 1.190  
##  1st Qu.: 3.850   1st Qu.: 5.570   1st Qu.: 5.270   1st Qu.: 3.720  
##  Median : 5.320   Median : 7.370   Median : 8.100   Median : 5.000  
##  Mean   : 5.732   Mean   : 7.665   Mean   : 8.755   Mean   : 5.408  
##  3rd Qu.: 7.170   3rd Qu.: 9.210   3rd Qu.:11.120   3rd Qu.: 6.730  
##  Max.   :14.340   Max.   :18.770   Max.   :25.760   Max.   :18.870  
##  MtPeakUtility   
##  Min.   : 2.080  
##  1st Qu.: 5.860  
##  Median : 7.590  
##  Mean   : 7.732  
##  3rd Qu.: 9.630  
##  Max.   :15.180

ggpairs(data1)

head(data1)

Random Forest for data selection

#prepare training scheme
#control <- trainControl(method="repeatedcv", number=10, repeats=3)
#train the model
#data(data1)
#model <- train(Humboldt~.,data=data1, preProcess="scale", trControl=control)
#estimate variable importance
#importance <- varImp(model,scale=FALSE)
#summary importance
#print(importance)
#plot(importance)

#install.packages("randomForest")
# Load the library 
#library(randomForest) 
#reg.rf <- randomForest(Humboldt ~ ., data = data1, mtry = 3, 
                         #importance = TRUE, na.action = na.omit)
# Print regression model 
#print(reg.rf)
#Output to be present as PNG file  
#png(file = "randomForestRegression.png") 
  
# Plot the error vs the number of trees graph 
#plot(reg.rf)

#plot(reg.rf)

Run the OLS regression

data1=dplyr::rename(data1,CriterionWP=`Criterion Wind Park`,RothRock=`Roth Rock`,FreyFarm=`Frey Farm`,
                    BearCreek=`Bear Creek`)
data3=dplyr::select(data1,c(1,5,6,7,8,9)) 
med3=dplyr::select(med1,c(1,5,6,7,8,9)) 
small3=dplyr::select(small1,c(1,5,6,7,8,9))

WIND SPEED DATA

ols=lm(Humboldt~.,data=data1)
summary(ols)

## 
## Call:
## lm(formula = Humboldt ~ ., data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5601 -0.8547 -0.0311  0.8230 11.8557 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.053853   0.050258   1.072 0.283963    
## NedPower      0.040274   0.011893   3.386 0.000712 ***
## CriterionWP   0.070946   0.009027   7.859 4.33e-15 ***
## RothRock      0.034915   0.012041   2.900 0.003745 ** 
## FreyFarm      0.199217   0.007607  26.187  < 2e-16 ***
## BearCreek     0.594771   0.007316  81.301  < 2e-16 ***
## Mountaineer   0.009259   0.004750   1.949 0.051309 .  
## LocustRidge   0.075561   0.007489  10.090  < 2e-16 ***
## MtPeakUtility 0.035300   0.004518   7.813 6.22e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.443 on 8751 degrees of freedom
## Multiple R-squared:  0.838,  Adjusted R-squared:  0.8379 
## F-statistic:  5660 on 8 and 8751 DF,  p-value: < 2.2e-16

vif(ols)

##      NedPower   CriterionWP      RothRock      FreyFarm     BearCreek 
##      9.235150      6.703096      8.845728      2.691293      2.988862 
##   Mountaineer   LocustRidge MtPeakUtility 
##      2.787867      2.662189      1.041701

ols=lm(Humboldt~NedPower+CriterionWP+RothRock+Mountaineer+FreyFarm+LocustRidge+BearCreek+MtPeakUtility,data=data1)
model1=vif(ols)
ols3=lm(Humboldt~FreyFarm+LocustRidge+BearCreek+MtPeakUtility,data=data1)
model2=vif(ols3)
ols4=lm(Humboldt~BearCreek+MtPeakUtility,data=data1)
model3=vif(ols4)
tab_model(ols,ols3,ols4)

	Humboldt			Humboldt			Humboldt
Predictors	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p
(Intercept)	0.05	-0.04 – 0.15	0.284	0.44	0.34 – 0.54	<0.001	0.67	0.56 – 0.78	<0.001
NedPower	0.04	0.02 – 0.06	0.001
CriterionWP	0.07	0.05 – 0.09	<0.001
RothRock	0.03	0.01 – 0.06	0.004
Mountaineer	0.01	-0.00 – 0.02	0.051
FreyFarm	0.20	0.18 – 0.21	<0.001	0.29	0.27 – 0.30	<0.001
LocustRidge	0.08	0.06 – 0.09	<0.001	0.06	0.04 – 0.07	<0.001
BearCreek	0.59	0.58 – 0.61	<0.001	0.64	0.62 – 0.65	<0.001	0.87	0.86 – 0.87	<0.001
MtPeakUtility	0.04	0.03 – 0.04	<0.001	0.05	0.04 – 0.06	<0.001	0.05	0.04 – 0.06	<0.001
Observations	8760			8760			8760
R² / R² adjusted	0.838 / 0.838			0.819 / 0.819			0.780 / 0.780

c(model1,model2,model3)

##      NedPower   CriterionWP      RothRock   Mountaineer      FreyFarm 
##      9.235150      6.703096      8.845728      2.787867      2.691293 
##   LocustRidge     BearCreek MtPeakUtility      FreyFarm   LocustRidge 
##      2.662189      2.988862      1.041701      2.316618      2.586382 
##     BearCreek MtPeakUtility     BearCreek MtPeakUtility 
##      2.881032      1.027176      1.003187      1.003187

SMALL DATA

sols=lm(Humboldt~.,data=small1)
summary(sols)

## 
## Call:
## lm(formula = Humboldt ~ ., data = small1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97372 -0.14432  0.00986  0.15047  1.22829 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.12313    0.35776   0.344  0.73235    
## NedPower       0.32475    0.20596   1.577  0.12201    
## CreterionWP   -0.07638    0.15823  -0.483  0.63170    
## RothRock       0.25802    0.24004   1.075  0.28828    
## FreyFarm       0.28128    0.13365   2.105  0.04108 *  
## BearCreek      0.56046    0.09204   6.089  2.5e-07 ***
## Mountaineer   -0.18399    0.05974  -3.080  0.00356 ** 
## LocustRidge   -0.02244    0.13767  -0.163  0.87126    
## MtPeakUtility -0.03898    0.04491  -0.868  0.39004    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.399 on 44 degrees of freedom
## Multiple R-squared:  0.9604, Adjusted R-squared:  0.9532 
## F-statistic: 133.2 on 8 and 44 DF,  p-value: < 2.2e-16

vif_medium=vif(sols)

MEDIUM DATA

mols=lm(Humboldt~.,data=med1)
summary(mols)

## 
## Call:
## lm(formula = Humboldt ~ ., data = med1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2551 -0.3749 -0.0234  0.3360  3.3637 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.13538    0.13909  -0.973   0.3311    
## NedPower      -0.04309    0.05903  -0.730   0.4659    
## CriterionWP    0.03442    0.04117   0.836   0.4036    
## RothRock       0.14551    0.06032   2.412   0.0164 *  
## FreyFarm       0.28445    0.03601   7.900 3.51e-14 ***
## BearCreek      0.68473    0.02916  23.479  < 2e-16 ***
## Mountaineer   -0.01227    0.01825  -0.672   0.5018    
## LocustRidge   -0.03041    0.03430  -0.887   0.3758    
## MtPeakUtility  0.02655    0.01367   1.943   0.0528 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6359 on 356 degrees of freedom
## Multiple R-squared:  0.9509, Adjusted R-squared:  0.9498 
## F-statistic: 861.8 on 8 and 356 DF,  p-value: < 2.2e-16

vif(mols)

##      NedPower   CriterionWP      RothRock      FreyFarm     BearCreek 
##     34.304333     21.533175     31.992690      6.938840      6.205030 
##   Mountaineer   LocustRidge MtPeakUtility 
##      6.241221      6.705090      1.087326

Run the model without the highest Variance(NedPower,Criterion Wind Park, Roth Roth) LARGE

ols1=lm(Humboldt~.,data=data3)
summary(ols1)

## 
## Call:
## lm(formula = Humboldt ~ ., data = data3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1492  -0.8924  -0.0461   0.8364  11.7037 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.308617   0.051008   6.050 1.50e-09 ***
## FreyFarm      0.241145   0.007685  31.380  < 2e-16 ***
## BearCreek     0.613482   0.007533  81.444  < 2e-16 ***
## Mountaineer   0.071843   0.003762  19.095  < 2e-16 ***
## LocustRidge   0.048212   0.007668   6.288 3.38e-10 ***
## MtPeakUtility 0.048244   0.004651  10.372  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.495 on 8754 degrees of freedom
## Multiple R-squared:  0.826,  Adjusted R-squared:  0.8259 
## F-statistic:  8312 on 5 and 8754 DF,  p-value: < 2.2e-16

vif(ols1)

##      FreyFarm     BearCreek   Mountaineer   LocustRidge MtPeakUtility 
##      2.557069      2.950426      1.628348      2.598426      1.028080

Run the model without the highest Variance(NedPower,Criterion Wind Park, Roth Roth) MEDIUM

mols1=lm(Humboldt~.,data=med3)
summary(mols1)

## 
## Call:
## lm(formula = Humboldt ~ ., data = med3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0242 -0.4047 -0.0294  0.3583  3.6234 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.04868    0.14946  -0.326 0.744848    
## FreyFarm       0.37677    0.03637  10.359  < 2e-16 ***
## BearCreek      0.72013    0.03086  23.336  < 2e-16 ***
## Mountaineer    0.03962    0.01275   3.108 0.002033 ** 
## LocustRidge   -0.12235    0.03395  -3.604 0.000358 ***
## MtPeakUtility  0.04389    0.01459   3.009 0.002808 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6895 on 359 degrees of freedom
## Multiple R-squared:  0.9418, Adjusted R-squared:  0.941 
## F-statistic:  1162 on 5 and 359 DF,  p-value: < 2.2e-16

vif(mols1)

##      FreyFarm     BearCreek   Mountaineer   LocustRidge MtPeakUtility 
##      6.022470      5.910097      2.589966      5.589773      1.054012

Run the model without the highest Variance(NedPower,Criterion Wind Park, Roth Roth) SMALL

sols1=lm(Humboldt~.,data=small3)
summary(sols1)

## 
## Call:
## lm(formula = Humboldt ~ ., data = small3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6543 -0.2844 -0.1062  0.1833  3.3381 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.14680    0.53405   0.275  0.78462    
## FreyFarm       0.51857    0.17632   2.941  0.00506 ** 
## BearCreek      0.68555    0.13434   5.103 5.93e-06 ***
## Mountaineer    0.12310    0.06267   1.964  0.05542 .  
## LocustRidge   -0.37257    0.17288  -2.155  0.03631 *  
## MtPeakUtility  0.03543    0.06667   0.531  0.59759    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.609 on 47 degrees of freedom
## Multiple R-squared:  0.9013, Adjusted R-squared:  0.8908 
## F-statistic: 85.88 on 5 and 47 DF,  p-value: < 2.2e-16

vif(sols1)

##      FreyFarm     BearCreek   Mountaineer   LocustRidge MtPeakUtility 
##      9.070087      7.991427      4.220850      8.827098      1.196373

ONLY 4 independent variables Large

data2=dplyr::select(data1,c(1,6,9)) 
#data2=dplyr::rename(data2,BearCreek=`Bear Creek`)
ols2=lm(Humboldt~.,data=data2)
summary(ols2)

## 
## Call:
## lm(formula = Humboldt ~ ., data = data2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1490  -1.0214  -0.1376   0.9528  13.1351 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.671202   0.056393  11.902   <2e-16 ***
## BearCreek     0.865158   0.004938 175.194   <2e-16 ***
## MtPeakUtility 0.045658   0.005166   8.838   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.681 on 8757 degrees of freedom
## Multiple R-squared:   0.78,  Adjusted R-squared:  0.7799 
## F-statistic: 1.552e+04 on 2 and 8757 DF,  p-value: < 2.2e-16

#ols_plot_obs_fit(ols2)
vif(ols2)

##     BearCreek MtPeakUtility 
##      1.003187      1.003187

ONLY 4 independent variables Medium

med2=dplyr::select(med1,c(1,6,9)) 
mols2=lm(Humboldt~.,data=med2)
summary(mols2)

## 
## Call:
## lm(formula = Humboldt ~ ., data = med2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6026 -0.5450 -0.0297  0.5244  3.8371 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.03445    0.18288  -0.188  0.85068    
## BearCreek      0.94960    0.01555  61.062  < 2e-16 ***
## MtPeakUtility  0.05315    0.01741   3.053  0.00243 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8444 on 362 degrees of freedom
## Multiple R-squared:  0.912,  Adjusted R-squared:  0.9115 
## F-statistic:  1875 on 2 and 362 DF,  p-value: < 2.2e-16

vif(mols2)

##     BearCreek MtPeakUtility 
##      1.000682      1.000682

ONLY 4 independent variables Small

install.packages("olsrr")

## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages

library(olsrr)

## 
## Attaching package: 'olsrr'

## The following object is masked from 'package:MASS':
## 
##     cement

## The following object is masked from 'package:datasets':
## 
##     rivers

small2=dplyr::select(small1,c(1,6,9)) 
sols2=lm(Humboldt~.,data=small2)
summary(sols2)

## 
## Call:
## lm(formula = Humboldt ~ ., data = small2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1838 -0.3123 -0.0293  0.2213  3.7773 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.11732    0.58968  -0.199    0.843    
## BearCreek      0.94540    0.05634  16.779   <2e-16 ***
## MtPeakUtility  0.07615    0.07227   1.054    0.297    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6814 on 50 degrees of freedom
## Multiple R-squared:  0.8686, Adjusted R-squared:  0.8633 
## F-statistic: 165.3 on 2 and 50 DF,  p-value: < 2.2e-16

ols_plot_obs_fit(sols2)

#vif(sols2)

Evaluation of the correlation

#install.packages('corrplot')
#ggpairs(data2)
install.packages("sjPlot")

## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages

install.packages("sjmisc")

## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages

install.packages("sjlabelled")

## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages

library(sjPlot)
library(sjmisc)

## Learn more about sjmisc with 'browseVignettes("sjmisc")'.

library(sjlabelled)

## 
## Attaching package: 'sjlabelled'

## The following objects are masked from 'package:sjmisc':
## 
##     to_character, to_factor, to_label, to_numeric

## The following object is masked from 'package:dplyr':
## 
##     as_label

tab_model(ols2,mols2,sols2)

	Humboldt			Humboldt			Humboldt
Predictors	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p
(Intercept)	0.67	0.56 – 0.78	<0.001	-0.03	-0.39 – 0.33	0.851	-0.12	-1.30 – 1.07	0.843
BearCreek	0.87	0.86 – 0.87	<0.001	0.95	0.92 – 0.98	<0.001	0.95	0.83 – 1.06	<0.001
MtPeakUtility	0.05	0.04 – 0.06	<0.001	0.05	0.02 – 0.09	0.002	0.08	-0.07 – 0.22	0.297
Observations	8760			365			53
R² / R² adjusted	0.780 / 0.780			0.912 / 0.911			0.869 / 0.863

tab_model(ols2,mols2,sols2)

	Humboldt			Humboldt			Humboldt
Predictors	Estimates	CI	p	Estimates	CI	p	Estimates	CI	p
(Intercept)	0.67	0.56 – 0.78	<0.001	-0.03	-0.39 – 0.33	0.851	-0.12	-1.30 – 1.07	0.843
BearCreek	0.87	0.86 – 0.87	<0.001	0.95	0.92 – 0.98	<0.001	0.95	0.83 – 1.06	<0.001
MtPeakUtility	0.05	0.04 – 0.06	<0.001	0.05	0.02 – 0.09	0.002	0.08	-0.07 – 0.22	0.297
Observations	8760			365			53
R² / R² adjusted	0.780 / 0.780			0.912 / 0.911			0.869 / 0.863

Bayesian Model #https://bayesball.github.io/BOOK/simple-linear-regression.html#a-simple-linear-regression-model#

bayGLM=arm::bayesglm(Humboldt~.,data=data2,
                prior.mean=0,prior.scale=Inf, prior.df=Inf)
summary(bayGLM)

## 
## Call:
## arm::bayesglm(formula = Humboldt ~ ., data = data2, prior.mean = 0, 
##     prior.scale = Inf, prior.df = Inf)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -11.1490   -1.0214   -0.1376    0.9528   13.1351  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.671201   0.056393  11.902   <2e-16 ***
## BearCreek     0.865158   0.004938 175.194   <2e-16 ***
## MtPeakUtility 0.045658   0.005166   8.838   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 2.826083)
## 
##     Null deviance: 112482  on 8759  degrees of freedom
## Residual deviance:  24748  on 8757  degrees of freedom
## AIC: 33965
## 
## Number of Fisher Scoring iterations: 4

#ols_plot_obs_fit(bayGLM)

Simulation of the posterior

posterior=coef(sim(bayGLM))
head(posterior,10)

##       (Intercept) BearCreek MtPeakUtility
##  [1,]   0.6565612 0.8613387    0.05033214
##  [2,]   0.5876950 0.8688189    0.04876587
##  [3,]   0.7173126 0.8603945    0.04874678
##  [4,]   0.7791422 0.8684534    0.03260271
##  [5,]   0.6654422 0.8662332    0.04585529
##  [6,]   0.6296822 0.8673664    0.04620767
##  [7,]   0.5975143 0.8672432    0.04851312
##  [8,]   0.5938396 0.8721629    0.05008377
##  [9,]   0.5371307 0.8705376    0.05620814
## [10,]   0.6441717 0.8677984    0.04440694

MCMC regression

modelmcm=MCMCregress(Humboldt~.,data=data2,family=gaussian,
                     burnin=1000,mcmc=1000,thin=1,
                     verbose=0)
summary(modelmcm)

## 
## Iterations = 1001:2000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 1000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                 Mean       SD  Naive SE Time-series SE
## (Intercept)   0.6754 0.055516 0.0017556      0.0015056
## BearCreek     0.8649 0.004737 0.0001498      0.0001498
## MtPeakUtility 0.0455 0.005229 0.0001653      0.0001511
## sigma2        2.8262 0.043548 0.0013771      0.0013771
## 
## 2. Quantiles for each variable:
## 
##                  2.5%     25%     50%    75%   97.5%
## (Intercept)   0.56712 0.63751 0.67503 0.7134 0.78038
## BearCreek     0.85498 0.86171 0.86494 0.8681 0.87383
## MtPeakUtility 0.03538 0.04199 0.04562 0.0490 0.05596
## sigma2        2.73911 2.79707 2.82646 2.8573 2.91005

plot(modelmcm,col='blue',ask=FALSE)

KS TEST GRAPH

#https://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
sample1<-data1$Humboldt
sample2<-data1$BearCreek 
group <- c(rep("sample1", length(sample1)), rep("sample2", length(sample2)))
dat <- data.frame(KSD = c(sample1,sample2), group = group)
# create ECDF of data
cdf1 <- ecdf(sample1) 
cdf2 <- ecdf(sample2)  
# find min and max statistics to draw line between points of greatest distance
minMax <- seq(min(sample1,sample2), max(sample1,sample2), length.out=length(sample1)) 
x0 <- minMax[which( abs(cdf1(minMax) - cdf2(minMax)) == max(abs(cdf1(minMax) - cdf2(minMax))) )] 
y0 <- cdf1(x0) 
y1 <- cdf2(x0) 
plot(cdf1, verticals=TRUE, do.points=FALSE, col="blue", main ='Humboldt vs BearCreek') 
plot(cdf2, verticals=TRUE, do.points=FALSE, col="green", add=TRUE,) 
## alternatine, use standard R plot of ecdf 
#plot(f.a, col="blue") 
#lines(f.b, col="green") 

points(c(x0, x0), c(y0, y1), pch=16, col="red") 
segments(x0, y0, x0, y1, col="red", lty="dotted")

Grap the MCMC posterior results

par(mfrow=c(2,2))
plot(modelmcm,trace = TRUE, density = TRUE, smooth = TRUE,
        auto.layout = FALSE, ask = TRUE, col='blue',pch=10)

LASSO REGRESSION

library(lars)

## Loaded lars 1.2

x=model.matrix(Humboldt~.,data=data1)
x=x[,-1]
lasso=lars(x=x,y=data1$Humboldt,trace=TRUE)

## LASSO sequence
## Computing X'X .....
## LARS Step 1 :     Variable 5     added
## LARS Step 2 :     Variable 4     added
## LARS Step 3 :     Variable 1     added
## LARS Step 4 :     Variable 2     added
## LARS Step 5 :     Variable 7     added
## LARS Step 6 :     Variable 3     added
## LARS Step 7 :     Variable 6     added
## LARS Step 8 :     Variable 8     added
## Computing residuals, RSS etc .....

lasso

## 
## Call:
## lars(x = x, y = data1$Humboldt, trace = TRUE)
## R-squared: 0.838 
## Sequence of LASSO moves:
##      BearCreek FreyFarm NedPower CriterionWP LocustRidge RothRock Mountaineer
## Var          5        4        1           2           7        3           6
## Step         1        2        3           4           5        6           7
##      MtPeakUtility
## Var              8
## Step             8

plot(lasso)

coef(lasso,s=c(.25,.50,0.75,1.0),mode="fraction")

##        NedPower CriterionWP   RothRock   FreyFarm BearCreek Mountaineer
## [1,] 0.00000000  0.00000000 0.00000000 0.00000000 0.2649701 0.000000000
## [2,] 0.00000000  0.00000000 0.00000000 0.07629533 0.4603191 0.000000000
## [3,] 0.04621248  0.02207510 0.00000000 0.17034866 0.5627058 0.000000000
## [4,] 0.04027427  0.07094643 0.03491481 0.19921720 0.5947707 0.009259155
##      LocustRidge MtPeakUtility
## [1,]  0.00000000    0.00000000
## [2,]  0.00000000    0.00000000
## [3,]  0.00000000    0.00000000
## [4,]  0.07556118    0.03529959

##cross-validation
lars::cv.lars(x=x,y=data1$Humboldt)

NON BAYESIAN AND BAYESIAN REGRESSION LARGE

#linear mix effects models lm24
priors=brms::get_prior(Humboldt ~.,data=data2,
          family = Gamma('identity'))
summary(priors)

##     prior              class               coef              group          
##  Length:5           Length:5           Length:5           Length:5          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      resp               dpar              nlpar              bound          
##  Length:5           Length:5           Length:5           Length:5          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##     source         
##  Length:5          
##  Class :character  
##  Mode  :character

#Baysian Model
fit=brms::brm(Humboldt~.,data=data2)

## Compiling Stan program...

## Start sampling

## 
## SAMPLING FOR MODEL '6e03d2e53663040a84fd0ec04120312a' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 3.9e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.39 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.150075 seconds (Warm-up)
## Chain 1:                0.135171 seconds (Sampling)
## Chain 1:                0.285246 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL '6e03d2e53663040a84fd0ec04120312a' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 3.9e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.39 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.146732 seconds (Warm-up)
## Chain 2:                0.160795 seconds (Sampling)
## Chain 2:                0.307527 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL '6e03d2e53663040a84fd0ec04120312a' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 5.6e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.56 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.152121 seconds (Warm-up)
## Chain 3:                0.156307 seconds (Sampling)
## Chain 3:                0.308428 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL '6e03d2e53663040a84fd0ec04120312a' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 5.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.55 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.147925 seconds (Warm-up)
## Chain 4:                0.123424 seconds (Sampling)
## Chain 4:                0.271349 seconds (Total)
## Chain 4:

summary(fit)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: Humboldt ~ BearCreek + MtPeakUtility 
##    Data: data2 (Number of observations: 8760) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.67      0.06     0.56     0.79 1.00     4810     3152
## BearCreek         0.87      0.00     0.86     0.87 1.00     5116     3022
## MtPeakUtility     0.05      0.01     0.04     0.06 1.00     4945     3181
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.68      0.01     1.66     1.71 1.00     4205     3193
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(fit)

mod = brms::brm(
Humboldt~., data = data2,
prior = brms::set_prior('uniform(-1, 1)'), iter = 2000,
chains = 4,
cores = 4
)

## Warning: It appears as if you have specified a lower bounded prior on a parameter that has no natural lower bound.
## If this is really what you want, please specify argument 'lb' of 'set_prior' appropriately.
## Warning occurred for prior 
## b ~ uniform(-1, 1)

## Warning: It appears as if you have specified an upper bounded prior on a parameter that has no natural upper bound.
## If this is really what you want, please specify argument 'ub' of 'set_prior' appropriately.
## Warning occurred for prior 
## b ~ uniform(-1, 1)

## Compiling Stan program...

## Start sampling

summary(mod)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: Humboldt ~ BearCreek + MtPeakUtility 
##    Data: data2 (Number of observations: 8760) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.67      0.06     0.56     0.78 1.00     5029     3320
## BearCreek         0.87      0.00     0.86     0.87 1.00     4917     3111
## MtPeakUtility     0.05      0.01     0.04     0.06 1.00     4747     3030
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.68      0.01     1.66     1.71 1.00     3562     2771
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(mod,ask=FALSE)

NON BAYESIAN AND BAYESIAN REGRESSION MEDIUM

#linear mix effects models lm24
mpriors=brms::get_prior(Humboldt ~.,data=med2,
           family = gaussian)
summary(mpriors)

##     prior              class               coef              group          
##  Length:5           Length:5           Length:5           Length:5          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      resp               dpar              nlpar              bound          
##  Length:5           Length:5           Length:5           Length:5          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##     source         
##  Length:5          
##  Class :character  
##  Mode  :character

#Baysian Model
mfit=brms::brm(Humboldt~.,data=med2)

## Compiling Stan program...

## Start sampling

## 
## SAMPLING FOR MODEL '87bfc04baecc260324aa6a8522ed233d' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 1.6e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.016419 seconds (Warm-up)
## Chain 1:                0.016186 seconds (Sampling)
## Chain 1:                0.032605 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL '87bfc04baecc260324aa6a8522ed233d' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 8e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.01622 seconds (Warm-up)
## Chain 2:                0.017184 seconds (Sampling)
## Chain 2:                0.033404 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL '87bfc04baecc260324aa6a8522ed233d' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.016752 seconds (Warm-up)
## Chain 3:                0.016311 seconds (Sampling)
## Chain 3:                0.033063 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL '87bfc04baecc260324aa6a8522ed233d' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 5e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.05 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.016172 seconds (Warm-up)
## Chain 4:                0.016703 seconds (Sampling)
## Chain 4:                0.032875 seconds (Total)
## Chain 4:

summary(mfit)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: Humboldt ~ BearCreek + MtPeakUtility 
##    Data: med2 (Number of observations: 365) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.03      0.19    -0.40     0.33 1.00     4350     2973
## BearCreek         0.95      0.02     0.92     0.98 1.00     4873     3049
## MtPeakUtility     0.05      0.02     0.02     0.09 1.00     4382     3486
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.85      0.03     0.79     0.91 1.00     4452     3148
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(mfit)

mmod = brms::brm(
Humboldt~., data =med2,
prior = brms::set_prior('normal(0, 1)'), iter = 2000,
chains = 4,
cores = 4
)

## Compiling Stan program...

## Start sampling

summary(mmod)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: Humboldt ~ BearCreek + MtPeakUtility 
##    Data: med2 (Number of observations: 365) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.04      0.18    -0.40     0.31 1.00     4166     3223
## BearCreek         0.95      0.02     0.92     0.98 1.00     4259     3168
## MtPeakUtility     0.05      0.02     0.02     0.09 1.00     4042     2894
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.85      0.03     0.79     0.91 1.00     3876     3219
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(mmod,ask=FALSE)

NON BAYESIAN AND BAYESIAN REGRESSION SMALL

#linear mix effects models lm24
spriors=brms::get_prior(Humboldt ~.,data=small2,
           family = gaussian)
summary(spriors)

##     prior              class               coef              group          
##  Length:5           Length:5           Length:5           Length:5          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      resp               dpar              nlpar              bound          
##  Length:5           Length:5           Length:5           Length:5          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##     source         
##  Length:5          
##  Class :character  
##  Mode  :character

#Baysian Model
sfit=brms::brm(Humboldt~BearCreek+MtPeakUtility,data=small2)

## Compiling Stan program...

## Start sampling

## 
## SAMPLING FOR MODEL '25e7b0157afbb9ef97fbd6ba6acfab43' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 1.5e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.012439 seconds (Warm-up)
## Chain 1:                0.012849 seconds (Sampling)
## Chain 1:                0.025288 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL '25e7b0157afbb9ef97fbd6ba6acfab43' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 6e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.012561 seconds (Warm-up)
## Chain 2:                0.013008 seconds (Sampling)
## Chain 2:                0.025569 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL '25e7b0157afbb9ef97fbd6ba6acfab43' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 4e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.012462 seconds (Warm-up)
## Chain 3:                0.012434 seconds (Sampling)
## Chain 3:                0.024896 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL '25e7b0157afbb9ef97fbd6ba6acfab43' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 7e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.012891 seconds (Warm-up)
## Chain 4:                0.012969 seconds (Sampling)
## Chain 4:                0.02586 seconds (Total)
## Chain 4:

summary(sfit)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: Humboldt ~ BearCreek + MtPeakUtility 
##    Data: small2 (Number of observations: 53) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.10      0.62    -1.31     1.09 1.00     4368     2888
## BearCreek         0.95      0.06     0.83     1.06 1.00     3849     3114
## MtPeakUtility     0.07      0.07    -0.07     0.23 1.00     3862     3106
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.70      0.07     0.58     0.85 1.00     3542     2769
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(sfit)

smod = brms::brm(
Humboldt~., data = small2,
prior = brms::set_prior('normal(0,1)'), iter = 2000,
chains = 4,
cores = 4
)

## Compiling Stan program...

## Start sampling

summary(smod)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: Humboldt ~ BearCreek + MtPeakUtility 
##    Data: small2 (Number of observations: 53) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        -0.10      0.62    -1.30     1.14 1.00     4769     2778
## BearCreek         0.94      0.06     0.82     1.06 1.00     3728     2666
## MtPeakUtility     0.08      0.08    -0.08     0.22 1.00     3647     3038
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.70      0.07     0.58     0.85 1.00     3492     2907
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(smod,ask=FALSE)

Model Diagnostics Large

pp = brms::pp_check(mod)

## Using 10 posterior samples for ppc type 'dens_overlay' by default.

pp+theme_bw()

#Default Model Predictions
plot(brms::conditional_effects(mod),points=T)

Model Diagnostics Medium

pp = brms::pp_check(mmod)

## Using 10 posterior samples for ppc type 'dens_overlay' by default.

pp+theme_bw()

#Default Model Predictions
plot(brms::conditional_effects(mmod),col='blue',points=T)

Model Diagnostics Small

pp = brms::pp_check(smod)

## Using 10 posterior samples for ppc type 'dens_overlay' by default.

pp+theme_bw()

#Default Model Predictions
plot(brms::conditional_effects(smod),points=T)

Extract MCMC for regression coefficients

posterior_coeff=brms::posterior_samples(mod)
#head(posterior_coeff)
#plot(posterior_coeff)
nrow(posterior_coeff)

## [1] 4000

install.packages("ggmcmc")

## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages

library(ggmcmc)

## Loading required package: tidyr

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:sjmisc':
## 
##     replace_na

## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack

library(ggthemes)
library(ggridges)
model1tranformed <- ggs(mod) # the ggs function transforms the brms output into a longformat tibble, that we can use to make different types of plots.

summary(mod)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: Humboldt ~ BearCreek + MtPeakUtility 
##    Data: data2 (Number of observations: 8760) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.67      0.06     0.56     0.78 1.00     5029     3320
## BearCreek         0.87      0.00     0.86     0.87 1.00     4917     3111
## MtPeakUtility     0.05      0.01     0.04     0.06 1.00     4747     3030
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.68      0.01     1.66     1.71 1.00     3562     2771
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

summary(mod)$fixed[1,1:4]

##   Estimate  Est.Error   l-95% CI   u-95% CI 
## 0.67017785 0.05674332 0.56118136 0.78465390

ggplot(filter(model1tranformed, Parameter %in% c("b_Intercept", "b_BearCreek", "b_Mountaineer","b_MtPeakUtility")),
       aes(x   = Iteration,
           y   = value, 
           col = as.factor(Chain)))+
  geom_line() +
  geom_vline(xintercept = 1000)+
  facet_grid(Parameter ~ . ,
             scale  = 'free_y',
             switch = 'y')+
  labs(title = "Caterpillar Plots", 
       col   = "Chains")

#model2 <- brms::brm(Humboldt~., data =small2, 
              #warmup = 1000, iter = 3000, 
              #cores = 2, chains = 2, 
              #seed = 123)
#summary(model2)$fixed

#summary(model2)$fixed

mod3 <- brms::brm(
Humboldt~., data = data1,
prior = brms::set_prior('normal(0, 1)'), iter = 2000,
chains = 4,
cores = 4
)

## Compiling Stan program...

## Start sampling

summary(mod3)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: Humboldt ~ NedPower + CriterionWP + RothRock + FreyFarm + BearCreek + Mountaineer + LocustRidge + MtPeakUtility 
##    Data: data1 (Number of observations: 8760) 
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup samples = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         0.05      0.05    -0.05     0.15 1.00     4812     3693
## NedPower          0.04      0.01     0.02     0.06 1.00     2504     2313
## CriterionWP       0.07      0.01     0.05     0.09 1.00     3320     2746
## RothRock          0.03      0.01     0.01     0.06 1.00     2850     2318
## FreyFarm          0.20      0.01     0.18     0.21 1.00     4029     2921
## BearCreek         0.59      0.01     0.58     0.61 1.00     3212     2761
## Mountaineer       0.01      0.00    -0.00     0.02 1.00     3820     3208
## LocustRidge       0.08      0.01     0.06     0.09 1.00     3334     2476
## MtPeakUtility     0.04      0.00     0.03     0.04 1.00     5324     3167
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.44      0.01     1.42     1.46 1.00     4334     3058
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(mod3,ask=FALSE)

pp = brms::pp_check(mod3)

## Using 10 posterior samples for ppc type 'dens_overlay' by default.

pp+theme_bw()

#Default Model Predictions
brms::conditional_effects(mod3)

#install.packages("bayesplot")
#install.packages("rstanarm")
library(rstanarm)

## Loading required package: Rcpp

## This is rstanarm version 2.21.1

## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!

## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.

## - For execution on a local, multicore CPU with excess RAM we recommend calling

##   options(mc.cores = parallel::detectCores())

## 
## Attaching package: 'rstanarm'

## The following objects are masked from 'package:arm':
## 
##     invlogit, logit

## The following object is masked from 'package:car':
## 
##     logit

## The following objects are masked from 'package:caret':
## 
##     compare_models, R2

library(bayesplot)

## This is bayesplot version 1.7.2

## - Online documentation and vignettes at mc-stan.org/bayesplot

## - bayesplot theme set to bayesplot::theme_default()

##    * Does _not_ affect other ggplot2 plots

##    * See ?bayesplot_theme_set for details on theme setting

fit1 <- stan_glm(Humboldt ~ ., data = data2)

## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 5.8e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.58 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.034167 seconds (Warm-up)
## Chain 1:                0.469982 seconds (Sampling)
## Chain 1:                0.504149 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 8e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.037167 seconds (Warm-up)
## Chain 2:                0.468174 seconds (Sampling)
## Chain 2:                0.505341 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 8e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.041709 seconds (Warm-up)
## Chain 3:                0.481211 seconds (Sampling)
## Chain 3:                0.52292 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 8e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.037844 seconds (Warm-up)
## Chain 4:                0.470887 seconds (Sampling)
## Chain 4:                0.508731 seconds (Total)
## Chain 4:

posterior <- as.matrix(fit1)

plot_title <- ggtitle("Posterior distributions",
                      "with medians and 80% intervals")
mcmc_areas(posterior,
           pars = c("BearCreek", "MtPeakUtility"),
           prob = 0.8) + plot_title

color_scheme_set("red")
ppc_dens_overlay(y = fit1$y,
                 yrep = posterior_predict(fit, draws = 50))

# Import library
library(BAS)

# Use `bas.lm` to run regression model
cog.bas = bas.lm(Humboldt ~ ., data = data2, prior = "BIC", 
                 #modelprior = Bernoulli(1), 
                 include.always = ~ ., 
                 n.models = 1)
cog.coef = coef(cog.bas)
cog.coef

## 
##  Marginal Posterior Summaries of Coefficients: 
## 
##  Using  BMA 
## 
##  Based on the top  1 models 
##                post mean  post SD   post p(B != 0)
## Intercept      7.655643   0.017961  1.000000      
## BearCreek      0.865158   0.004938  1.000000      
## MtPeakUtility  0.045658   0.005166  1.000000

par(mfrow = c(2, 2), col.lab = "darkgrey", col.axis = "darkgrey", col = "darkgrey")
plot(cog.coef, subset = 2:3, ask = F)
confint(cog.coef, parm = 2:3)

##                     2.5%     97.5%       beta
## BearCreek     0.85547819 0.8748386 0.86515837
## MtPeakUtility 0.03553192 0.0557844 0.04565816
## attr(,"Probability")
## [1] 0.95
## attr(,"class")
## [1] "confint.bas"

out = confint(cog.coef)[, 1:2]  

# Extract the upper and lower bounds of the credible intervals
names = c("posterior mean", "posterior std", colnames(out))
out = cbind(cog.coef$postmean, cog.coef$postsd, out)
colnames(out) = names

round(out, 2)

##               posterior mean posterior std 2.5% 97.5%
## Intercept               7.66          0.02 7.62  7.69
## BearCreek               0.87          0.00 0.86  0.87
## MtPeakUtility           0.05          0.01 0.04  0.06

#https://cran.r-project.org/web/packages/bayesm/vignettes/bayesm_Overview_Vignette.html
par(mfrow = c(1,2))

curve(dnorm(x,0,10), xlab = "", ylab = "", xlim = c(-30,30),
      main = expression(paste("Prior for ", beta[j])),
      col = "dodgerblue4")

nu  <- 3
ssq <- var(log(data2$Humboldt))
curve(nu*ssq/dchisq(x,nu), xlab = "", ylab = "", xlim = c(0,1),
      main = expression(paste("Prior for ", sigma^2)), 
      col = "darkred")

par(mfrow = c(1,1))

###B <- 1000+1 #burn in draws to discard 
##R <- 10000

##par(mfrow = c(1,2))
##hist(out$betadraw[B:R,2], breaks = 30, 
    #main = "Posterior Dist. of BearCreek Coef.", 
     #yaxt = "n", yaxs="i",#
     #xlab = "", ylab = "", 
     #col = "dodgerblue4", border = "gray")
#hist(out$sigmasqdraw[B:R], breaks = 30, 
    # main = "Posterior Dist. of Sigma2", 
    # yaxt = "n", yaxs="i",#
    # xlab = "", ylab = "", #
    # col = "darkred", border = "gray")
#par(mfrow = c(1,1))
#apply(out$betadraw[B:R,2:3], 2, mean)
#summary(out$betadraw)
#plot.bayesm.mat(out$betadraw[,2],names ='Bear Creek')''

library(dgof)
print(ks.test(data1$Humboldt,data1$BearCreek))

## Warning in ks.test(data1$Humboldt, data1$BearCreek): cannot compute correct p-
## values with ties

## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  data1$Humboldt and data1$BearCreek
## D = 0.014155, p-value = 0.344
## alternative hypothesis: two-sided

print(ks.test(data1$Humboldt, data1$MtPeakUtility))

## Warning in ks.test(data1$Humboldt, data1$MtPeakUtility): cannot compute correct
## p-values with ties

## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  data1$Humboldt and data1$MtPeakUtility
## D = 0.069977, p-value < 2.2e-16
## alternative hypothesis: two-sided

WindSpeed Data Analysis

vince

11/13/2020