r = getOption("repos")
r["CRAN"] = "http://cran.us.r-project.org"
options(repos = r)
install.packages('dgof')
## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages
library(dgof)
##
## Attaching package: 'dgof'
## The following object is masked from 'package:stats':
##
## ks.test
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
library('dplyr')
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('mlbench')
library('caret')
## Loading required package: lattice
## Loading required package: ggplot2
library('corrplot')
## corrplot 0.84 loaded
library('ggplot2')
library('GGally')
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library('BAS')
library('MASS')
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library('car')
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library('readr')
install.packages('arm')
## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages
library('arm')
## Loading required package: Matrix
## Loading required package: lme4
##
## arm (Version 1.11-2, built: 2020-7-27)
## Working directory is /Users/vincenttanoe/Desktop/statistics_Engineering
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
## The following object is masked from 'package:BAS':
##
## bayesglm.fit
## The following object is masked from 'package:corrplot':
##
## corrplot
install.packages('MCMCpack')
## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages
library('MCMCpack')
## Loading required package: coda
##
## Attaching package: 'coda'
## The following object is masked from 'package:arm':
##
## traceplot
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2021 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#ws=read_csv('ws.csv')
#attach(ws)
wind=read_csv('winds.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Bear Creek` = col_double(),
## `Frey Farm` = col_double(),
## `Criterion Wind Park` = col_double(),
## NedPower = col_double(),
## Humboldt = col_double(),
## LocustRidge = col_double(),
## `Roth Rock` = col_double(),
## Talbot = col_double(),
## Mountaineer = col_double(),
## BuffaloMountain = col_double(),
## BitWorks = col_double(),
## MtPeakUtility = col_double(),
## Anacacho = col_double(),
## DryLake = col_double(),
## Kingman = col_double()
## )
med=read_csv('medium.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## BearCreek = col_double(),
## FreyFarm = col_double(),
## CriterionWP = col_double(),
## NedPower = col_double(),
## Humboldt = col_double(),
## LocustRidge = col_double(),
## RothRock = col_double(),
## Talbot = col_double(),
## Mountaineer = col_double(),
## BuffaloMountain = col_double(),
## BitWorks = col_double(),
## MtPeakUtility = col_double(),
## Anacacho = col_double(),
## DryLake = col_double(),
## Kingman = col_double()
## )
small=read_csv('small.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## BearCreek = col_double(),
## FreyFarm = col_double(),
## CreterionWP = col_double(),
## NedPower = col_double(),
## Humboldt = col_double(),
## LocustRidge = col_double(),
## RothRock = col_double(),
## Talbot = col_double(),
## Mountaineer = col_double(),
## BuffaloMountain = col_double(),
## BitWorks = col_double(),
## MtPeakUtility = col_double(),
## Anacacho = col_double(),
## DryLake = col_double(),
## Kingman = col_double()
## )
attach(wind)
names(wind)
## [1] "Bear Creek" "Frey Farm" "Criterion Wind Park"
## [4] "NedPower" "Humboldt" "LocustRidge"
## [7] "Roth Rock" "Talbot" "Mountaineer"
## [10] "BuffaloMountain" "BitWorks" "MtPeakUtility"
## [13] "Anacacho" "DryLake" "Kingman"
attach(med)
## The following objects are masked from wind:
##
## Anacacho, BitWorks, BuffaloMountain, DryLake, Humboldt, Kingman,
## LocustRidge, Mountaineer, MtPeakUtility, NedPower, Talbot
attach(small)
## The following objects are masked from med:
##
## Anacacho, BearCreek, BitWorks, BuffaloMountain, DryLake, FreyFarm,
## Humboldt, Kingman, LocustRidge, Mountaineer, MtPeakUtility,
## NedPower, RothRock, Talbot
## The following objects are masked from wind:
##
## Anacacho, BitWorks, BuffaloMountain, DryLake, Humboldt, Kingman,
## LocustRidge, Mountaineer, MtPeakUtility, NedPower, Talbot
names(med)
## [1] "BearCreek" "FreyFarm" "CriterionWP" "NedPower"
## [5] "Humboldt" "LocustRidge" "RothRock" "Talbot"
## [9] "Mountaineer" "BuffaloMountain" "BitWorks" "MtPeakUtility"
## [13] "Anacacho" "DryLake" "Kingman"
names(small)
## [1] "BearCreek" "FreyFarm" "CreterionWP" "NedPower"
## [5] "Humboldt" "LocustRidge" "RothRock" "Talbot"
## [9] "Mountaineer" "BuffaloMountain" "BitWorks" "MtPeakUtility"
## [13] "Anacacho" "DryLake" "Kingman"
str(med)
## tibble [365 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ BearCreek : num [1:365] 9.1 11.28 11.73 7.87 7.64 ...
## $ FreyFarm : num [1:365] 7.94 10.36 10.26 5.26 5.86 ...
## $ CriterionWP : num [1:365] 15.5 14.4 14.1 12.7 12.7 ...
## $ NedPower : num [1:365] 12.9 13.5 13.1 11.8 12.8 ...
## $ Humboldt : num [1:365] 9.8 12.13 11.18 7.77 8.63 ...
## $ LocustRidge : num [1:365] 5.41 10.08 9.74 3.89 5.1 ...
## $ RothRock : num [1:365] 12.8 12.1 11 10.2 10.8 ...
## $ Talbot : num [1:365] 7.68 11.22 10.48 6.16 6.64 ...
## $ Mountaineer : num [1:365] 12.2 14.6 18 14.8 17.1 ...
## $ BuffaloMountain: num [1:365] 14.59 13.2 9.72 9.72 8.35 ...
## $ BitWorks : num [1:365] 9.95 8.94 8.02 6.23 6.74 9.71 6.98 5.36 5.07 4.25 ...
## $ MtPeakUtility : num [1:365] 10.6 6.85 8.24 7.89 3.96 11 7.34 4.89 9.24 8.39 ...
## $ Anacacho : num [1:365] 9.2 4.97 6.3 5.7 4.33 ...
## $ DryLake : num [1:365] 2.76 3.12 2.51 4.14 1.34 1.98 9.2 9.68 5.24 2.12 ...
## $ Kingman : num [1:365] 11.73 9.26 3.97 10.44 2.76 ...
## - attr(*, "spec")=
## .. cols(
## .. BearCreek = col_double(),
## .. FreyFarm = col_double(),
## .. CriterionWP = col_double(),
## .. NedPower = col_double(),
## .. Humboldt = col_double(),
## .. LocustRidge = col_double(),
## .. RothRock = col_double(),
## .. Talbot = col_double(),
## .. Mountaineer = col_double(),
## .. BuffaloMountain = col_double(),
## .. BitWorks = col_double(),
## .. MtPeakUtility = col_double(),
## .. Anacacho = col_double(),
## .. DryLake = col_double(),
## .. Kingman = col_double()
## .. )
Features selections
set.seed(7)
correlationMatrix <- cor(wind)
highlyCorrelated <- findCorrelation(correlationMatrix,cutoff = 0.5)
print(highlyCorrelated)
## [1] 5 4 3 7 2 1 9 6 12
Correlation plot
This is a scatter plot
data1=dplyr::select(wind,c(5,4,3,7,2,1,9,6,12))
med1=dplyr::select(med,c(5,4,3,7,2,1,9,6,12))
small1=dplyr::select(small,c(5,4,3,7,2,1,9,6,12))
summary(med1)
## Humboldt NedPower CriterionWP RothRock
## Min. : 1.820 Min. : 1.960 Min. : 2.26 Min. : 1.600
## 1st Qu.: 5.630 1st Qu.: 5.130 1st Qu.: 5.40 1st Qu.: 4.720
## Median : 7.300 Median : 7.100 Median : 7.46 Median : 6.490
## Mean : 7.656 Mean : 7.664 Mean : 8.23 Mean : 7.046
## 3rd Qu.: 9.300 3rd Qu.: 9.790 3rd Qu.:10.72 3rd Qu.: 8.940
## Max. :17.500 Max. :17.900 Max. :21.31 Max. :16.490
## FreyFarm BearCreek Mountaineer LocustRidge
## Min. : 1.000 Min. : 2.140 Min. : 2.170 Min. : 1.190
## 1st Qu.: 3.850 1st Qu.: 5.570 1st Qu.: 5.270 1st Qu.: 3.720
## Median : 5.320 Median : 7.370 Median : 8.100 Median : 5.000
## Mean : 5.732 Mean : 7.665 Mean : 8.755 Mean : 5.408
## 3rd Qu.: 7.170 3rd Qu.: 9.210 3rd Qu.:11.120 3rd Qu.: 6.730
## Max. :14.340 Max. :18.770 Max. :25.760 Max. :18.870
## MtPeakUtility
## Min. : 2.080
## 1st Qu.: 5.860
## Median : 7.590
## Mean : 7.732
## 3rd Qu.: 9.630
## Max. :15.180
ggpairs(data1)
head(data1)
Random Forest for data selection
#prepare training scheme
#control <- trainControl(method="repeatedcv", number=10, repeats=3)
#train the model
#data(data1)
#model <- train(Humboldt~.,data=data1, preProcess="scale", trControl=control)
#estimate variable importance
#importance <- varImp(model,scale=FALSE)
#summary importance
#print(importance)
#plot(importance)
#install.packages("randomForest")
# Load the library
#library(randomForest)
#reg.rf <- randomForest(Humboldt ~ ., data = data1, mtry = 3,
#importance = TRUE, na.action = na.omit)
# Print regression model
#print(reg.rf)
#Output to be present as PNG file
#png(file = "randomForestRegression.png")
# Plot the error vs the number of trees graph
#plot(reg.rf)
#plot(reg.rf)
Run the OLS regression
data1=dplyr::rename(data1,CriterionWP=`Criterion Wind Park`,RothRock=`Roth Rock`,FreyFarm=`Frey Farm`,
BearCreek=`Bear Creek`)
data3=dplyr::select(data1,c(1,5,6,7,8,9))
med3=dplyr::select(med1,c(1,5,6,7,8,9))
small3=dplyr::select(small1,c(1,5,6,7,8,9))
WIND SPEED DATA
ols=lm(Humboldt~.,data=data1)
summary(ols)
##
## Call:
## lm(formula = Humboldt ~ ., data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5601 -0.8547 -0.0311 0.8230 11.8557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.053853 0.050258 1.072 0.283963
## NedPower 0.040274 0.011893 3.386 0.000712 ***
## CriterionWP 0.070946 0.009027 7.859 4.33e-15 ***
## RothRock 0.034915 0.012041 2.900 0.003745 **
## FreyFarm 0.199217 0.007607 26.187 < 2e-16 ***
## BearCreek 0.594771 0.007316 81.301 < 2e-16 ***
## Mountaineer 0.009259 0.004750 1.949 0.051309 .
## LocustRidge 0.075561 0.007489 10.090 < 2e-16 ***
## MtPeakUtility 0.035300 0.004518 7.813 6.22e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.443 on 8751 degrees of freedom
## Multiple R-squared: 0.838, Adjusted R-squared: 0.8379
## F-statistic: 5660 on 8 and 8751 DF, p-value: < 2.2e-16
vif(ols)
## NedPower CriterionWP RothRock FreyFarm BearCreek
## 9.235150 6.703096 8.845728 2.691293 2.988862
## Mountaineer LocustRidge MtPeakUtility
## 2.787867 2.662189 1.041701
ols=lm(Humboldt~NedPower+CriterionWP+RothRock+Mountaineer+FreyFarm+LocustRidge+BearCreek+MtPeakUtility,data=data1)
model1=vif(ols)
ols3=lm(Humboldt~FreyFarm+LocustRidge+BearCreek+MtPeakUtility,data=data1)
model2=vif(ols3)
ols4=lm(Humboldt~BearCreek+MtPeakUtility,data=data1)
model3=vif(ols4)
tab_model(ols,ols3,ols4)
| Â | Humboldt | Humboldt | Humboldt | ||||||
|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 0.05 | -0.04 – 0.15 | 0.284 | 0.44 | 0.34 – 0.54 | <0.001 | 0.67 | 0.56 – 0.78 | <0.001 |
| NedPower | 0.04 | 0.02 – 0.06 | 0.001 | ||||||
| CriterionWP | 0.07 | 0.05 – 0.09 | <0.001 | ||||||
| RothRock | 0.03 | 0.01 – 0.06 | 0.004 | ||||||
| Mountaineer | 0.01 | -0.00 – 0.02 | 0.051 | ||||||
| FreyFarm | 0.20 | 0.18 – 0.21 | <0.001 | 0.29 | 0.27 – 0.30 | <0.001 | |||
| LocustRidge | 0.08 | 0.06 – 0.09 | <0.001 | 0.06 | 0.04 – 0.07 | <0.001 | |||
| BearCreek | 0.59 | 0.58 – 0.61 | <0.001 | 0.64 | 0.62 – 0.65 | <0.001 | 0.87 | 0.86 – 0.87 | <0.001 |
| MtPeakUtility | 0.04 | 0.03 – 0.04 | <0.001 | 0.05 | 0.04 – 0.06 | <0.001 | 0.05 | 0.04 – 0.06 | <0.001 |
| Observations | 8760 | 8760 | 8760 | ||||||
| R2 / R2 adjusted | 0.838 / 0.838 | 0.819 / 0.819 | 0.780 / 0.780 | ||||||
c(model1,model2,model3)
## NedPower CriterionWP RothRock Mountaineer FreyFarm
## 9.235150 6.703096 8.845728 2.787867 2.691293
## LocustRidge BearCreek MtPeakUtility FreyFarm LocustRidge
## 2.662189 2.988862 1.041701 2.316618 2.586382
## BearCreek MtPeakUtility BearCreek MtPeakUtility
## 2.881032 1.027176 1.003187 1.003187
SMALL DATA
sols=lm(Humboldt~.,data=small1)
summary(sols)
##
## Call:
## lm(formula = Humboldt ~ ., data = small1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97372 -0.14432 0.00986 0.15047 1.22829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12313 0.35776 0.344 0.73235
## NedPower 0.32475 0.20596 1.577 0.12201
## CreterionWP -0.07638 0.15823 -0.483 0.63170
## RothRock 0.25802 0.24004 1.075 0.28828
## FreyFarm 0.28128 0.13365 2.105 0.04108 *
## BearCreek 0.56046 0.09204 6.089 2.5e-07 ***
## Mountaineer -0.18399 0.05974 -3.080 0.00356 **
## LocustRidge -0.02244 0.13767 -0.163 0.87126
## MtPeakUtility -0.03898 0.04491 -0.868 0.39004
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.399 on 44 degrees of freedom
## Multiple R-squared: 0.9604, Adjusted R-squared: 0.9532
## F-statistic: 133.2 on 8 and 44 DF, p-value: < 2.2e-16
vif_medium=vif(sols)
MEDIUM DATA
mols=lm(Humboldt~.,data=med1)
summary(mols)
##
## Call:
## lm(formula = Humboldt ~ ., data = med1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2551 -0.3749 -0.0234 0.3360 3.3637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.13538 0.13909 -0.973 0.3311
## NedPower -0.04309 0.05903 -0.730 0.4659
## CriterionWP 0.03442 0.04117 0.836 0.4036
## RothRock 0.14551 0.06032 2.412 0.0164 *
## FreyFarm 0.28445 0.03601 7.900 3.51e-14 ***
## BearCreek 0.68473 0.02916 23.479 < 2e-16 ***
## Mountaineer -0.01227 0.01825 -0.672 0.5018
## LocustRidge -0.03041 0.03430 -0.887 0.3758
## MtPeakUtility 0.02655 0.01367 1.943 0.0528 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6359 on 356 degrees of freedom
## Multiple R-squared: 0.9509, Adjusted R-squared: 0.9498
## F-statistic: 861.8 on 8 and 356 DF, p-value: < 2.2e-16
vif(mols)
## NedPower CriterionWP RothRock FreyFarm BearCreek
## 34.304333 21.533175 31.992690 6.938840 6.205030
## Mountaineer LocustRidge MtPeakUtility
## 6.241221 6.705090 1.087326
Run the model without the highest Variance(NedPower,Criterion Wind Park, Roth Roth) LARGE
ols1=lm(Humboldt~.,data=data3)
summary(ols1)
##
## Call:
## lm(formula = Humboldt ~ ., data = data3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1492 -0.8924 -0.0461 0.8364 11.7037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.308617 0.051008 6.050 1.50e-09 ***
## FreyFarm 0.241145 0.007685 31.380 < 2e-16 ***
## BearCreek 0.613482 0.007533 81.444 < 2e-16 ***
## Mountaineer 0.071843 0.003762 19.095 < 2e-16 ***
## LocustRidge 0.048212 0.007668 6.288 3.38e-10 ***
## MtPeakUtility 0.048244 0.004651 10.372 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.495 on 8754 degrees of freedom
## Multiple R-squared: 0.826, Adjusted R-squared: 0.8259
## F-statistic: 8312 on 5 and 8754 DF, p-value: < 2.2e-16
vif(ols1)
## FreyFarm BearCreek Mountaineer LocustRidge MtPeakUtility
## 2.557069 2.950426 1.628348 2.598426 1.028080
Run the model without the highest Variance(NedPower,Criterion Wind Park, Roth Roth) MEDIUM
mols1=lm(Humboldt~.,data=med3)
summary(mols1)
##
## Call:
## lm(formula = Humboldt ~ ., data = med3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0242 -0.4047 -0.0294 0.3583 3.6234
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04868 0.14946 -0.326 0.744848
## FreyFarm 0.37677 0.03637 10.359 < 2e-16 ***
## BearCreek 0.72013 0.03086 23.336 < 2e-16 ***
## Mountaineer 0.03962 0.01275 3.108 0.002033 **
## LocustRidge -0.12235 0.03395 -3.604 0.000358 ***
## MtPeakUtility 0.04389 0.01459 3.009 0.002808 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6895 on 359 degrees of freedom
## Multiple R-squared: 0.9418, Adjusted R-squared: 0.941
## F-statistic: 1162 on 5 and 359 DF, p-value: < 2.2e-16
vif(mols1)
## FreyFarm BearCreek Mountaineer LocustRidge MtPeakUtility
## 6.022470 5.910097 2.589966 5.589773 1.054012
Run the model without the highest Variance(NedPower,Criterion Wind Park, Roth Roth) SMALL
sols1=lm(Humboldt~.,data=small3)
summary(sols1)
##
## Call:
## lm(formula = Humboldt ~ ., data = small3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6543 -0.2844 -0.1062 0.1833 3.3381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.14680 0.53405 0.275 0.78462
## FreyFarm 0.51857 0.17632 2.941 0.00506 **
## BearCreek 0.68555 0.13434 5.103 5.93e-06 ***
## Mountaineer 0.12310 0.06267 1.964 0.05542 .
## LocustRidge -0.37257 0.17288 -2.155 0.03631 *
## MtPeakUtility 0.03543 0.06667 0.531 0.59759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.609 on 47 degrees of freedom
## Multiple R-squared: 0.9013, Adjusted R-squared: 0.8908
## F-statistic: 85.88 on 5 and 47 DF, p-value: < 2.2e-16
vif(sols1)
## FreyFarm BearCreek Mountaineer LocustRidge MtPeakUtility
## 9.070087 7.991427 4.220850 8.827098 1.196373
ONLY 4 independent variables Large
data2=dplyr::select(data1,c(1,6,9))
#data2=dplyr::rename(data2,BearCreek=`Bear Creek`)
ols2=lm(Humboldt~.,data=data2)
summary(ols2)
##
## Call:
## lm(formula = Humboldt ~ ., data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1490 -1.0214 -0.1376 0.9528 13.1351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.671202 0.056393 11.902 <2e-16 ***
## BearCreek 0.865158 0.004938 175.194 <2e-16 ***
## MtPeakUtility 0.045658 0.005166 8.838 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.681 on 8757 degrees of freedom
## Multiple R-squared: 0.78, Adjusted R-squared: 0.7799
## F-statistic: 1.552e+04 on 2 and 8757 DF, p-value: < 2.2e-16
#ols_plot_obs_fit(ols2)
vif(ols2)
## BearCreek MtPeakUtility
## 1.003187 1.003187
ONLY 4 independent variables Medium
med2=dplyr::select(med1,c(1,6,9))
mols2=lm(Humboldt~.,data=med2)
summary(mols2)
##
## Call:
## lm(formula = Humboldt ~ ., data = med2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6026 -0.5450 -0.0297 0.5244 3.8371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.03445 0.18288 -0.188 0.85068
## BearCreek 0.94960 0.01555 61.062 < 2e-16 ***
## MtPeakUtility 0.05315 0.01741 3.053 0.00243 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8444 on 362 degrees of freedom
## Multiple R-squared: 0.912, Adjusted R-squared: 0.9115
## F-statistic: 1875 on 2 and 362 DF, p-value: < 2.2e-16
vif(mols2)
## BearCreek MtPeakUtility
## 1.000682 1.000682
ONLY 4 independent variables Small
install.packages("olsrr")
## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages
library(olsrr)
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## rivers
small2=dplyr::select(small1,c(1,6,9))
sols2=lm(Humboldt~.,data=small2)
summary(sols2)
##
## Call:
## lm(formula = Humboldt ~ ., data = small2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1838 -0.3123 -0.0293 0.2213 3.7773
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.11732 0.58968 -0.199 0.843
## BearCreek 0.94540 0.05634 16.779 <2e-16 ***
## MtPeakUtility 0.07615 0.07227 1.054 0.297
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6814 on 50 degrees of freedom
## Multiple R-squared: 0.8686, Adjusted R-squared: 0.8633
## F-statistic: 165.3 on 2 and 50 DF, p-value: < 2.2e-16
ols_plot_obs_fit(sols2)
#vif(sols2)
Evaluation of the correlation
#install.packages('corrplot')
#ggpairs(data2)
install.packages("sjPlot")
## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages
install.packages("sjmisc")
## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages
install.packages("sjlabelled")
## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages
library(sjPlot)
library(sjmisc)
## Learn more about sjmisc with 'browseVignettes("sjmisc")'.
library(sjlabelled)
##
## Attaching package: 'sjlabelled'
## The following objects are masked from 'package:sjmisc':
##
## to_character, to_factor, to_label, to_numeric
## The following object is masked from 'package:dplyr':
##
## as_label
tab_model(ols2,mols2,sols2)
| Â | Humboldt | Humboldt | Humboldt | ||||||
|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 0.67 | 0.56 – 0.78 | <0.001 | -0.03 | -0.39 – 0.33 | 0.851 | -0.12 | -1.30 – 1.07 | 0.843 |
| BearCreek | 0.87 | 0.86 – 0.87 | <0.001 | 0.95 | 0.92 – 0.98 | <0.001 | 0.95 | 0.83 – 1.06 | <0.001 |
| MtPeakUtility | 0.05 | 0.04 – 0.06 | <0.001 | 0.05 | 0.02 – 0.09 | 0.002 | 0.08 | -0.07 – 0.22 | 0.297 |
| Observations | 8760 | 365 | 53 | ||||||
| R2 / R2 adjusted | 0.780 / 0.780 | 0.912 / 0.911 | 0.869 / 0.863 | ||||||
tab_model(ols2,mols2,sols2)
| Â | Humboldt | Humboldt | Humboldt | ||||||
|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 0.67 | 0.56 – 0.78 | <0.001 | -0.03 | -0.39 – 0.33 | 0.851 | -0.12 | -1.30 – 1.07 | 0.843 |
| BearCreek | 0.87 | 0.86 – 0.87 | <0.001 | 0.95 | 0.92 – 0.98 | <0.001 | 0.95 | 0.83 – 1.06 | <0.001 |
| MtPeakUtility | 0.05 | 0.04 – 0.06 | <0.001 | 0.05 | 0.02 – 0.09 | 0.002 | 0.08 | -0.07 – 0.22 | 0.297 |
| Observations | 8760 | 365 | 53 | ||||||
| R2 / R2 adjusted | 0.780 / 0.780 | 0.912 / 0.911 | 0.869 / 0.863 | ||||||
Bayesian Model #https://bayesball.github.io/BOOK/simple-linear-regression.html#a-simple-linear-regression-model#
bayGLM=arm::bayesglm(Humboldt~.,data=data2,
prior.mean=0,prior.scale=Inf, prior.df=Inf)
summary(bayGLM)
##
## Call:
## arm::bayesglm(formula = Humboldt ~ ., data = data2, prior.mean = 0,
## prior.scale = Inf, prior.df = Inf)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.1490 -1.0214 -0.1376 0.9528 13.1351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.671201 0.056393 11.902 <2e-16 ***
## BearCreek 0.865158 0.004938 175.194 <2e-16 ***
## MtPeakUtility 0.045658 0.005166 8.838 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 2.826083)
##
## Null deviance: 112482 on 8759 degrees of freedom
## Residual deviance: 24748 on 8757 degrees of freedom
## AIC: 33965
##
## Number of Fisher Scoring iterations: 4
#ols_plot_obs_fit(bayGLM)
Simulation of the posterior
posterior=coef(sim(bayGLM))
head(posterior,10)
## (Intercept) BearCreek MtPeakUtility
## [1,] 0.6565612 0.8613387 0.05033214
## [2,] 0.5876950 0.8688189 0.04876587
## [3,] 0.7173126 0.8603945 0.04874678
## [4,] 0.7791422 0.8684534 0.03260271
## [5,] 0.6654422 0.8662332 0.04585529
## [6,] 0.6296822 0.8673664 0.04620767
## [7,] 0.5975143 0.8672432 0.04851312
## [8,] 0.5938396 0.8721629 0.05008377
## [9,] 0.5371307 0.8705376 0.05620814
## [10,] 0.6441717 0.8677984 0.04440694
MCMC regression
modelmcm=MCMCregress(Humboldt~.,data=data2,family=gaussian,
burnin=1000,mcmc=1000,thin=1,
verbose=0)
summary(modelmcm)
##
## Iterations = 1001:2000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) 0.6754 0.055516 0.0017556 0.0015056
## BearCreek 0.8649 0.004737 0.0001498 0.0001498
## MtPeakUtility 0.0455 0.005229 0.0001653 0.0001511
## sigma2 2.8262 0.043548 0.0013771 0.0013771
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) 0.56712 0.63751 0.67503 0.7134 0.78038
## BearCreek 0.85498 0.86171 0.86494 0.8681 0.87383
## MtPeakUtility 0.03538 0.04199 0.04562 0.0490 0.05596
## sigma2 2.73911 2.79707 2.82646 2.8573 2.91005
plot(modelmcm,col='blue',ask=FALSE)
KS TEST GRAPH
#https://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm
sample1<-data1$Humboldt
sample2<-data1$BearCreek
group <- c(rep("sample1", length(sample1)), rep("sample2", length(sample2)))
dat <- data.frame(KSD = c(sample1,sample2), group = group)
# create ECDF of data
cdf1 <- ecdf(sample1)
cdf2 <- ecdf(sample2)
# find min and max statistics to draw line between points of greatest distance
minMax <- seq(min(sample1,sample2), max(sample1,sample2), length.out=length(sample1))
x0 <- minMax[which( abs(cdf1(minMax) - cdf2(minMax)) == max(abs(cdf1(minMax) - cdf2(minMax))) )]
y0 <- cdf1(x0)
y1 <- cdf2(x0)
plot(cdf1, verticals=TRUE, do.points=FALSE, col="blue", main ='Humboldt vs BearCreek')
plot(cdf2, verticals=TRUE, do.points=FALSE, col="green", add=TRUE,)
## alternatine, use standard R plot of ecdf
#plot(f.a, col="blue")
#lines(f.b, col="green")
points(c(x0, x0), c(y0, y1), pch=16, col="red")
segments(x0, y0, x0, y1, col="red", lty="dotted")
Grap the MCMC posterior results
par(mfrow=c(2,2))
plot(modelmcm,trace = TRUE, density = TRUE, smooth = TRUE,
auto.layout = FALSE, ask = TRUE, col='blue',pch=10)
LASSO REGRESSION
library(lars)
## Loaded lars 1.2
x=model.matrix(Humboldt~.,data=data1)
x=x[,-1]
lasso=lars(x=x,y=data1$Humboldt,trace=TRUE)
## LASSO sequence
## Computing X'X .....
## LARS Step 1 : Variable 5 added
## LARS Step 2 : Variable 4 added
## LARS Step 3 : Variable 1 added
## LARS Step 4 : Variable 2 added
## LARS Step 5 : Variable 7 added
## LARS Step 6 : Variable 3 added
## LARS Step 7 : Variable 6 added
## LARS Step 8 : Variable 8 added
## Computing residuals, RSS etc .....
lasso
##
## Call:
## lars(x = x, y = data1$Humboldt, trace = TRUE)
## R-squared: 0.838
## Sequence of LASSO moves:
## BearCreek FreyFarm NedPower CriterionWP LocustRidge RothRock Mountaineer
## Var 5 4 1 2 7 3 6
## Step 1 2 3 4 5 6 7
## MtPeakUtility
## Var 8
## Step 8
plot(lasso)
coef(lasso,s=c(.25,.50,0.75,1.0),mode="fraction")
## NedPower CriterionWP RothRock FreyFarm BearCreek Mountaineer
## [1,] 0.00000000 0.00000000 0.00000000 0.00000000 0.2649701 0.000000000
## [2,] 0.00000000 0.00000000 0.00000000 0.07629533 0.4603191 0.000000000
## [3,] 0.04621248 0.02207510 0.00000000 0.17034866 0.5627058 0.000000000
## [4,] 0.04027427 0.07094643 0.03491481 0.19921720 0.5947707 0.009259155
## LocustRidge MtPeakUtility
## [1,] 0.00000000 0.00000000
## [2,] 0.00000000 0.00000000
## [3,] 0.00000000 0.00000000
## [4,] 0.07556118 0.03529959
##cross-validation
lars::cv.lars(x=x,y=data1$Humboldt)
NON BAYESIAN AND BAYESIAN REGRESSION LARGE
#linear mix effects models lm24
priors=brms::get_prior(Humboldt ~.,data=data2,
family = Gamma('identity'))
summary(priors)
## prior class coef group
## Length:5 Length:5 Length:5 Length:5
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## resp dpar nlpar bound
## Length:5 Length:5 Length:5 Length:5
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## source
## Length:5
## Class :character
## Mode :character
#Baysian Model
fit=brms::brm(Humboldt~.,data=data2)
## Compiling Stan program...
## Start sampling
##
## SAMPLING FOR MODEL '6e03d2e53663040a84fd0ec04120312a' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 3.9e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.39 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.150075 seconds (Warm-up)
## Chain 1: 0.135171 seconds (Sampling)
## Chain 1: 0.285246 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '6e03d2e53663040a84fd0ec04120312a' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 3.9e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.39 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.146732 seconds (Warm-up)
## Chain 2: 0.160795 seconds (Sampling)
## Chain 2: 0.307527 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL '6e03d2e53663040a84fd0ec04120312a' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 5.6e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.56 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.152121 seconds (Warm-up)
## Chain 3: 0.156307 seconds (Sampling)
## Chain 3: 0.308428 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL '6e03d2e53663040a84fd0ec04120312a' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 5.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.55 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.147925 seconds (Warm-up)
## Chain 4: 0.123424 seconds (Sampling)
## Chain 4: 0.271349 seconds (Total)
## Chain 4:
summary(fit)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Humboldt ~ BearCreek + MtPeakUtility
## Data: data2 (Number of observations: 8760)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.67 0.06 0.56 0.79 1.00 4810 3152
## BearCreek 0.87 0.00 0.86 0.87 1.00 5116 3022
## MtPeakUtility 0.05 0.01 0.04 0.06 1.00 4945 3181
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.68 0.01 1.66 1.71 1.00 4205 3193
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(fit)
mod = brms::brm(
Humboldt~., data = data2,
prior = brms::set_prior('uniform(-1, 1)'), iter = 2000,
chains = 4,
cores = 4
)
## Warning: It appears as if you have specified a lower bounded prior on a parameter that has no natural lower bound.
## If this is really what you want, please specify argument 'lb' of 'set_prior' appropriately.
## Warning occurred for prior
## b ~ uniform(-1, 1)
## Warning: It appears as if you have specified an upper bounded prior on a parameter that has no natural upper bound.
## If this is really what you want, please specify argument 'ub' of 'set_prior' appropriately.
## Warning occurred for prior
## b ~ uniform(-1, 1)
## Compiling Stan program...
## Start sampling
summary(mod)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Humboldt ~ BearCreek + MtPeakUtility
## Data: data2 (Number of observations: 8760)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.67 0.06 0.56 0.78 1.00 5029 3320
## BearCreek 0.87 0.00 0.86 0.87 1.00 4917 3111
## MtPeakUtility 0.05 0.01 0.04 0.06 1.00 4747 3030
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.68 0.01 1.66 1.71 1.00 3562 2771
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(mod,ask=FALSE)
NON BAYESIAN AND BAYESIAN REGRESSION MEDIUM
#linear mix effects models lm24
mpriors=brms::get_prior(Humboldt ~.,data=med2,
family = gaussian)
summary(mpriors)
## prior class coef group
## Length:5 Length:5 Length:5 Length:5
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## resp dpar nlpar bound
## Length:5 Length:5 Length:5 Length:5
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## source
## Length:5
## Class :character
## Mode :character
#Baysian Model
mfit=brms::brm(Humboldt~.,data=med2)
## Compiling Stan program...
## Start sampling
##
## SAMPLING FOR MODEL '87bfc04baecc260324aa6a8522ed233d' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 1.6e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.016419 seconds (Warm-up)
## Chain 1: 0.016186 seconds (Sampling)
## Chain 1: 0.032605 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '87bfc04baecc260324aa6a8522ed233d' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 8e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.01622 seconds (Warm-up)
## Chain 2: 0.017184 seconds (Sampling)
## Chain 2: 0.033404 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL '87bfc04baecc260324aa6a8522ed233d' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 1e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.016752 seconds (Warm-up)
## Chain 3: 0.016311 seconds (Sampling)
## Chain 3: 0.033063 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL '87bfc04baecc260324aa6a8522ed233d' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 5e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.05 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.016172 seconds (Warm-up)
## Chain 4: 0.016703 seconds (Sampling)
## Chain 4: 0.032875 seconds (Total)
## Chain 4:
summary(mfit)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Humboldt ~ BearCreek + MtPeakUtility
## Data: med2 (Number of observations: 365)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.03 0.19 -0.40 0.33 1.00 4350 2973
## BearCreek 0.95 0.02 0.92 0.98 1.00 4873 3049
## MtPeakUtility 0.05 0.02 0.02 0.09 1.00 4382 3486
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.85 0.03 0.79 0.91 1.00 4452 3148
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(mfit)
mmod = brms::brm(
Humboldt~., data =med2,
prior = brms::set_prior('normal(0, 1)'), iter = 2000,
chains = 4,
cores = 4
)
## Compiling Stan program...
## Start sampling
summary(mmod)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Humboldt ~ BearCreek + MtPeakUtility
## Data: med2 (Number of observations: 365)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.04 0.18 -0.40 0.31 1.00 4166 3223
## BearCreek 0.95 0.02 0.92 0.98 1.00 4259 3168
## MtPeakUtility 0.05 0.02 0.02 0.09 1.00 4042 2894
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.85 0.03 0.79 0.91 1.00 3876 3219
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(mmod,ask=FALSE)
NON BAYESIAN AND BAYESIAN REGRESSION SMALL
#linear mix effects models lm24
spriors=brms::get_prior(Humboldt ~.,data=small2,
family = gaussian)
summary(spriors)
## prior class coef group
## Length:5 Length:5 Length:5 Length:5
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## resp dpar nlpar bound
## Length:5 Length:5 Length:5 Length:5
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## source
## Length:5
## Class :character
## Mode :character
#Baysian Model
sfit=brms::brm(Humboldt~BearCreek+MtPeakUtility,data=small2)
## Compiling Stan program...
## Start sampling
##
## SAMPLING FOR MODEL '25e7b0157afbb9ef97fbd6ba6acfab43' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 1.5e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.012439 seconds (Warm-up)
## Chain 1: 0.012849 seconds (Sampling)
## Chain 1: 0.025288 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL '25e7b0157afbb9ef97fbd6ba6acfab43' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 6e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.012561 seconds (Warm-up)
## Chain 2: 0.013008 seconds (Sampling)
## Chain 2: 0.025569 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL '25e7b0157afbb9ef97fbd6ba6acfab43' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 4e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.012462 seconds (Warm-up)
## Chain 3: 0.012434 seconds (Sampling)
## Chain 3: 0.024896 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL '25e7b0157afbb9ef97fbd6ba6acfab43' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 7e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.012891 seconds (Warm-up)
## Chain 4: 0.012969 seconds (Sampling)
## Chain 4: 0.02586 seconds (Total)
## Chain 4:
summary(sfit)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Humboldt ~ BearCreek + MtPeakUtility
## Data: small2 (Number of observations: 53)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.10 0.62 -1.31 1.09 1.00 4368 2888
## BearCreek 0.95 0.06 0.83 1.06 1.00 3849 3114
## MtPeakUtility 0.07 0.07 -0.07 0.23 1.00 3862 3106
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.70 0.07 0.58 0.85 1.00 3542 2769
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(sfit)
smod = brms::brm(
Humboldt~., data = small2,
prior = brms::set_prior('normal(0,1)'), iter = 2000,
chains = 4,
cores = 4
)
## Compiling Stan program...
## Start sampling
summary(smod)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Humboldt ~ BearCreek + MtPeakUtility
## Data: small2 (Number of observations: 53)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.10 0.62 -1.30 1.14 1.00 4769 2778
## BearCreek 0.94 0.06 0.82 1.06 1.00 3728 2666
## MtPeakUtility 0.08 0.08 -0.08 0.22 1.00 3647 3038
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.70 0.07 0.58 0.85 1.00 3492 2907
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(smod,ask=FALSE)
Model Diagnostics Large
pp = brms::pp_check(mod)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
pp+theme_bw()
#Default Model Predictions
plot(brms::conditional_effects(mod),points=T)
Model Diagnostics Medium
pp = brms::pp_check(mmod)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
pp+theme_bw()
#Default Model Predictions
plot(brms::conditional_effects(mmod),col='blue',points=T)
Model Diagnostics Small
pp = brms::pp_check(smod)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
pp+theme_bw()
#Default Model Predictions
plot(brms::conditional_effects(smod),points=T)
Extract MCMC for regression coefficients
posterior_coeff=brms::posterior_samples(mod)
#head(posterior_coeff)
#plot(posterior_coeff)
nrow(posterior_coeff)
## [1] 4000
install.packages("ggmcmc")
## Installing package into '/Users/vincenttanoe/Library/R/4.0/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/sw/m_qbgxpn3pzcgbb04_cl0mxh0000gn/T//Rtmp0nU1qc/downloaded_packages
library(ggmcmc)
## Loading required package: tidyr
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:sjmisc':
##
## replace_na
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
library(ggthemes)
library(ggridges)
model1tranformed <- ggs(mod) # the ggs function transforms the brms output into a longformat tibble, that we can use to make different types of plots.
summary(mod)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Humboldt ~ BearCreek + MtPeakUtility
## Data: data2 (Number of observations: 8760)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.67 0.06 0.56 0.78 1.00 5029 3320
## BearCreek 0.87 0.00 0.86 0.87 1.00 4917 3111
## MtPeakUtility 0.05 0.01 0.04 0.06 1.00 4747 3030
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.68 0.01 1.66 1.71 1.00 3562 2771
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summary(mod)$fixed[1,1:4]
## Estimate Est.Error l-95% CI u-95% CI
## 0.67017785 0.05674332 0.56118136 0.78465390
ggplot(filter(model1tranformed, Parameter %in% c("b_Intercept", "b_BearCreek", "b_Mountaineer","b_MtPeakUtility")),
aes(x = Iteration,
y = value,
col = as.factor(Chain)))+
geom_line() +
geom_vline(xintercept = 1000)+
facet_grid(Parameter ~ . ,
scale = 'free_y',
switch = 'y')+
labs(title = "Caterpillar Plots",
col = "Chains")
#model2 <- brms::brm(Humboldt~., data =small2,
#warmup = 1000, iter = 3000,
#cores = 2, chains = 2,
#seed = 123)
#summary(model2)$fixed
#summary(model2)$fixed
mod3 <- brms::brm(
Humboldt~., data = data1,
prior = brms::set_prior('normal(0, 1)'), iter = 2000,
chains = 4,
cores = 4
)
## Compiling Stan program...
## Start sampling
summary(mod3)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: Humboldt ~ NedPower + CriterionWP + RothRock + FreyFarm + BearCreek + Mountaineer + LocustRidge + MtPeakUtility
## Data: data1 (Number of observations: 8760)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.05 0.05 -0.05 0.15 1.00 4812 3693
## NedPower 0.04 0.01 0.02 0.06 1.00 2504 2313
## CriterionWP 0.07 0.01 0.05 0.09 1.00 3320 2746
## RothRock 0.03 0.01 0.01 0.06 1.00 2850 2318
## FreyFarm 0.20 0.01 0.18 0.21 1.00 4029 2921
## BearCreek 0.59 0.01 0.58 0.61 1.00 3212 2761
## Mountaineer 0.01 0.00 -0.00 0.02 1.00 3820 3208
## LocustRidge 0.08 0.01 0.06 0.09 1.00 3334 2476
## MtPeakUtility 0.04 0.00 0.03 0.04 1.00 5324 3167
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.44 0.01 1.42 1.46 1.00 4334 3058
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(mod3,ask=FALSE)
pp = brms::pp_check(mod3)
## Using 10 posterior samples for ppc type 'dens_overlay' by default.
pp+theme_bw()
#Default Model Predictions
brms::conditional_effects(mod3)
#install.packages("bayesplot")
#install.packages("rstanarm")
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.21.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
##
## Attaching package: 'rstanarm'
## The following objects are masked from 'package:arm':
##
## invlogit, logit
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:caret':
##
## compare_models, R2
library(bayesplot)
## This is bayesplot version 1.7.2
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
fit1 <- stan_glm(Humboldt ~ ., data = data2)
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 5.8e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.58 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.034167 seconds (Warm-up)
## Chain 1: 0.469982 seconds (Sampling)
## Chain 1: 0.504149 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 8e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.037167 seconds (Warm-up)
## Chain 2: 0.468174 seconds (Sampling)
## Chain 2: 0.505341 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 8e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.041709 seconds (Warm-up)
## Chain 3: 0.481211 seconds (Sampling)
## Chain 3: 0.52292 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 8e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.037844 seconds (Warm-up)
## Chain 4: 0.470887 seconds (Sampling)
## Chain 4: 0.508731 seconds (Total)
## Chain 4:
posterior <- as.matrix(fit1)
plot_title <- ggtitle("Posterior distributions",
"with medians and 80% intervals")
mcmc_areas(posterior,
pars = c("BearCreek", "MtPeakUtility"),
prob = 0.8) + plot_title
color_scheme_set("red")
ppc_dens_overlay(y = fit1$y,
yrep = posterior_predict(fit, draws = 50))
# Import library
library(BAS)
# Use `bas.lm` to run regression model
cog.bas = bas.lm(Humboldt ~ ., data = data2, prior = "BIC",
#modelprior = Bernoulli(1),
include.always = ~ .,
n.models = 1)
cog.coef = coef(cog.bas)
cog.coef
##
## Marginal Posterior Summaries of Coefficients:
##
## Using BMA
##
## Based on the top 1 models
## post mean post SD post p(B != 0)
## Intercept 7.655643 0.017961 1.000000
## BearCreek 0.865158 0.004938 1.000000
## MtPeakUtility 0.045658 0.005166 1.000000
par(mfrow = c(2, 2), col.lab = "darkgrey", col.axis = "darkgrey", col = "darkgrey")
plot(cog.coef, subset = 2:3, ask = F)
confint(cog.coef, parm = 2:3)
## 2.5% 97.5% beta
## BearCreek 0.85547819 0.8748386 0.86515837
## MtPeakUtility 0.03553192 0.0557844 0.04565816
## attr(,"Probability")
## [1] 0.95
## attr(,"class")
## [1] "confint.bas"
out = confint(cog.coef)[, 1:2]
# Extract the upper and lower bounds of the credible intervals
names = c("posterior mean", "posterior std", colnames(out))
out = cbind(cog.coef$postmean, cog.coef$postsd, out)
colnames(out) = names
round(out, 2)
## posterior mean posterior std 2.5% 97.5%
## Intercept 7.66 0.02 7.62 7.69
## BearCreek 0.87 0.00 0.86 0.87
## MtPeakUtility 0.05 0.01 0.04 0.06
#https://cran.r-project.org/web/packages/bayesm/vignettes/bayesm_Overview_Vignette.html
par(mfrow = c(1,2))
curve(dnorm(x,0,10), xlab = "", ylab = "", xlim = c(-30,30),
main = expression(paste("Prior for ", beta[j])),
col = "dodgerblue4")
nu <- 3
ssq <- var(log(data2$Humboldt))
curve(nu*ssq/dchisq(x,nu), xlab = "", ylab = "", xlim = c(0,1),
main = expression(paste("Prior for ", sigma^2)),
col = "darkred")
par(mfrow = c(1,1))
###B <- 1000+1 #burn in draws to discard
##R <- 10000
##par(mfrow = c(1,2))
##hist(out$betadraw[B:R,2], breaks = 30,
#main = "Posterior Dist. of BearCreek Coef.",
#yaxt = "n", yaxs="i",#
#xlab = "", ylab = "",
#col = "dodgerblue4", border = "gray")
#hist(out$sigmasqdraw[B:R], breaks = 30,
# main = "Posterior Dist. of Sigma2",
# yaxt = "n", yaxs="i",#
# xlab = "", ylab = "", #
# col = "darkred", border = "gray")
#par(mfrow = c(1,1))
#apply(out$betadraw[B:R,2:3], 2, mean)
#summary(out$betadraw)
#plot.bayesm.mat(out$betadraw[,2],names ='Bear Creek')''
library(dgof)
print(ks.test(data1$Humboldt,data1$BearCreek))
## Warning in ks.test(data1$Humboldt, data1$BearCreek): cannot compute correct p-
## values with ties
##
## Two-sample Kolmogorov-Smirnov test
##
## data: data1$Humboldt and data1$BearCreek
## D = 0.014155, p-value = 0.344
## alternative hypothesis: two-sided
print(ks.test(data1$Humboldt, data1$MtPeakUtility))
## Warning in ks.test(data1$Humboldt, data1$MtPeakUtility): cannot compute correct
## p-values with ties
##
## Two-sample Kolmogorov-Smirnov test
##
## data: data1$Humboldt and data1$MtPeakUtility
## D = 0.069977, p-value < 2.2e-16
## alternative hypothesis: two-sided