library(aqp)
## Warning: package 'aqp' was built under R version 4.1.3
## This is aqp 1.42
library(soilDB)
## Warning: package 'soilDB' was built under R version 4.1.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
data("loafercreek")
# Construct generalized horizon designations
n <- c("A","BAt", "Bt1","Bt2","Cr", "R")
p <- c("A", "BA|AB", "bt|Bw", "Bt3|Bt4|2B|C" ,"Cr","R")

 loafercreek$genhz <- generalize.hz(
  loafercreek$hzname,
  n,p)
h <- horizons(loafercreek)

table(h$genhz, h$hzname)
##           
##            2BC 2BCt 2Bt1 2Bt2 2Bt3 2Bt4 2Bt5 2CB 2CBt 2Cr 2Crt 2R  A A1 A2 AB
##   A          0    0    0    0    0    0    0   0    0   0    0  0 97  7  7  0
##   BAt        0    0    0    0    0    0    0   0    0   0    0  0  0  0  0  1
##   Bt1        0    0    0    0    0    0    0   0    0   0    0  0  0  0  0  0
##   Bt2        1    1    3    8    8    6    1   1    1   0    0  0  0  0  0  0
##   Cr         0    0    0    0    0    0    0   0    0   4    2  0  0  0  0  0
##   R          0    0    0    0    0    0    0   0    0   0    0  6  0  0  0  0
##   not-used   0    0    0    0    0    0    0   0    0   0    0  0  0  0  0  0
##           
##            ABt Ad Ap  B BA BAt BC BCt Bt Bt1 Bt2 Bt3 Bt4 Bw Bw1 Bw2 Bw3  C CBt
##   A          0  1  1  0  0   0  0   0  0   0   0   0   0  0   0   0   0  0   0
##   BAt        2  0  0  0 31   8  0   0  0   0   0   0   0  0   0   0   0  0   0
##   Bt1        0  0  0  0  0   0  0   0  0   0   0   0   0 10   2   2   1  0   0
##   Bt2        0  0  0  0  0   0  4  16  0   0   0  47   8  0   0   0   0  6   6
##   Cr         0  0  0  0  0   0  0   0  0   0   0   0   0  0   0   0   0  0   0
##   R          0  0  0  0  0   0  0   0  0   0   0   0   0  0   0   0   0  0   0
##   not-used   0  0  0  1  0   0  0   0  8  93  88   0   0  0   0   0   0  0   0
##           
##            Cd Cr Cr/R Crt H1 Oi  R Rt
##   A         0  0    0   0  0  0  0  0
##   BAt       0  0    0   0  0  0  0  0
##   Bt1       0  0    0   0  0  0  0  0
##   Bt2       1  0    0   0  0  0  0  0
##   Cr        0 49    0  20  0  0  0  0
##   R         0  0    1   0  0  0 40  1
##   not-used  0  0    0   0  1 24  0  0
vars <- c("genhz", "clay","total_frags_pct",
          "phfield", "effclass")
summary(h[, vars])
##       genhz          clay       total_frags_pct    phfield    
##  A       :113   Min.   :10.00   Min.   : 0.00   Min.   :4.90  
##  BAt     : 42   1st Qu.:18.00   1st Qu.: 0.00   1st Qu.:6.00  
##  Bt1     : 15   Median :22.00   Median : 5.00   Median :6.30  
##  Bt2     :118   Mean   :23.63   Mean   :13.88   Mean   :6.18  
##  Cr      : 75   3rd Qu.:28.00   3rd Qu.:20.00   3rd Qu.:6.50  
##  R       : 48   Max.   :60.00   Max.   :95.00   Max.   :7.00  
##  not-used:215   NA's   :167                     NA's   :381   
##    effclass        
##  Length:626        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
sort(unique(h$hzname))
##  [1] "2BC"  "2BCt" "2Bt1" "2Bt2" "2Bt3" "2Bt4" "2Bt5" "2CB"  "2CBt" "2Cr" 
## [11] "2Crt" "2R"   "A"    "A1"   "A2"   "AB"   "ABt"  "Ad"   "Ap"   "B"   
## [21] "BA"   "BAt"  "BC"   "BCt"  "Bt"   "Bt1"  "Bt2"  "Bt3"  "Bt4"  "Bw"  
## [31] "Bw1"  "Bw2"  "Bw3"  "C"    "CBt"  "Cd"   "Cr"   "Cr/R" "Crt"  "H1"  
## [41] "Oi"   "R"    "Rt"
h$hzname <- ifelse(h$hzname == "BT",
                   "Bt", h$hzname)
clay <- na.exclude(h$clay)

mean(clay)
## [1] 23.62767
median(clay)
## [1] 22
sort(table(round(h$clay)),
     
     decreasing = TRUE)[1]
## 25 
## 42
table(h$genhz, h$hzname)
##           
##            2BC 2BCt 2Bt1 2Bt2 2Bt3 2Bt4 2Bt5 2CB 2CBt 2Cr 2Crt 2R  A A1 A2 AB
##   A          0    0    0    0    0    0    0   0    0   0    0  0 97  7  7  0
##   BAt        0    0    0    0    0    0    0   0    0   0    0  0  0  0  0  1
##   Bt1        0    0    0    0    0    0    0   0    0   0    0  0  0  0  0  0
##   Bt2        1    1    3    8    8    6    1   1    1   0    0  0  0  0  0  0
##   Cr         0    0    0    0    0    0    0   0    0   4    2  0  0  0  0  0
##   R          0    0    0    0    0    0    0   0    0   0    0  6  0  0  0  0
##   not-used   0    0    0    0    0    0    0   0    0   0    0  0  0  0  0  0
##           
##            ABt Ad Ap  B BA BAt BC BCt Bt Bt1 Bt2 Bt3 Bt4 Bw Bw1 Bw2 Bw3  C CBt
##   A          0  1  1  0  0   0  0   0  0   0   0   0   0  0   0   0   0  0   0
##   BAt        2  0  0  0 31   8  0   0  0   0   0   0   0  0   0   0   0  0   0
##   Bt1        0  0  0  0  0   0  0   0  0   0   0   0   0 10   2   2   1  0   0
##   Bt2        0  0  0  0  0   0  4  16  0   0   0  47   8  0   0   0   0  6   6
##   Cr         0  0  0  0  0   0  0   0  0   0   0   0   0  0   0   0   0  0   0
##   R          0  0  0  0  0   0  0   0  0   0   0   0   0  0   0   0   0  0   0
##   not-used   0  0  0  1  0   0  0   0  8  93  88   0   0  0   0   0   0  0   0
##           
##            Cd Cr Cr/R Crt H1 Oi  R Rt
##   A         0  0    0   0  0  0  0  0
##   BAt       0  0    0   0  0  0  0  0
##   Bt1       0  0    0   0  0  0  0  0
##   Bt2       1  0    0   0  0  0  0  0
##   Cr        0 49    0  20  0  0  0  0
##   R         0  0    1   0  0  0 40  1
##   not-used  0  0    0   0  1 24  0  0
addmargins(table(h$genhz,
                 
                 h$texcl))
##           
##              c  cl   l scl sic sicl sil  sl Sum
##   A          0   0  78   0   0    0  27   6 111
##   BAt        0   2  32   0   0    1   5   1  41
##   Bt1        0   0  14   0   0    0   1   0  15
##   Bt2       16  54  29   5   1    3   5   0 113
##   Cr         1   0   0   0   0    0   0   0   1
##   R          0   0   0   0   0    0   0   0   0
##   not-used   2  45 112   4   1    5  18   1 188
##   Sum       19 101 265   9   2    9  56   8 469
ggplot(h, aes(x = texcl)) +geom_bar()

ggplot(h, aes(x = clay)) +
  
  geom_histogram(bins = nclass.Sturges(h$clay))
## Warning: Removed 167 rows containing non-finite values (stat_bin).

ggplot(h, aes(x = clay)) + geom_density()
## Warning: Removed 167 rows containing non-finite values (stat_density).

ggplot(h, (aes(x = genhz, y = clay))) +
  
  geom_boxplot()
## Warning: Removed 167 rows containing non-finite values (stat_boxplot).

ggplot(h, aes(sample = clay)) +
  
  geom_qq() +
  
  geom_qq_line()
## Warning: Removed 167 rows containing non-finite values (stat_qq).
## Warning: Removed 167 rows containing non-finite values (stat_qq_line).

h$hzdepm <- (h$hzdepb + h$hzdept) / 2

vars <- c("hzdepm", "clay", "sand",
          
          "total_frags_pct", "phfield")

round(cor(h[, vars], use = "complete.obs"), 2)
##                 hzdepm  clay  sand total_frags_pct phfield
## hzdepm            1.00  0.59 -0.08            0.50   -0.03
## clay              0.59  1.00 -0.17            0.28    0.13
## sand             -0.08 -0.17  1.00           -0.05    0.12
## total_frags_pct   0.50  0.28 -0.05            1.00   -0.16
## phfield          -0.03  0.13  0.12           -0.16    1.00
ggplot(h, aes(x = clay, y = hzdepm)) +
  
  geom_point() +
  
  ylim(100, 0)
## Warning: Removed 168 rows containing missing values (geom_point).

ggplot(h, aes(y = clay, x = hzdepm,
              
              group = peiid)) +
  
  geom_line() + coord_flip() + xlim(100, 0)
## Warning: Removed 168 row(s) containing missing values (geom_path).