library(readr)
slemsa_revised <- read_csv("~/soil loss/slemsa_revised.csv")
## Parsed with column specification:
## cols(
##   country = col_character(),
##   site = col_character(),
##   agroecoregion = col_character(),
##   farmer_name = col_character(),
##   drainage_class = col_character(),
##   season = col_double(),
##   treatment = col_double(),
##   treatment_description = col_character(),
##   system = col_character(),
##   cropping_Systems = col_character(),
##   intercepted = col_double(),
##   crop_canopy = col_double(),
##   seasonal_rainfall_energy = col_double(),
##   erodability_factor = col_double(),
##   topographic_ratio = col_double(),
##   soil_loss = col_double(),
##   residue_cover = col_double(),
##   rainfall = col_double(),
##   slope = col_double()
## )
View(slemsa_revised)
attach(slemsa_revised)
library(ggplot2)
library(extrafontdb)
library(MASS)
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(gridExtra)
library(repr) ### adjusting the length and width of your plot
library(beanplot)
library("devtools")
## Warning: package 'devtools' was built under R version 3.6.1
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 3.6.1
library("yarrr")
## Loading required package: jpeg
## Loading required package: BayesFactor
## Loading required package: coda
## Warning: package 'coda' was built under R version 3.6.1
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
## Loading required package: circlize
## ========================================
## circlize version 0.4.6
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization 
##   in R. Bioinformatics 2014.
## ========================================
## yarrr v0.1.5. Citation info at citation('yarrr'). Package guide at yarrr.guide()
## Email me at Nathaniel.D.Phillips.is@gmail.com
## 
## Attaching package: 'yarrr'
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
library(agricolae)
library(easynls)
library(MVN)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## sROC 0.1-2 loaded
library(lme4)
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
library(ggsignif)
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.1
## Loading required package: magrittr
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
library(multcompView)
library(Rmisc)
## Warning: package 'Rmisc' was built under R version 3.6.1
## Loading required package: lattice
## Loading required package: plyr
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
## 
##     mutate
country=as.factor(country)
site=as.factor(site)
sit=factor(site,levels =c("Mchinji","Ntcheu","Kasungu","Kabango","Salima","Gorongosa","Sussundenga","Chipole",ordered = TRUE))
agroecoregion=as.factor(agroecoregion)
treatment=as.factor(treatment)
treatment_description=as.factor(treatment_description)
cropping_system=factor(cropping_Systems,levels = c("CON","CA_SOLE","CA_ROT","CA_INT"),ordered = TRUE)
cropping_Systems=as.factor(cropping_Systems)
systems=factor(system,levels = c("CON","CA",order=TRUE))
system=as.factor(system)
drainage_class=as.factor(drainage_class)
drainage=factor(drainage_class,levels =c("Well drained","Moderately well drained","Fairly well drained","Somewhat poorly drained","Poorly drained","Very poorly drainedd",ordered = TRUE))

##
#######################model without interactions
slem<-lm(soil_loss~country+drainage_class+cropping_system+rainfall,data = slemsa_revised)
anova(slem)
## Analysis of Variance Table
## 
## Response: soil_loss
##                   Df  Sum Sq Mean Sq  F value    Pr(>F)    
## country            1   548.3   548.3  31.0109 2.965e-08 ***
## drainage_class     5   505.6   101.1   5.7186 3.059e-05 ***
## cropping_system    3 26016.1  8672.0 490.4599 < 2.2e-16 ***
## rainfall           1  3546.3  3546.3 200.5676 < 2.2e-16 ***
## Residuals       1748 30907.1    17.7                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(slem)
## 
## Call:
## lm(formula = soil_loss ~ country + drainage_class + cropping_system + 
##     rainfall, data = slemsa_revised)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.7866  -1.6623  -0.4945   0.8507  27.9947 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                           -5.5057864  0.9181373  -5.997
## countryMozambique                     -0.6683784  0.3902825  -1.713
## drainage_classModerately well drained  0.4675312  0.8622276   0.542
## drainage_classPoorly drained           1.3779831  0.9065990   1.520
## drainage_classSomewhat poorly drained  1.2286134  0.8903561   1.380
## drainage_classVery poorly drained      4.1740013  1.0745202   3.885
## drainage_classWell drained             0.5601568  0.7920259   0.707
## cropping_system.L                     -7.5958729  0.2366660 -32.095
## cropping_system.Q                      4.1374557  0.2118603  19.529
## cropping_system.C                     -2.4445601  0.1833222 -13.335
## rainfall                               0.0084254  0.0005949  14.162
##                                       Pr(>|t|)    
## (Intercept)                           2.44e-09 ***
## countryMozambique                     0.086973 .  
## drainage_classModerately well drained 0.587725    
## drainage_classPoorly drained          0.128705    
## drainage_classSomewhat poorly drained 0.167790    
## drainage_classVery poorly drained     0.000106 ***
## drainage_classWell drained            0.479508    
## cropping_system.L                      < 2e-16 ***
## cropping_system.Q                      < 2e-16 ***
## cropping_system.C                      < 2e-16 ***
## rainfall                               < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.205 on 1748 degrees of freedom
## Multiple R-squared:  0.4976, Adjusted R-squared:  0.4948 
## F-statistic: 173.2 on 10 and 1748 DF,  p-value: < 2.2e-16
##mean seperation and comparison LSD cropping systems
slem_lsd<-aov(soil_loss~country+site+cropping_system+rainfall+drainage,data = slemsa_revised)
##################
x<- LSD.test(slem_lsd,"cropping_system",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Soil Loss [t/ha]",ylab="Soil Loss [t/ha]")

### HSD grouping of means
h<-HSD.test(slem_lsd,"cropping_system",group =TRUE)
h
## $statistics
##    MSerror   Df     Mean       CV
##   16.46636 1701 3.402621 119.2574
## 
## $parameters
##    test          name.t ntr StudentizedRange alpha
##   Tukey cropping_system   4         3.636753  0.05
## 
## $means
##          soil_loss      std   r         Min      Max       Q25       Q50
## CA_INT   0.7070611 1.236869 293 0.000000000 17.29903 0.2613480 0.4362777
## CA_ROT   1.8754086 2.906669 596 0.000654674 23.71809 0.3079899 0.7619144
## CA_SOLE  1.9872120 2.874800 530 0.000654674 20.80496 0.2779504 0.8578885
## CON     11.6247143 8.896297 298 0.019153104 39.66231 5.4291786 8.7298323
##                Q75
## CA_INT   0.7918567
## CA_ROT   2.1640975
## CA_SOLE  2.6383639
## CON     15.8501251
## 
## $comparison
## NULL
## 
## $groups
##          soil_loss groups
## CON     11.6247143      a
## CA_SOLE  1.9872120      b
## CA_ROT   1.8754086      b
## CA_INT   0.7070611      c
## 
## attr(,"class")
## [1] "group"
### bar blot of mean comparison
bar.group(h$groups,density=4,border="blue",ylab="Soil Loss [t/ha])")
##,ylim=c(0,3500)
###########################################################################
##mean seperation and comparison LSD drainage class
x<- LSD.test(slem_lsd,"drainage",alpha=0.05,group=FALSE)
x
## $statistics
##    MSerror   Df     Mean       CV
##   16.46636 1701 3.402621 119.2574
## 
## $parameters
##         test p.ajusted   name.t ntr alpha
##   Fisher-LSD      none drainage   5  0.05
## 
## $means
##                         soil_loss      std   r         LCL      UCL
## Fairly well drained      1.388283 2.594282  30 -0.06481688 2.841382
## Moderately well drained  3.291239 4.990565 396  2.89128621 3.691191
## Poorly drained           4.171247 6.052510 210  3.62202690 4.720467
## Somewhat poorly drained  4.217741 5.934318 336  3.78354404 4.651937
## Well drained             2.958655 6.131454 745  2.66706150 3.250249
##                                Min      Max       Q25       Q50       Q75
## Fairly well drained     0.09188071 11.01112 0.1588065 0.2260149 0.5793583
## Moderately well drained 0.04543234 31.49316 0.4071687 1.3909523 3.8210919
## Poorly drained          0.17592190 36.67403 0.6385188 1.5871715 4.7434589
## Somewhat poorly drained 0.11754137 37.52483 0.5957100 1.6650217 5.2554426
## Well drained            0.00000000 39.66231 0.2119475 0.5798573 2.2794805
## 
## $comparison
##                                                    difference pvalue
## Fairly well drained - Moderately well drained     -1.90295604 0.0134
## Fairly well drained - Poorly drained              -2.78296425 0.0005
## Fairly well drained - Somewhat poorly drained     -2.82945792 0.0003
## Fairly well drained - Well drained                -1.57037236 0.0378
## Moderately well drained - Poorly drained          -0.88000821 0.0112
## Moderately well drained - Somewhat poorly drained -0.92650189 0.0021
## Moderately well drained - Well drained             0.33258368 0.1877
## Poorly drained - Somewhat poorly drained          -0.04649367 0.8964
## Poorly drained - Well drained                      1.21259189 0.0001
## Somewhat poorly drained - Well drained             1.25908557 0.0000
##                                                   signif.        LCL
## Fairly well drained - Moderately well drained           * -3.4100925
## Fairly well drained - Poorly drained                  *** -4.3363930
## Fairly well drained - Somewhat poorly drained         *** -4.3460412
## Fairly well drained - Well drained                      * -3.0524402
## Moderately well drained - Poorly drained                * -1.5594231
## Moderately well drained - Somewhat poorly drained      ** -1.5168311
## Moderately well drained - Well drained                    -0.1623797
## Poorly drained - Somewhat poorly drained                  -0.7466145
## Poorly drained - Well drained                         ***  0.5907644
## Somewhat poorly drained - Well drained                ***  0.7360622
##                                                           UCL
## Fairly well drained - Moderately well drained     -0.39581959
## Fairly well drained - Poorly drained              -1.22953550
## Fairly well drained - Somewhat poorly drained     -1.31287462
## Fairly well drained - Well drained                -0.08830453
## Moderately well drained - Poorly drained          -0.20059332
## Moderately well drained - Somewhat poorly drained -0.33617264
## Moderately well drained - Well drained             0.82754708
## Poorly drained - Somewhat poorly drained           0.65362721
## Poorly drained - Well drained                      1.83441935
## Somewhat poorly drained - Well drained             1.78210890
## 
## $groups
## NULL
## 
## attr(,"class")
## [1] "group"
diffograph(x,cex.axis=0.8,xlab="Soil Loss [t/ha]",ylab="Soil Loss [t/ha]")

### HSD grouping of means
h<-HSD.test(slem_lsd,"drainage",group =TRUE)
h
## $statistics
##    MSerror   Df     Mean       CV
##   16.46636 1701 3.402621 119.2574
## 
## $parameters
##    test   name.t ntr StudentizedRange alpha
##   Tukey drainage   5         3.861799  0.05
## 
## $means
##                         soil_loss      std   r        Min      Max
## Fairly well drained      1.388283 2.594282  30 0.09188071 11.01112
## Moderately well drained  3.291239 4.990565 396 0.04543234 31.49316
## Poorly drained           4.171247 6.052510 210 0.17592190 36.67403
## Somewhat poorly drained  4.217741 5.934318 336 0.11754137 37.52483
## Well drained             2.958655 6.131454 745 0.00000000 39.66231
##                               Q25       Q50       Q75
## Fairly well drained     0.1588065 0.2260149 0.5793583
## Moderately well drained 0.4071687 1.3909523 3.8210919
## Poorly drained          0.6385188 1.5871715 4.7434589
## Somewhat poorly drained 0.5957100 1.6650217 5.2554426
## Well drained            0.2119475 0.5798573 2.2794805
## 
## $comparison
## NULL
## 
## $groups
##                         soil_loss groups
## Somewhat poorly drained  4.217741      a
## Poorly drained           4.171247     ab
## Moderately well drained  3.291239     bc
## Well drained             2.958655      c
## Fairly well drained      1.388283      c
## 
## attr(,"class")
## [1] "group"
### bar blot of mean comparison
bar.group(h$groups,density=4,border="blue",ylab="Soil Loss [t/ha])")
########################### mean seperation by site

x<- LSD.test(slem_lsd,"site",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Soil Loss [t/ha]",ylab="Soil Loss [t/ha]")

### HSD grouping of means
h<-HSD.test(slem_lsd,"site",group =TRUE)
h
## $statistics
##    MSerror   Df     Mean       CV
##   16.46636 1701 3.402621 119.2574
## 
## $parameters
##    test name.t ntr StudentizedRange alpha
##   Tukey   site   8         4.291677  0.05
## 
## $means
##             soil_loss      std   r         Min      Max       Q25
## Chipole      4.524692 6.203068 252 0.137978519 36.67403 0.6152108
## Gorongosa    3.834410 6.125095 210 0.136609865 37.52483 0.5146978
## Kabango      3.405193 5.016857 252 0.111187239 31.07216 0.5108355
## Kasungu      2.835485 6.711124 175 0.042274745 36.95062 0.1243193
## Mchinji      1.842276 4.036683 180 0.018848552 28.50865 0.1252719
## Ntcheu       1.959562 3.931747 216 0.000654674 24.48750 0.2075270
## Salima       4.856308 9.090725 180 0.179240303 39.66231 0.4756925
## Sussundenga  3.625087 3.838323 252 0.000000000 20.80496 0.9967673
##                   Q50      Q75
## Chipole     1.8320279 6.442175
## Gorongosa   1.3906780 4.342198
## Kabango     1.2472806 4.249354
## Kasungu     0.2739624 0.954471
## Mchinji     0.2404846 1.363820
## Ntcheu      0.4151253 1.111926
## Salima      0.8362883 3.043015
## Sussundenga 2.4498903 4.811381
## 
## $comparison
## NULL
## 
## $groups
##             soil_loss groups
## Salima       4.856308      a
## Chipole      4.524692     ab
## Gorongosa    3.834410    abc
## Sussundenga  3.625087     bc
## Kabango      3.405193      c
## Kasungu      2.835485     cd
## Ntcheu       1.959562      d
## Mchinji      1.842276      d
## 
## attr(,"class")
## [1] "group"
### bar blot of mean comparison
bar.group(h$groups,density=4,border="blue",ylab="Soil Loss [t/ha])")

### model with interactions
slem_int<-lm(soil_loss~country+site+drainage+cropping_system+rainfall+country*rainfall+site*rainfall+drainage*rainfall+cropping_system*rainfall+drainage*country+drainage*rainfall+site*cropping_system+site*drainage+drainage*rainfall*cropping_system,data = slemsa_revised)
anova(slem_int)
## Analysis of Variance Table
## 
## Response: soil_loss
##                                     Df  Sum Sq Mean Sq  F value    Pr(>F)
## country                              1   437.9   437.9  34.3636 5.515e-09
## site                                 6  1255.7   209.3  16.4228 < 2.2e-16
## drainage                             4   141.7    35.4   2.7804   0.02556
## cropping_system                      3 25262.5  8420.8 660.7936 < 2.2e-16
## rainfall                             1  2970.8  2970.8 233.1223 < 2.2e-16
## country:rainfall                     1   398.6   398.6  31.2813 2.610e-08
## site:rainfall                        6    41.2     6.9   0.5393   0.77861
## drainage:rainfall                    4    40.5    10.1   0.7948   0.52846
## cropping_system:rainfall             3  1503.9   501.3  39.3369 < 2.2e-16
## country:drainage                     1     1.4     1.4   0.1130   0.73678
## site:cropping_system                19  4443.4   233.9  18.3516 < 2.2e-16
## site:drainage                        3   103.5    34.5   2.7084   0.04384
## drainage:cropping_system            11   282.8    25.7   2.0172   0.02361
## drainage:cropping_system:rainfall   11   269.0    24.5   1.9190   0.03304
## Residuals                         1642 20924.8    12.7                   
##                                      
## country                           ***
## site                              ***
## drainage                          *  
## cropping_system                   ***
## rainfall                          ***
## country:rainfall                  ***
## site:rainfall                        
## drainage:rainfall                    
## cropping_system:rainfall          ***
## country:drainage                     
## site:cropping_system              ***
## site:drainage                     *  
## drainage:cropping_system          *  
## drainage:cropping_system:rainfall *  
## Residuals                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
########################################## multiple comparison

mc = glht(slem_lsd,
          mcp(cropping_system = "Tukey"))
## Warning in glht.matrix(model = structure(list(coefficients =
## c(`(Intercept)` = -2.66903757883595, : 1 out of 17 coefficients not
## estimable in 'model'
mcs = summary(mc, test=adjusted("single-step"))
mcs
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = soil_loss ~ country + site + cropping_system + 
##     rainfall + drainage, data = slemsa_revised)
## 
## Linear Hypotheses:
##                       Estimate Std. Error t value Pr(>|t|)    
## CA_SOLE - CON == 0     -9.5896     0.2947 -32.536   <1e-05 ***
## CA_ROT - CON == 0      -9.7449     0.2879 -33.849   <1e-05 ***
## CA_INT - CON == 0     -11.3506     0.3417 -33.218   <1e-05 ***
## CA_ROT - CA_SOLE == 0  -0.1553     0.2434  -0.638    0.919    
## CA_INT - CA_SOLE == 0  -1.7609     0.3096  -5.687   <1e-05 ***
## CA_INT - CA_ROT == 0   -1.6056     0.2985  -5.378   <1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
##############################################################
out<-LSD.test(slem_lsd,"cropping_system",p.adj="hommel",console=TRUE)
## 
## Study: slem_lsd ~ "cropping_system"
## 
## LSD t Test for soil_loss 
## P value adjustment method: hommel 
## 
## Mean Square Error:  16.46636 
## 
## cropping_system,  means and individual ( 95 %) CI
## 
##          soil_loss      std   r       LCL       UCL         Min      Max
## CA_INT   0.7070611 1.236869 293  0.242094  1.172028 0.000000000 17.29903
## CA_ROT   1.8754086 2.906669 596  1.549397  2.201420 0.000654674 23.71809
## CA_SOLE  1.9872120 2.874800 530  1.641497  2.332927 0.000654674 20.80496
## CON     11.6247143 8.896297 298 11.163664 12.085764 0.019153104 39.66231
## 
## Alpha: 0.05 ; DF Error: 1701
## Critical Value of t: 1.96136 
## 
## Groups according to probability of means differences and alpha level( 0.05 )
## 
## Treatments with the same letter are not significantly different.
## 
##          soil_loss groups
## CON     11.6247143      a
## CA_SOLE  1.9872120      b
## CA_ROT   1.8754086      b
## CA_INT   0.7070611      c
out
## $statistics
##    MSerror   Df     Mean       CV
##   16.46636 1701 3.402621 119.2574
## 
## $parameters
##         test p.ajusted          name.t ntr alpha
##   Fisher-LSD    hommel cropping_system   4  0.05
## 
## $means
##          soil_loss      std   r       LCL       UCL         Min      Max
## CA_INT   0.7070611 1.236869 293  0.242094  1.172028 0.000000000 17.29903
## CA_ROT   1.8754086 2.906669 596  1.549397  2.201420 0.000654674 23.71809
## CA_SOLE  1.9872120 2.874800 530  1.641497  2.332927 0.000654674 20.80496
## CON     11.6247143 8.896297 298 11.163664 12.085764 0.019153104 39.66231
##               Q25       Q50        Q75
## CA_INT  0.2613480 0.4362777  0.7918567
## CA_ROT  0.3079899 0.7619144  2.1640975
## CA_SOLE 0.2779504 0.8578885  2.6383639
## CON     5.4291786 8.7298323 15.8501251
## 
## $comparison
## NULL
## 
## $groups
##          soil_loss groups
## CON     11.6247143      a
## CA_SOLE  1.9872120      b
## CA_ROT   1.8754086      b
## CA_INT   0.7070611      c
## 
## attr(,"class")
## [1] "group"

Line graph with error bars

rv <- summarySE(slemsa_revised, measurevar="soil_loss", groupvars=c("country","cropping_system"))
rv
##      country cropping_system   N  soil_loss         sd         se
## 1     Malawi             CON 137 11.4506771 10.3088397 0.88074362
## 2     Malawi         CA_SOLE 208  0.6971066  1.9844206 0.13759481
## 3     Malawi          CA_ROT 274  1.1197985  2.5729121 0.15543530
## 4     Malawi          CA_INT 132  0.7928394  1.7390608 0.15136581
## 5 Mozambique             CON 168 12.0451119  7.7371746 0.59693598
## 6 Mozambique         CA_SOLE 336  2.9218713  3.4133966 0.18621605
## 7 Mozambique          CA_ROT 336  2.6006385  3.1286754 0.17068323
## 8 Mozambique          CA_INT 168  0.6615050  0.5704809 0.04401355
##           ci
## 1 1.74172403
## 2 0.27126685
## 3 0.30600418
## 4 0.29943768
## 5 1.17851336
## 6 0.36630012
## 7 0.33574597
## 8 0.08689468
#####################
pd <- position_dodge(0.1) # move them .05 to the left and right

ggplot(rv, aes(x=as.numeric(cropping_system), y=soil_loss, colour=country)) + 
    geom_errorbar(aes(ymin=soil_loss-se, ymax=soil_loss+se), width=.1, position=pd) +
    geom_line(position=pd) +
    geom_point(position=pd)

# Use 95% confidence interval instead of SEM
ggplot(rv, aes(x=cropping_system, y=soil_loss, colour=country)) + 
    geom_errorbar(aes(ymin=soil_loss-ci, ymax=soil_loss+ci), width=.1, position=pd) +
    geom_line(position=pd) +
    geom_point(position=pd)
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

soil.model = lmer(soil_loss~country+site+drainage_class+cropping_system+(1|rainfall),data = slemsa_revised) 
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
summary(soil.model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: soil_loss ~ country + site + drainage_class + cropping_system +  
##     (1 | rainfall)
##    Data: slemsa_revised
## 
## REML criterion at convergence: 10016.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7631 -0.4353 -0.0574  0.2262  6.3214 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  rainfall (Intercept)  2.753   1.659   
##  Residual             16.464   4.058   
## Number of obs: 1759, groups:  rainfall, 99
## 
## Fixed effects:
##                                       Estimate Std. Error t value
## (Intercept)                             4.0740     1.2486   3.263
## countryMozambique                      -1.1801     1.0247  -1.152
## siteGorongosa                          -0.6244     0.8086  -0.772
## siteKabango                            -0.7727     0.4207  -1.837
## siteKasungu                            -2.6325     0.7475  -3.522
## siteMchinji                            -4.2993     0.8031  -5.353
## siteNtcheu                             -3.1843     0.5666  -5.620
## siteSussundenga                        -0.5056     0.9748  -0.519
## drainage_classModerately well drained   1.8255     1.1406   1.601
## drainage_classPoorly drained            2.5898     1.2391   2.090
## drainage_classSomewhat poorly drained   2.3046     1.2403   1.858
## drainage_classVery poorly drained       5.3522     1.3836   3.868
## drainage_classWell drained              1.7884     1.1234   1.592
## cropping_system.L                      -7.7467     0.2336 -33.167
## cropping_system.Q                       4.0088     0.2088  19.197
## cropping_system.C                      -2.4623     0.1770 -13.910
## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

GRAPHICAL PRESENTATION

####
stat_box_data <- function(soil_loss, upper_limit = max(soil_loss)*1.5) {
  return( 
    data.frame(
      y =40,
      label = paste('N =', length(soil_loss), '\n',
                    'Mean =', round(mean(soil_loss), 0), '\n')
    )
  )
}
##################

stat_data<- function(soil_loss, upper_limit = max(soil_loss)*1.5) {
  return( 
    data.frame(
      y = 3,
      label = paste('N =', length(soil_loss), '\n',
                    'Mean =', round(mean(soil_loss), 3), '\n')
    )
  )
}
##########################################################################
#####sites

theme_set(theme_gray(base_size = 10))
ggplot(slemsa_revised, aes(x =sit, y = soil_loss))+ geom_boxplot(size=0.7,varwidth = FALSE,outlier.colour = "red",outlier.shape = 2, shape=2,fill=c("grey","grey","grey","grey","grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Sites")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
    fun.data = stat_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.5,
    color="blue"
  )+ theme_bw(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+
geom_text(data =slemsa_revised, x = 1, y = 2.3, label = "e", color="blue")+
geom_text(data =slemsa_revised, x = 2, y = 2.3, label = "d", color="blue")+
geom_text(data =slemsa_revised, x = 3, y = 2.3, label = "d", color="blue")+
geom_text(data =slemsa_revised, x = 4, y = 2.3, label = "c", color="blue")+
geom_text(data =slemsa_revised, x = 5, y =2.3, label = "c", color="blue")+
geom_text(data =slemsa_revised, x = 6, y = 2.3, label = "b", color="blue")+
geom_text(data =slemsa_revised, x = 7, y = 2.3, label = "a", color="blue")+
geom_text(data =slemsa_revised, x = 8, y = 2.3, label = "a", color="blue") + scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).
## Warning: Removed 515 rows containing non-finite values (stat_smooth).
## Warning: Removed 515 rows containing non-finite values (stat_summary).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

theme_set(theme_gray(base_size = 10))
ggplot(slemsa_revised, aes(x =cropping_system, y = soil_loss))+ geom_boxplot(size=0.7,outlier.colour = "red",outlier.shape = 2, shape=6,fill=c("grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping Systems")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
    fun.data = stat_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=3,
    color="blue"
  )+ theme_bw(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+
geom_text(data =slemsa_revised, x = 1, y = 1.8, label = "a")+
geom_text(data =slemsa_revised, x = 2, y = 1, label = "b")+
geom_text(data =slemsa_revised, x = 3, y =1, label = "b")+
geom_text(data =slemsa_revised, x = 4, y = 0.7, label = "c")+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).
## Warning: Removed 515 rows containing non-finite values (stat_smooth).
## Warning: Removed 515 rows containing non-finite values (stat_summary).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

#########################################################



#########################################################
theme_set(theme_gray(base_size = 10))
ggplot(slemsa_revised, aes(x =cropping_system, y = soil_loss))+ geom_boxplot(size=0.7,outlier.colour = "red",outlier.shape = 2, shape=6,fill=c("grey","grey","grey","grey","grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping Systems")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
    fun.data = stat_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.9,
    color="blue"
  )+ theme_bw(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+facet_wrap(.~agroecoregion)+
geom_text(data =slemsa_revised, x = 1, y = 1.9, label = "a",color="blue")+
geom_text(data =slemsa_revised, x = 2, y = 1.4, label = "b",color="blue")+
geom_text(data =slemsa_revised, x = 3, y = 1.3, label = "c",color="blue")+
geom_text(data =slemsa_revised, x = 4, y = 0.7, label = "d",color="blue")+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).
## Warning: Removed 515 rows containing non-finite values (stat_smooth).
## Warning: Removed 515 rows containing non-finite values (stat_summary).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

##################by country

theme_set(theme_gray(base_size = 10))
ggplot(slemsa_revised, aes(x =cropping_system, y = soil_loss))+ geom_boxplot(size=0.7,outlier.colour = "red",outlier.shape = 2, shape=6,fill=c("grey","grey","grey","grey","grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping Systems")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
    fun.data = stat_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.7,
    color="blue"
  )+ theme_bw(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+facet_wrap(.~country)+
geom_text(data =slemsa_revised, x = 1, y = 1.9, label = "a",color="blue")+
geom_text(data =slemsa_revised, x = 2, y = 1.5, label = "c",color="blue")+
geom_text(data =slemsa_revised, x = 3, y = 1.5, label = "b",color="blue")+
geom_text(data =slemsa_revised, x = 4, y = 0.7, label = "b",color="blue")+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).
## Warning: Removed 515 rows containing non-finite values (stat_smooth).
## Warning: Removed 515 rows containing non-finite values (stat_summary).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

##########################################################
theme_set(theme_gray(base_size = 10))
ggplot(slemsa_revised, aes(x =systems, y = soil_loss))+ geom_boxplot(size=0.7,outlier.colour = "red",outlier.shape = 2, shape=6,fill=c("grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping Systems")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
    fun.data = stat_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.9,
    color="blue"
  )+ theme_classic(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+facet_wrap(.~agroecoregion)+
geom_text(data =slemsa_revised, x = 1, y = 1.8, label = "a",color="blue")+
geom_text(data =slemsa_revised, x = 2, y = 0.7, label = "b",color="blue")+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).
## Warning: Removed 515 rows containing non-finite values (stat_smooth).
## Warning: Removed 515 rows containing non-finite values (stat_summary).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

#######################################################
theme_set(theme_gray(base_size = 10))
ggplot(slemsa_revised, aes(x =systems, y = soil_loss))+ geom_boxplot(size=0.7,outlier.colour = "red",outlier.shape = 2, shape=6,fill=c("grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping Systems")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
    fun.data = stat_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=3,
    color="blue"
  )+ theme_classic(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+facet_wrap(.~country)+
geom_text(data =slemsa_revised, x = 1, y = 1.8, label = "a",color="blue")+
geom_text(data =slemsa_revised, x = 2, y = 0.5, label = "b",color="blue")+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).
## Warning: Removed 515 rows containing non-finite values (stat_smooth).
## Warning: Removed 515 rows containing non-finite values (stat_summary).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

###################################################

theme_set(theme_gray(base_size = 12))
m <- ggplot(data=slemsa_revised,aes(x=cropping_system,y=soil_loss,color=country))
m + geom_boxplot(outlier.size=0,shape=8)+xlab("Cropping Systems")+ylab("Soil Loss [t/ha]")+theme_classic()+theme(legend.position = c(0.80, 0.60))+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+
geom_text(data =slemsa_revised, x = 1, y = 2, label = "b   a",color="black")+
geom_text(data =slemsa_revised, x = 2, y =2, label = "b   a",color="black")+
geom_text(data =slemsa_revised, x = 3, y = 2, label = "b   a",color="black")+
geom_text(data =slemsa_revised, x = 4, y = 2, label = "a   a",color="black")+
geom_text(data =slemsa_revised, x = 1, y = 0.5, label = "a",color="black")+
geom_text(data =slemsa_revised, x = 2, y = 0.5, label = "b",color="black")+
geom_text(data =slemsa_revised, x = 3, y = 0.5, label = "b",color="black")+
geom_text(data =slemsa_revised, x = 4, y = 0.5, label = "c",color="black")+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

p <- ggplot(data=slemsa_revised, aes(x=systems,y=soil_loss,color=drainage))
p + geom_boxplot(size=0.8, shape=8)+xlab("Cropping system")+ylab("Soil Loss [t/ha]")+theme_classic()+
geom_text(data =slemsa_revised, x = 1, y = 2, label = "e     d     f    c  b    a",color="black")+
geom_text(data =slemsa_revised, x = 2, y = 2, label = "b     b     c     a   a     a",color="black")+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).

#############################################################
theme_set(theme_gray(base_size = 10))
ggplot(slemsa_revised, aes(x =systems, y = soil_loss))+ geom_boxplot(size=0.7,varwidth =TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping Systems")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
    fun.data = stat_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.5
  )+ theme_classic(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+facet_wrap(.~drainage_class)+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).
## Warning: Removed 515 rows containing non-finite values (stat_smooth).
## Warning: Removed 515 rows containing non-finite values (stat_summary).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

############################# all cropping systems
theme_set(theme_gray(base_size = 10))
ggplot(slemsa_revised, aes(x =cropping_system, y = soil_loss))+ geom_boxplot(size=0.7,varwidth =TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping Systems")+stat_summary(fun.y=mean, geom="point", shape=10, size=2, color="blue", fill="red")+stat_summary(
    fun.data = stat_data, 
    geom = "text", 
    hjust = 0.5,
    vjust = 0.9,
    size=2.5
  )+ theme_classic(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=6),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+facet_wrap(.~drainage_class)+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing non-finite values (stat_boxplot).
## Warning: Removed 515 rows containing non-finite values (stat_smooth).
## Warning: Removed 515 rows containing non-finite values (stat_summary).

## Warning: Removed 515 rows containing non-finite values (stat_summary).

###################################################################

theme_set(theme_gray(base_size = 7))
p1 <- ggplot(slemsa_revised, aes(drainage, soil_loss, fill = cropping_system)) +
        geom_bar(stat = "identity", position = "dodge") +xlab("Drainage Class")+ylab("Soil Loss [t/ha]")
p1+theme_classic()+ theme_classic(base_size = 12)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+ theme(legend.position = c(0.75, 0.80))+scale_y_log10()+ylim(0,3)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 515 rows containing missing values (geom_bar).

#####################################
stat_box_dat <- function(soil_loss, upper_limit = max(soil_loss)*1.5) {
  return( 
    data.frame(
      y = 10,
      label = paste('N =', length(soil_loss), '\n',
                    'Mean =', round(mean(soil_loss), 0), '\n')
    )
  )
}


###########################

ggplot(slemsa_revised, aes(x = rainfall, y = soil_loss)) +  
  geom_point(size=3) + 
  geom_smooth(method=lm, position = "jitter", aes(fill=agroecoregion), level = 0.95)+ylab("Soil Loss [t/ha]") + 
  xlab("Rainfall [mm]")+theme_classic()+facet_wrap(.~agroecoregion)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+ theme( axis.line = element_line(colour = "black", 
                      size = 0.8, linetype = "solid"))+ theme(legend.position="none")+ 
  ylim(0,40) +
  geom_abline(xintercept =400, linetype=2, color = "black", size=1)
## Warning: Ignoring unknown parameters: xintercept
## Warning: Removed 33 rows containing missing values (geom_smooth).

###  + stat_cor(method = "pearson", label.x = 800, label.y = 40)

###########################

theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_revised,aes(y=soil_loss,x=rainfall, color=agroecoregion))
p + geom_smooth()+xlab("Rainfall [mm]")+ylab("Soil Loss [t/ha]")+theme_classic()+
  theme(legend.position = c(0.82, 0.84))+ theme(axis.text.x = element_text(face="bold", color="black",  size=8),axis.text.y = element_text(face="bold", color="black", size=10))+ theme( axis.line = element_line(colour = "black",size = 0.8, linetype = "solid"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

###########################
theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_revised,aes(y=soil_loss,x=rainfall, color=agroecoregion))
p + geom_smooth()+xlab("Rainfall [mm]")+ylab("Soil Loss [t/ha]")+theme_classic()+ theme(axis.text.x = element_text(face="bold", color="black",  size=8),axis.text.y = element_text(face="bold", color="black", size=10))+ theme( axis.line = element_line(colour = "black",size = 0.8, linetype = "solid"))+facet_wrap(country~agroecoregion)+ theme(legend.position="none")#+xlim(0,5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(slemsa_revised, aes(x = crop_canopy, y = soil_loss)) +  
  geom_point(size=3) + 
  geom_smooth(method=lm, position = "jitter", level = 0.95)+ylab("Soil Loss [t/ha]") + 
  xlab("Crop Canopy")+theme_classic()+ stat_cor(method = "pearson", label.x = 0.01, label.y = 40)+facet_wrap(.~agroecoregion)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+ theme( axis.line = element_line(colour = "black", 
                      size = 0.8, linetype = "solid"))+ theme(legend.position="none")+ 
  ylim(0,40)

ggplot(slemsa_revised, aes(x = residue_cover, y = soil_loss)) +  
  geom_point(size=3) + 
  geom_smooth(method=lm, position = "jitter", aes(fill=agroecoregion), level = 0.95)+ylab("Soil Loss [t/ha]") + 
  xlab("Residue Cover")+theme_classic()+ stat_cor(method = "pearson", label.x = 25, label.y = 40)+facet_wrap(.~agroecoregion)+ theme(axis.text.x = element_text(face="bold", color="black", 
                           size=8),
          axis.text.y = element_text(face="bold", color="black", 
                           size=10))+ theme( axis.line = element_line(colour = "black", 
                      size = 0.8, linetype = "solid"))+ theme(legend.position="none")+ 
  ylim(0,40) 
## Warning: Removed 252 rows containing non-finite values (stat_smooth).
## Warning: Removed 252 rows containing non-finite values (stat_cor).
## Warning: Removed 252 rows containing missing values (geom_point).
## Warning: Removed 42 rows containing missing values (geom_smooth).

#soil loss and rainfall intercepted 
theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_revised,aes(y=soil_loss,x=intercepted, color=agroecoregion))
p + geom_smooth()+xlab("Rainfall Intercepted [i(%)]")+ylab("Soil Loss [t/ha]")+theme_classic()+
  theme(legend.position = c(0.82, 0.84))+ theme(axis.text.x = element_text(face="bold", color="black",  size=8),axis.text.y = element_text(face="bold", color="black", size=10))+ theme( axis.line = element_line(colour = "black",size = 0.8, linetype = "solid"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

##############################################################
############## by agro
theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_revised,aes(y=soil_loss,x=intercepted))
p + geom_smooth()+xlab("Rainfall Intercepted [i(%)]")+ylab("Soil Loss [t/ha]")+theme_classic()+
  theme(legend.position = c(0.82, 0.84))+ theme(axis.text.x = element_text(face="bold", color="black",  size=8),axis.text.y = element_text(face="bold", color="black", size=10))+ theme( axis.line = element_line(colour = "black",size = 0.8, linetype = "solid"))+facet_wrap(.~agroecoregion)+ stat_cor(method = "pearson", label.x = 25, label.y = 20)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

################## by country
theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_revised,aes(y=soil_loss,x=intercepted))
p + geom_smooth()+xlab("Rainfall Intercepted [i(%)]")+ylab("Soil Loss [t/ha]")+theme_classic()+
  theme(legend.position = c(0.82, 0.84))+ theme(axis.text.x = element_text(face="bold", color="black",  size=8),axis.text.y = element_text(face="bold", color="black", size=10))+ theme( axis.line = element_line(colour = "black",size = 0.8, linetype = "solid"))+facet_wrap(.~country)+ stat_cor(method = "pearson", label.x = 25, label.y = 20)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

########################### cropping system
theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_revised,aes(y=soil_loss,x=intercepted))
p + geom_smooth()+xlab("Rainfall Intercepted [i(%)]")+ylab("Soil Loss [t/ha]")+theme_classic()+
  theme(legend.position = c(0.82, 0.84))+ theme(axis.text.x = element_text(face="bold", color="black",  size=8),axis.text.y = element_text(face="bold", color="black", size=10))+ theme( axis.line = element_line(colour = "black",size = 0.8, linetype = "solid"))+facet_wrap(.~cropping_Systems)+ stat_cor(method = "pearson", label.x = 25, label.y = 20)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at 100.25
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 0.061919
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 100.25
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.24883
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0.045766
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.13777
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning: Computation failed in `stat_smooth()`:
## NA/NaN/Inf in foreign function call (arg 5)

################systems in general
theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_revised,aes(y=soil_loss,x=intercepted))
p + geom_smooth()+xlab("Rainfall Intercepted [i(%)]")+ylab("Soil Loss [t/ha]")+theme_classic()+
  theme(legend.position = c(0.82, 0.84))+ theme(axis.text.x = element_text(face="bold", color="black",  size=8),axis.text.y = element_text(face="bold", color="black", size=10))+ theme( axis.line = element_line(colour = "black",size = 0.8, linetype = "solid"))+facet_wrap(.~system)+ stat_cor(method = "pearson", label.x = 25, label.y = 20)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_revised,aes(y=intercepted,x=crop_canopy))
p + geom_smooth()+xlab("Crop Canopy")+ylab("Rainfall Intercepted [i(%)]")+theme_classic()+ theme(axis.text.x = element_text(face="bold", color="black",  size=8),axis.text.y = element_text(face="bold", color="black", size=10))+ theme( axis.line = element_line(colour = "black",size = 0.8, linetype = "solid"))+ stat_cor(method = "pearson", label.x = 0.05, label.y = 90)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Regression Tree Analysis

library(rpart)
### grow a tree

fit <- rpart(soil_loss ~cropping_system +country+drainage_class,
   method="anova", data=slemsa_revised)
printcp(fit)
## 
## Regression tree:
## rpart(formula = soil_loss ~ cropping_system + country + drainage_class, 
##     data = slemsa_revised, method = "anova")
## 
## Variables actually used in tree construction:
## [1] country         cropping_system drainage_class 
## 
## Root node error: 61523/1759 = 34.976
## 
## n= 1759 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.413299      0   1.00000 1.00076 0.084252
## 2 0.011872      1   0.58670 0.58818 0.047894
## 3 0.010483      2   0.57483 0.58792 0.049192
## 4 0.010000      3   0.56434 0.58572 0.049195
plotcp(fit) # visualize cross-validation results 

summary(fit) # detailed summary of splits
## Call:
## rpart(formula = soil_loss ~ cropping_system + country + drainage_class, 
##     data = slemsa_revised, method = "anova")
##   n= 1759 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.41329941      0 1.0000000 1.0007568 0.08425191
## 2 0.01187244      1 0.5867006 0.5881837 0.04789398
## 3 0.01048341      2 0.5748282 0.5879175 0.04919219
## 4 0.01000000      3 0.5643447 0.5857226 0.04919454
## 
## Variable importance
## cropping_system  drainage_class         country 
##              92               4               3 
## 
## Node number 1: 1759 observations,    complexity param=0.4132994
##   mean=3.476689, MSE=34.97638 
##   left son=2 (1454 obs) right son=3 (305 obs)
##   Primary splits:
##       cropping_system splits as  RLLL,   improve=0.413299400, (0 missing)
##       drainage_class  splits as  LLRRRL, improve=0.011315520, (0 missing)
##       country         splits as  LR,     improve=0.008912322, (0 missing)
## 
## Node number 2: 1454 observations,    complexity param=0.01187244
##   mean=1.735333, MSE=7.99099 
##   left son=4 (614 obs) right son=5 (840 obs)
##   Primary splits:
##       country         splits as  LR,     improve=0.06286597, (0 missing)
##       drainage_class  splits as  LRRRRL, improve=0.03367622, (0 missing)
##       cropping_system splits as  RRRL,   improve=0.03358436, (0 missing)
##   Surrogate splits:
##       drainage_class splits as  LRRRRL, agree=0.919, adj=0.808, (0 split)
## 
## Node number 3: 305 observations,    complexity param=0.01048341
##   mean=11.7781, MSE=80.25229 
##   left son=6 (207 obs) right son=7 (98 obs)
##   Primary splits:
##       drainage_class splits as  LLRRRL, improve=0.026350340, (0 missing)
##       country        splits as  LR,     improve=0.001089385, (0 missing)
##   Surrogate splits:
##       country splits as  LR, agree=0.77, adj=0.286, (0 split)
## 
## Node number 4: 614 observations
##   mean=0.9063156, MSE=4.954162 
## 
## Node number 5: 840 observations
##   mean=2.341305, MSE=9.341203 
## 
## Node number 6: 207 observations
##   mean=10.77753, MSE=82.98951 
## 
## Node number 7: 98 observations
##   mean=13.89156, MSE=67.88921
# plot tree 
plot(fit, uniform=TRUE, 
   main="Classification Tree for Soil Loss")
text(fit, use.n=TRUE, all=TRUE, cex=.8)

# create additional plots 
par(mfrow=c(1,2)) # two plots on one page 
rsq.rpart(fit) # visualize cross-validation results   
## 
## Regression tree:
## rpart(formula = soil_loss ~ cropping_system + country + drainage_class, 
##     data = slemsa_revised, method = "anova")
## 
## Variables actually used in tree construction:
## [1] country         cropping_system drainage_class 
## 
## Root node error: 61523/1759 = 34.976
## 
## n= 1759 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.413299      0   1.00000 1.00076 0.084252
## 2 0.011872      1   0.58670 0.58818 0.047894
## 3 0.010483      2   0.57483 0.58792 0.049192
## 4 0.010000      3   0.56434 0.58572 0.049195

# plot tree 
plot(fit, uniform=TRUE, 
   main="Regression Tree for Soil Loss ")
text(fit, use.n=TRUE, all=TRUE, cex=.8)
##################################################################

# Conditional Inference Tree for Mileage
library(party)
## Warning: package 'party' was built under R version 3.6.1
## Loading required package: grid
## Loading required package: modeltools
## Loading required package: stats4
## 
## Attaching package: 'modeltools'
## The following object is masked from 'package:plyr':
## 
##     empty
## The following object is masked from 'package:lme4':
## 
##     refit
## The following object is masked from 'package:BayesFactor':
## 
##     posterior
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.6.1
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
fit2 <- ctree(soil_loss ~cropping_system)
plot(fit2,cex=0.5)