library(readr)
slemsa_consolidated <- read_csv("~/soil loss/slemsa_consolidated.csv")
## Parsed with column specification:
## cols(
##   Country = col_character(),
##   Site = col_character(),
##   Agroeco = col_character(),
##   Farmer_Name = col_character(),
##   Drainage_class = col_character(),
##   Season = col_character(),
##   Treatment = col_double(),
##   Treatment_description = col_character(),
##   System = col_character(),
##   Cropping_Systems = col_character(),
##   Intercepted = col_double(),
##   Crop_Canopy = col_double(),
##   Seasonal_rainfall_energy = col_double(),
##   Erodability_Factor = col_double(),
##   Topographic_ratio = col_double(),
##   Soil_loss = col_double(),
##   Residue_cover = col_double(),
##   rainfall = col_double(),
##   slope = col_double()
## )
View(slemsa_consolidated)
attach(slemsa_consolidated)
names(slemsa_consolidated)
##  [1] "Country"                  "Site"                    
##  [3] "Agroeco"                  "Farmer_Name"             
##  [5] "Drainage_class"           "Season"                  
##  [7] "Treatment"                "Treatment_description"   
##  [9] "System"                   "Cropping_Systems"        
## [11] "Intercepted"              "Crop_Canopy"             
## [13] "Seasonal_rainfall_energy" "Erodability_Factor"      
## [15] "Topographic_ratio"        "Soil_loss"               
## [17] "Residue_cover"            "rainfall"                
## [19] "slope"
Country=as.factor(Country)
Site=as.factor(Site)
Farmer_Name=as.factor(Farmer_Name)
Agroeco=as.factor(Agroeco)
Season=as.factor(Season)
Drainage_class=as.factor(Drainage_class)
Treatment=as.factor(Treatment)
Treatment_description=as.factor(Treatment_description)
System=as.factor(System)
Cropping_Systems=as.factor(Cropping_Systems)
library(ggplot2)
library(maps)
library(ggalt)
library(extrafontdb)
library(MASS)
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(gridExtra)
summary(Soil_loss)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.355   1.034   3.837   3.773  59.388
describeBy(Soil_loss,Country)
## 
##  Descriptive statistics by group 
## group: Malawi
##    vars   n mean   sd median trimmed  mad min   max range skew kurtosis
## X1    1 751 3.32 7.94   0.45    1.17 0.52   0 59.39 59.39 3.68    14.87
##      se
## X1 0.29
## -------------------------------------------------------- 
## group: Mozambique
##    vars    n mean  sd median trimmed  mad min   max range skew kurtosis
## X1    1 1008 4.23 6.5   1.68    2.77 1.95   0 49.38 49.38  3.3    13.97
##     se
## X1 0.2
describeBy(Soil_loss,Agroeco)
## 
##  Descriptive statistics by group 
## group: Highlands
##    vars   n mean   sd median trimmed  mad  min   max range skew kurtosis
## X1    1 859 3.52 6.56   0.87    1.94 1.07 0.02 49.38 49.36 3.48    15.03
##      se
## X1 0.22
## -------------------------------------------------------- 
## group: Lowlands
##    vars   n mean   sd median trimmed  mad min   max range skew kurtosis
## X1    1 900 4.14 7.68   1.17    2.24 1.38   0 59.39 59.39 3.47    13.94
##      se
## X1 0.26
describeBy(Soil_loss,Drainage_class)
## 
##  Descriptive statistics by group 
## group: Moderately well drained
##    vars   n mean   sd median trimmed  mad  min   max range skew kurtosis
## X1    1 426 3.16 4.88    1.3    2.07 1.58 0.05 31.49 31.45 3.04     11.1
##      se
## X1 0.24
## -------------------------------------------------------- 
## group: Poorly drained
##    vars   n mean   sd median trimmed  mad  min   max range skew kurtosis
## X1    1 210 4.55 7.28   1.59     2.8 1.84 0.18 47.56 47.39 3.07    11.31
##     se
## X1 0.5
## -------------------------------------------------------- 
## group: Somewhat poorly drained
##    vars   n mean   sd median trimmed  mad  min   max range skew kurtosis
## X1    1 336  4.7 7.49   1.67    3.02 1.93 0.12 49.38 49.26 3.21    12.57
##      se
## X1 0.41
## -------------------------------------------------------- 
## group: Very poorly drained
##    vars  n mean   sd median trimmed  mad  min   max range skew kurtosis
## X1    1 42 7.17 9.61   1.76    5.03 1.85 0.45 33.95  33.5 1.64     1.61
##      se
## X1 1.48
## -------------------------------------------------------- 
## group: Well drained
##    vars   n mean   sd median trimmed mad min   max range skew kurtosis
## X1    1 745 3.45 7.79   0.58    1.42 0.7   0 59.39 59.39 3.74    15.69
##      se
## X1 0.29

Regression analysis

fit1 <- aov(Soil_loss~Country,data=slemsa_consolidated)
summary(fit1)
##               Df Sum Sq Mean Sq F value  Pr(>F)   
## Country        1    356   356.1   6.971 0.00836 **
## Residuals   1757  89740    51.1                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit1)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Soil_loss ~ Country, data = slemsa_consolidated)
## 
## $Country
##                        diff       lwr      upr     p adj
## Mozambique-Malawi 0.9095983 0.2339243 1.585272 0.0083556
fit2 <- aov(Soil_loss~Agroeco,data=slemsa_consolidated)
summary(fit2)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## Agroeco        1    167  167.02   3.263  0.071 .
## Residuals   1757  89929   51.18                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit2)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Soil_loss ~ Agroeco, data = slemsa_consolidated)
## 
## $Agroeco
##                         diff         lwr      upr     p adj
## Lowlands-Highlands 0.6164488 -0.05286011 1.285758 0.0710244
fit3 <- aov(Soil_loss~Drainage_class,data=slemsa_consolidated)
summary(fit3)
##                  Df Sum Sq Mean Sq F value   Pr(>F)    
## Drainage_class    4   1132  283.01    5.58 0.000184 ***
## Residuals      1754  88964   50.72                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit3)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Soil_loss ~ Drainage_class, data = slemsa_consolidated)
## 
## $Drainage_class
##                                                       diff        lwr
## Poorly drained-Moderately well drained           1.3949716 -0.2447385
## Somewhat poorly drained-Moderately well drained  1.5396795  0.1207658
## Very poorly drained-Moderately well drained      4.0141007  0.8689140
## Well drained-Moderately well drained             0.2913602 -0.8899079
## Somewhat poorly drained-Poorly drained           0.1447080 -1.5659768
## Very poorly drained-Poorly drained               2.6191292 -0.6680164
## Well drained-Poorly drained                     -1.1036114 -2.6229930
## Very poorly drained-Somewhat poorly drained      2.4744212 -0.7083438
## Well drained-Somewhat poorly drained            -1.2483193 -2.5262816
## Well drained-Very poorly drained                -3.7227406 -6.8069051
##                                                         upr     p adj
## Poorly drained-Moderately well drained           3.03468166 0.1380117
## Somewhat poorly drained-Moderately well drained  2.95859331 0.0256461
## Very poorly drained-Moderately well drained      7.15928749 0.0045844
## Well drained-Moderately well drained             1.47262827 0.9621161
## Somewhat poorly drained-Poorly drained           1.85539273 0.9993702
## Very poorly drained-Poorly drained               5.90627470 0.1893859
## Well drained-Poorly drained                      0.41577019 0.2744870
## Very poorly drained-Somewhat poorly drained      5.65718619 0.2107009
## Well drained-Somewhat poorly drained             0.02964291 0.0593228
## Well drained-Very poorly drained                -0.63857601 0.0088529
fit4 <- aov(Soil_loss~Cropping_Systems,data=slemsa_consolidated)
summary(fit4)
##                    Df Sum Sq Mean Sq F value Pr(>F)    
## Cropping_Systems    3  37439   12480   415.9 <2e-16 ***
## Residuals        1755  52657      30                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit4)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Soil_loss ~ Cropping_Systems, data = slemsa_consolidated)
## 
## $Cropping_Systems
##                      diff        lwr        upr     p adj
## CA_ROT-CA_INT   1.2161822  0.2229019  2.2094625 0.0090447
## CA_SOLE-CA_INT  1.3519338  0.3389843  2.3648834 0.0034288
## CON-CA_INT     13.1374999 11.9921351 14.2828646 0.0000000
## CA_SOLE-CA_ROT  0.1357516 -0.6948929  0.9663962 0.9750293
## CON-CA_ROT     11.9213177 10.9335100 12.9091253 0.0000000
## CON-CA_SOLE    11.7855660 10.7779823 12.7931498 0.0000000
fit<- aov(Soil_loss~Country+Agroeco+Drainage_class+Cropping_Systems,data=slemsa_consolidated)
summary(fit)
##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## Country             1    356     356  12.273 0.000471 ***
## Agroeco             1    181     181   6.225 0.012691 *  
## Drainage_class      4    872     218   7.511 5.37e-06 ***
## Cropping_Systems    3  37946   12649 435.986  < 2e-16 ***
## Residuals        1749  50742      29                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

CON and CA in general vs soil loss

theme_set(theme_gray(base_size = 12))
s<- ggplot(data=slemsa_consolidated, aes(x=System,y=Soil_loss,color=Country))
s + geom_boxplot(outlier.size=0,width=0.4) +
geom_jitter(position=position_jitter(h=.1))+xlab("Cropping Systems")+ylab("Soil Loss [t/ha]")+theme(legend.position = c(0.20, 0.80))

theme_set(theme_gray(base_size = 12))
s<- ggplot(data=slemsa_consolidated, aes(x=System,y=Soil_loss,color=Country))
s + geom_boxplot(outlier.size=0,width=0.4) +
geom_jitter(position=position_jitter(h=.1))+xlab("Cropping Systems")+ylab("Soil Loss [t/ha]")+facet_grid(Agroeco~.)+theme(legend.position = c(0.20, 0.80))

#p <- ggplot(data=slemsa_consolidated, aes(x=System,y=Soil_loss,color=Country))
#p + geom_boxplot(outlier.size=0,width=0.4,shape=6)+xlab("Cropping Systems")+ylab("Soil Loss [t/ha]")+facet_grid(Agroeco~.)
theme_set(theme_gray(base_size = 12))
m <- ggplot(data=slemsa_consolidated,aes(x=Cropping_Systems,y=Soil_loss,color=Country))
m + geom_boxplot(outlier.size=0,width=0.4,shape=8)+xlab("Cropping Systems")+ylab("Soil Loss [t/ha]")+theme(legend.position = c(0.20, 0.80))

theme_set(theme_gray(base_size = 12))
m <- ggplot(data=slemsa_consolidated,aes(x=Cropping_Systems,y=Soil_loss,color=Country))
m + geom_boxplot(outlier.size=0,width=0.4,shape=8)+facet_grid(Agroeco~.)+theme(legend.position = c(0.20, 0.80))

theme_set(theme_gray(base_size = 12)) ###This sets the font sizes of anything writt
m<-ggplot(slemsa_consolidated, aes(x = Cropping_Systems, y = Soil_loss, color = Country))+ geom_boxplot(size=1, shape=8) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Country")+geom_hline(yintercept = mean(Soil_loss))+theme(legend.position = c(0.20, 0.80))  
m

theme_set(theme_gray(base_size = 8)) ###This sets the font sizes of anything writt
m<-ggplot(slemsa_consolidated, aes(x = Cropping_Systems, y = Soil_loss, color = Country))+ geom_boxplot(size=0.5, shape=8) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping System")+facet_grid(Treatment~.) +theme(legend.position = c(0.20, 0.80)) 
m

theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
m<-ggplot(slemsa_consolidated, aes(x =Cropping_Systems, y = Soil_loss, color = Cropping_Systems))+ geom_boxplot(size=0.5, shape=6) + geom_smooth(method=lm)+ ylab("Soil Loss [t/ha]") + xlab("Cropping System")+facet_grid(Country~.)+geom_hline(yintercept = mean(Soil_loss)) +theme(legend.position = c(0.20, 0.80)) 
m

theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
m<-ggplot(slemsa_consolidated, aes(x =Cropping_Systems, y = Soil_loss, color = Cropping_Systems))+ geom_violin(size=0.5, shape=6) + geom_boxplot()+ ylab("Soil Loss [t/ha]") + xlab("Cropping System")+facet_grid(Country~.)+theme(legend.position = c(0.20, 0.80))  
## Warning: Ignoring unknown parameters: shape
m

theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
m<-ggplot(slemsa_consolidated, aes(x =Cropping_Systems, y = Soil_loss, color = Cropping_Systems))+ geom_violin(size=0.5, shape=6) +ylab("Soil Loss [t/ha]") + xlab("Cropping System")+facet_grid(Country~.)+theme(legend.position = c(0.20, 0.80))  
## Warning: Ignoring unknown parameters: shape
m

#p <- ggplot(data=slemsa_consolidated, aes(x=season,y=Soil_loss, color=Country))
#p + geom_smooth()+geom_jitter()+xlab("Season")+ylab("Soil Loss [t/ha]")

#p <- ggplot(data=slemsa_consolidated, aes(x=Soil_loss,y=season, color=Country))
#p + geom_smooth()+geom_jitter()+xlab("Soil Loss [t/ha]")+ylab("Season")

futher analysis

ggplot(slemsa_consolidated, aes(x=Intercepted,y=Soil_loss, shape=Agroeco, color=Agroeco)) + geom_point(data =slemsa_consolidated, size=1.5, aes(color=Agroeco, shape=Agroeco), position = "jitter") + 
  geom_smooth(method="lm", level = 0.65)+ 
  ylab("Soil Loss [t/ha]") + xlab("Rainfall Intercepted [i(%)]")+
  theme(legend.position = c(0.90, 0.82))+facet_grid(Country~.)

ggplot(slemsa_consolidated, aes(x=Residue_cover,y=Soil_loss, shape=Agroeco, color=Agroeco)) + geom_point(data =slemsa_consolidated, size=1.5, aes(color=Agroeco, shape=Agroeco), position = "jitter") + 
  geom_smooth(method="lm", level = 0.65)+ 
  ylab("Soil Loss [t/ha]") + xlab("Residue cover (%)")+
  theme(legend.position = c(0.90, 0.82))+facet_grid(Country~.)
## Warning: Removed 252 rows containing non-finite values (stat_smooth).
## Warning: Removed 252 rows containing missing values (geom_point).

ggplot(slemsa_consolidated, aes(x= rainfall,y=Soil_loss, shape=Agroeco, color=Agroeco)) + geom_point(data =slemsa_consolidated, size=1.5, aes(color=Agroeco, shape=Agroeco), position = "jitter") + 
  geom_smooth(method="lm", level = 0.65)+ 
  ylab("Soil Loss [t/ha]") + xlab("Seasonal Rainfall(mm)")+
  theme(legend.position = c(0.90, 0.82))+facet_grid(Country~.)

ggplot(slemsa_consolidated, aes(x= slope,y=Soil_loss, shape=Agroeco, color=Agroeco)) + geom_point(data =slemsa_consolidated, size=1.5, aes(color=Agroeco, shape=Agroeco), position = "jitter") + 
  geom_smooth(method="lm", level = 0.65)+ 
  ylab("Soil Loss [t/ha]") + xlab("Mean Slope)")+
  theme(legend.position = c(0.90, 0.82))+facet_grid(Country~.)
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

theme_set(theme_gray(base_size = 14))
p <- ggplot(data=slemsa_consolidated, aes(y=Soil_loss,x=Intercepted, color=Agroeco))
p + geom_smooth()+xlab("Rainfall Intercepted [i(%)]")+ylab("Soil Loss [t/ha]")+
  theme(legend.position = c(0.82, 0.84))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p <- ggplot(data=slemsa_consolidated, aes(x=System,y=Soil_loss,color=Drainage_class))
p + geom_boxplot(size=0.8, shape=8,fill=c("red","grey","green","blue","yellow","red","grey","green","blue","yellow"))+xlab("Cropping system")+ylab("Soil Loss [t/ha]")+
  theme(legend.position = c(0.20, 0.80))

theme_set(theme_gray(base_size = 7))
p <- ggplot(data=slemsa_consolidated,aes(x=Drainage_class,y=Soil_loss,fill=System))
p + geom_bar(size=0.5, shape=8,width=.3,stat = "identity")+xlab("Drainage Class")+ylab("Soil Loss [t/ha]")+ theme(legend.position = c(0.20, 0.80))
## Warning: Ignoring unknown parameters: shape

theme_set(theme_gray(base_size = 9))
p <- ggplot(data=slemsa_consolidated,aes(x=Drainage_class,y=Soil_loss,fill=Cropping_Systems))
p + geom_bar(size=0.5, shape=8,width=.2,stat = "identity")+xlab("Drainage Class")+ylab("Soil Loss [t/ha]")+theme(legend.position = c(0.20, 0.80))
## Warning: Ignoring unknown parameters: shape

theme_set(theme_gray(base_size = 9))
p1 <- ggplot(slemsa_consolidated, aes(Drainage_class, Soil_loss, fill = Cropping_Systems)) +
        geom_bar(stat = "identity", position = "dodge") +xlab("Drainage Class")+ylab("Soil Loss [t/ha]")
p1+ theme(legend.position = c(0.20, 0.80))

p1 <- ggplot(slemsa_consolidated, aes(Drainage_class, Soil_loss)) +
        geom_bar(size=0.5,shape=8,stat = "identity", position ="dodge",color = "grey40") +xlab("Drainage Class")+ylab("Soil Loss [t/ha]")
## Warning: Ignoring unknown parameters: shape
p1

p <- ggplot(data=slemsa_consolidated,aes(x=Drainage_class,y=Soil_loss))
p + geom_bar(size=0.5, shape=8,width=.3,stat = "identity")+xlab("Drainage Class")+ylab("Soil Loss [t/ha]")
## Warning: Ignoring unknown parameters: shape

p <- ggplot(data=slemsa_consolidated,aes(x=Drainage_class,y=Soil_loss,fill=Treatment_description))
p + geom_bar(size=0.5, shape=8,width=.3,stat = "identity")+xlab("Drainage Class")+ylab("Soil Loss [t/ha]")
## Warning: Ignoring unknown parameters: shape