library(readr)
new_sim <- read_csv("~/Dr Nyagumbo/Final data set/new_sim.csv")
## Parsed with column specification:
## cols(
##   country = col_character(),
##   site = col_character(),
##   agroecoregion = col_character(),
##   farmer = col_character(),
##   system = col_character(),
##   legume_cultivar = col_character(),
##   cropping_system = col_character(),
##   season = col_character(),
##   season_since = col_double(),
##   season_class = col_character(),
##   rainfall = col_double(),
##   rainfall_class = col_character(),
##   textureclass = col_character(),
##   sand = col_double(),
##   silt = col_double(),
##   clay = col_double(),
##   drainage_class = col_logical(),
##   maize_grain = col_double()
## )
## Warning: 2444 parsing failures.
##  row            col           expected                  actual                                       file
## 2095 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/new_sim.csv'
## 2096 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/new_sim.csv'
## 2097 drainage_class 1/0/T/F/TRUE/FALSE Moderately well drained '~/Dr Nyagumbo/Final data set/new_sim.csv'
## 2098 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/new_sim.csv'
## 2099 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/new_sim.csv'
## .... .............. .................. ....................... ..........................................
## See problems(...) for more details.
View(new_sim)
attach(new_sim)
names(new_sim)
##  [1] "country"         "site"            "agroecoregion"  
##  [4] "farmer"          "system"          "legume_cultivar"
##  [7] "cropping_system" "season"          "season_since"   
## [10] "season_class"    "rainfall"        "rainfall_class" 
## [13] "textureclass"    "sand"            "silt"           
## [16] "clay"            "drainage_class"  "maize_grain"
country=as.factor(country)
site=as.factor(site)
agroecoregion=as.factor(agroecoregion)
system=as.factor(system)
systems=factor(system,levels = c("Conv","CA",order=TRUE))
legume_cultivar=as.factor(legume_cultivar)
cropping_systems=factor(cropping_system,levels = c("Conv","CA_sole","CA_intercrop","CA_rotation"),ordered = TRUE)
season_class=as.factor(season_class)
season_clas=factor(season_class,levels = c("(0-2)","(3-5)","(>5)"),ordered = TRUE)
rainfall_class=as.factor(rainfall_class)
rainfall_clas=factor(rainfall_class,levels = c("(<700)","(700-1300)","(>1300)"),ordered = TRUE)
textureclass=as.factor(textureclass)
#drainage_class=c(drainage_class)
#drainage_clas=factor(drainage_class,levels = c("Well drained",
#"Moderately well drained","Somewhat poorly drained","Poorly drained"),ordered=TRUE)
library(ggplot2)
library(maps)
library(ggalt)
library(extrafontdb)
library(MASS)
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(gridExtra)
library(repr) ### adjusting the length and width of your plot
library(beanplot)
library("devtools")
library("yarrr")
## Loading required package: jpeg
## Loading required package: BayesFactor
## Loading required package: coda
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
## Loading required package: circlize
## ========================================
## circlize version 0.4.5
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization 
##   in R. Bioinformatics 2014.
## ========================================
## yarrr v0.1.5. Citation info at citation('yarrr'). Package guide at yarrr.guide()
## Email me at Nathaniel.D.Phillips.is@gmail.com
## 
## Attaching package: 'yarrr'
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
library(agricolae)
library(easynls)
library(MVN)
## sROC 0.1-2 loaded
library(lme4)

Multiple regression analysis across countries

## full model without possible interactions 
## note that drainade classifications are applicable to Mza ans Mwi
## texture data was properly done in Mza and Mlwi
full<-lm(maize_grain~country+agroecoregion+cropping_systems+season_class+rainfall_class+textureclass,data = new_sim) ## if the texture class is included in the model , season class becomes insignificant.
anova(full)
## Analysis of Variance Table
## 
## Response: maize_grain
##                    Df     Sum Sq   Mean Sq  F value    Pr(>F)    
## country             4 1.9648e+09 491189869 164.2260 < 2.2e-16 ***
## agroecoregion       1 5.4536e+08 545357685 182.3366 < 2.2e-16 ***
## cropping_systems    3 3.7323e+07  12440999   4.1596 0.0059546 ** 
## season_class        2 2.4405e+07  12202476   4.0798 0.0169729 *  
## rainfall_class      2 2.2710e+08 113552233  37.9654 < 2.2e-16 ***
## textureclass        2 4.5506e+07  22753189   7.6074 0.0005032 ***
## Residuals        4523 1.3528e+10   2990939                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(full)
## 
## Call:
## lm(formula = maize_grain ~ country + agroecoregion + cropping_systems + 
##     season_class + rainfall_class + textureclass, data = new_sim)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3967.7 -1198.6  -275.7   926.4  8525.0 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               5010.01     307.89  16.272  < 2e-16 ***
## countryKenya             -1450.70     104.11 -13.934  < 2e-16 ***
## countryMalawi             -532.94     100.87  -5.283 1.33e-07 ***
## countryMozambique        -1497.04      95.41 -15.690  < 2e-16 ***
## countryTanzania          -2559.14     119.14 -21.480  < 2e-16 ***
## agroecoregionLowlands     -767.12      58.08 -13.209  < 2e-16 ***
## cropping_systems.L         161.56      59.86   2.699  0.00698 ** 
## cropping_systems.Q         -42.31      55.20  -0.766  0.44349    
## cropping_systems.C          27.59      59.26   0.466  0.64157    
## season_class(0-2)         -221.34     135.19  -1.637  0.10164    
## season_class(3-5)          -89.29     127.96  -0.698  0.48534    
## rainfall_class(>1300)       36.95     147.66   0.250  0.80239    
## rainfall_class(700-1300)   594.81      71.64   8.303  < 2e-16 ***
## textureclassLoam          -354.35     312.62  -1.133  0.25707    
## textureclassSand          -777.21     258.63  -3.005  0.00267 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1729 on 4523 degrees of freedom
## Multiple R-squared:  0.1737, Adjusted R-squared:  0.1712 
## F-statistic: 67.93 on 14 and 4523 DF,  p-value: < 2.2e-16
### computing the regression model and do the mean comparison across countries 

## model
fll<-aov(maize_grain~country+agroecoregion+cropping_systems+season_class+rainfall_class,data = new_sim)
############################# mean comparison of cropping systems 
x<- LSD.test(fll,"cropping_systems",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Maize Grain Yield [kg/ha]",ylab="Maize Grain Yield [kg/ha]")

###
h<-HSD.test(fll,"cropping_systems",group =TRUE)
h
## $statistics
##   MSerror   Df     Mean       CV
##   2999674 4525 2845.451 60.86756
## 
## $parameters
##    test           name.t ntr StudentizedRange alpha
##   Tukey cropping_systems   4          3.63451  0.05
## 
## $means
##              maize_grain      std    r Min   Max  Q25  Q50    Q75
## CA_intercrop    2839.938 1941.517 1357 107 11870 1354 2472 3838.0
## CA_rotation     3316.005 1996.374  633 199 10923 1770 2995 4498.0
## CA_sole         3111.879 1754.605 1029 148 10822 1836 2784 4069.0
## Conv            2473.804 1842.679 1519 107 11634 1078 2112 3388.5
## 
## $comparison
## NULL
## 
## $groups
##              maize_grain groups
## CA_rotation     3316.005      a
## CA_sole         3111.879      a
## CA_intercrop    2839.938      b
## Conv            2473.804      c
## 
## attr(,"class")
## [1] "group"
#####
bar.group(h$groups,ylim=c(0,3500),density=4,border="blue",ylab="Maize Grain Yield [kg/ha]")

############# 
############################# mean comparison of country 
x<- LSD.test(fll,"country",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Maize Grain Yield [kg/ha]",ylab="Maize Grain Yield [kg/ha]")

###
h<-HSD.test(fll,"country",group =TRUE)
h
## $statistics
##   MSerror   Df     Mean       CV
##   2999674 4525 2845.451 60.86756
## 
## $parameters
##    test  name.t ntr StudentizedRange alpha
##   Tukey country   5         3.859213  0.05
## 
## $means
##            maize_grain      std    r Min   Max     Q25  Q50    Q75
## Ethiopia      3573.339 1923.667  622 113 10015 2097.75 3340 4863.0
## Kenya         2574.323 2002.054 1027 107 11100 1047.50 2133 3733.0
## Malawi        3616.825 1996.786  905 148 11870 2163.00 3304 4657.0
## Mozambique    2710.578 1600.831 1539 132 10278 1571.00 2362 3535.5
## Tanzania      1351.472 1008.427  445 109  5667  582.00 1112 1787.0
## 
## $comparison
## NULL
## 
## $groups
##            maize_grain groups
## Malawi        3616.825      a
## Ethiopia      3573.339      a
## Mozambique    2710.578      b
## Kenya         2574.323      b
## Tanzania      1351.472      c
## 
## attr(,"class")
## [1] "group"
#####
bar.group(h$groups,ylim=c(0,5000),density=4,border="blue",ylab="Maize Grain Yield [kg/ha]")

##############

############################# mean comparison of rainfall class
x<- LSD.test(fll,"rainfall_class",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Maize Grain Yield [kg/ha]",ylab="Maize Grain Yield [kg/ha]")

###
h<-HSD.test(fll,"rainfall_class",group =TRUE)
h
## $statistics
##   MSerror   Df     Mean       CV
##   2999674 4525 2845.451 60.86756
## 
## $parameters
##    test         name.t ntr StudentizedRange alpha
##   Tukey rainfall_class   3         3.315587  0.05
## 
## $means
##            maize_grain      std    r Min   Max     Q25  Q50  Q75
## (<700)        2503.578 1728.335  957 107  9090 1040.00 2282 3597
## (>1300)       2843.494 1579.178  180 304  8187 1760.25 2477 3587
## (700-1300)    2941.754 1949.885 3401 109 11870 1482.00 2550 3959
## 
## $comparison
## NULL
## 
## $groups
##            maize_grain groups
## (700-1300)    2941.754      a
## (>1300)       2843.494      a
## (<700)        2503.578      b
## 
## attr(,"class")
## [1] "group"
#####
bar.group(h$groups,ylim=c(0,3500),density=4,border="blue",ylab="Maize Grain Yield [kg/ha]")
###Turkey test
#posthoc <- TukeyHSD(x=fll, 'cropping_systems', conf.level=0.95)
#posthoc  ## differences are not intersting

modell with possible interaction

##modell with possible interaction
full<-lm(maize_grain~country+agroecoregion+cropping_systems+season_class+rainfall_class+textureclass+cropping_systems*rainfall_class+cropping_systems*season_class+cropping_systems*agroecoregion+textureclass*rainfall_class+textureclass*agroecoregion+textureclass*cropping_systems,data = new_sim) ## if the texture class is included in the model , season class becomes insignificant.
anova(full)
## Analysis of Variance Table
## 
## Response: maize_grain
##                                   Df     Sum Sq   Mean Sq  F value
## country                            4 1.9648e+09 491189869 168.5775
## agroecoregion                      1 5.4536e+08 545357685 187.1680
## cropping_systems                   3 3.7323e+07  12440999   4.2698
## season_class                       2 2.4405e+07  12202476   4.1879
## rainfall_class                     2 2.2710e+08 113552233  38.9714
## textureclass                       2 4.5506e+07  22753189   7.8089
## cropping_systems:rainfall_class    6 1.3549e+08  22581312   7.7500
## cropping_systems:season_class      6 5.3859e+07   8976478   3.0807
## agroecoregion:cropping_systems     3 9.4781e+07  31593720  10.8430
## rainfall_class:textureclass        2 1.1242e+08  56208850  19.2910
## agroecoregion:textureclass         1 9.1261e+06   9126118   3.1321
## cropping_systems:textureclass      4 7.6315e+06   1907875   0.6548
## Residuals                       4501 1.3115e+10   2913734         
##                                    Pr(>F)    
## country                         < 2.2e-16 ***
## agroecoregion                   < 2.2e-16 ***
## cropping_systems                0.0051074 ** 
## season_class                    0.0152370 *  
## rainfall_class                  < 2.2e-16 ***
## textureclass                    0.0004116 ***
## cropping_systems:rainfall_class 2.602e-08 ***
## cropping_systems:season_class   0.0051906 ** 
## agroecoregion:cropping_systems  4.277e-07 ***
## rainfall_class:textureclass     4.547e-09 ***
## agroecoregion:textureclass      0.0768323 .  
## cropping_systems:textureclass   0.6234666    
## Residuals                                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(full)
## 
## Call:
## lm(formula = maize_grain ~ country + agroecoregion + cropping_systems + 
##     season_class + rainfall_class + textureclass + cropping_systems * 
##     rainfall_class + cropping_systems * season_class + cropping_systems * 
##     agroecoregion + textureclass * rainfall_class + textureclass * 
##     agroecoregion + textureclass * cropping_systems, data = new_sim)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4109.3 -1171.7  -245.3   875.1  8397.3 
## 
## Coefficients: (5 not defined because of singularities)
##                                             Estimate Std. Error t value
## (Intercept)                                  4572.79     571.36   8.003
## countryKenya                                -1397.65     112.04 -12.474
## countryMalawi                                -598.96     103.14  -5.808
## countryMozambique                           -1444.72      96.72 -14.937
## countryTanzania                             -2570.90     124.21 -20.697
## agroecoregionLowlands                        -146.34     580.18  -0.252
## cropping_systems.L                            162.95     716.85   0.227
## cropping_systems.Q                           -242.15     315.11  -0.768
## cropping_systems.C                            923.42     283.54   3.257
## season_class(0-2)                            -121.28     139.45  -0.870
## season_class(3-5)                              47.40     132.75   0.357
## rainfall_class(>1300)                        -103.87     156.69  -0.663
## rainfall_class(700-1300)                      565.66     583.13   0.970
## textureclassLoam                              385.92     686.39   0.562
## textureclassSand                             -223.87     546.83  -0.409
## cropping_systems.L:rainfall_class(>1300)     -302.25     314.13  -0.962
## cropping_systems.Q:rainfall_class(>1300)     -211.36     304.13  -0.695
## cropping_systems.C:rainfall_class(>1300)    -1066.62     301.86  -3.534
## cropping_systems.L:rainfall_class(700-1300)  -461.45     195.15  -2.365
## cropping_systems.Q:rainfall_class(700-1300)  -291.67     171.80  -1.698
## cropping_systems.C:rainfall_class(700-1300)  -907.99     151.10  -6.009
## cropping_systems.L:season_class(0-2)          -58.11     282.12  -0.206
## cropping_systems.Q:season_class(0-2)          194.66     260.97   0.746
## cropping_systems.C:season_class(0-2)           30.16     243.40   0.124
## cropping_systems.L:season_class(3-5)          154.33     281.12   0.549
## cropping_systems.Q:season_class(3-5)          494.43     259.87   1.903
## cropping_systems.C:season_class(3-5)          353.79     237.35   1.491
## agroecoregionLowlands:cropping_systems.L     -240.58     125.79  -1.913
## agroecoregionLowlands:cropping_systems.Q      275.68     117.68   2.343
## agroecoregionLowlands:cropping_systems.C     -470.86     114.36  -4.117
## rainfall_class(>1300):textureclassLoam            NA         NA      NA
## rainfall_class(700-1300):textureclassLoam   -2095.42     652.93  -3.209
## rainfall_class(>1300):textureclassSand            NA         NA      NA
## rainfall_class(700-1300):textureclassSand    -168.25     582.20  -0.289
## agroecoregionLowlands:textureclassLoam            NA         NA      NA
## agroecoregionLowlands:textureclassSand       -715.05     581.50  -1.230
## cropping_systems.L:textureclassLoam           620.82     721.36   0.861
## cropping_systems.Q:textureclassLoam          -190.25     328.21  -0.580
## cropping_systems.C:textureclassLoam           394.75     305.84   1.291
## cropping_systems.L:textureclassSand           556.99     630.97   0.883
## cropping_systems.Q:textureclassSand               NA         NA      NA
## cropping_systems.C:textureclassSand               NA         NA      NA
##                                             Pr(>|t|)    
## (Intercept)                                 1.53e-15 ***
## countryKenya                                 < 2e-16 ***
## countryMalawi                               6.78e-09 ***
## countryMozambique                            < 2e-16 ***
## countryTanzania                              < 2e-16 ***
## agroecoregionLowlands                       0.800872    
## cropping_systems.L                          0.820185    
## cropping_systems.Q                          0.442251    
## cropping_systems.C                          0.001135 ** 
## season_class(0-2)                           0.384493    
## season_class(3-5)                           0.721081    
## rainfall_class(>1300)                       0.507404    
## rainfall_class(700-1300)                    0.332080    
## textureclassLoam                            0.573977    
## textureclassSand                            0.682271    
## cropping_systems.L:rainfall_class(>1300)    0.336010    
## cropping_systems.Q:rainfall_class(>1300)    0.487112    
## cropping_systems.C:rainfall_class(>1300)    0.000414 ***
## cropping_systems.L:rainfall_class(700-1300) 0.018089 *  
## cropping_systems.Q:rainfall_class(700-1300) 0.089638 .  
## cropping_systems.C:rainfall_class(700-1300) 2.01e-09 ***
## cropping_systems.L:season_class(0-2)        0.836809    
## cropping_systems.Q:season_class(0-2)        0.455761    
## cropping_systems.C:season_class(0-2)        0.901400    
## cropping_systems.L:season_class(3-5)        0.583035    
## cropping_systems.Q:season_class(3-5)        0.057150 .  
## cropping_systems.C:season_class(3-5)        0.136137    
## agroecoregionLowlands:cropping_systems.L    0.055858 .  
## agroecoregionLowlands:cropping_systems.Q    0.019190 *  
## agroecoregionLowlands:cropping_systems.C    3.90e-05 ***
## rainfall_class(>1300):textureclassLoam            NA    
## rainfall_class(700-1300):textureclassLoam   0.001340 ** 
## rainfall_class(>1300):textureclassSand            NA    
## rainfall_class(700-1300):textureclassSand   0.772605    
## agroecoregionLowlands:textureclassLoam            NA    
## agroecoregionLowlands:textureclassSand      0.218887    
## cropping_systems.L:textureclassLoam         0.389494    
## cropping_systems.Q:textureclassLoam         0.562164    
## cropping_systems.C:textureclassLoam         0.196879    
## cropping_systems.L:textureclassSand         0.377418    
## cropping_systems.Q:textureclassSand               NA    
## cropping_systems.C:textureclassSand               NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1707 on 4501 degrees of freedom
## Multiple R-squared:  0.199,  Adjusted R-squared:  0.1926 
## F-statistic: 31.06 on 36 and 4501 DF,  p-value: < 2.2e-16

Graphical presentation

### cropping systems and grain yield 
# stats output
crop<-lm( maize_grain~cropping_systems,data = new_sim)
summary(crop)
## 
## Call:
## lm(formula = maize_grain ~ cropping_systems, data = new_sim)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3117.0 -1416.8  -353.8  1003.3  9160.2 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2935.41      29.46  99.649  < 2e-16 ***
## cropping_systems.L   504.16      62.00   8.132 5.39e-16 ***
## cropping_systems.Q   -81.00      58.92  -1.375    0.169    
## cropping_systems.C   370.75      55.66   6.660 3.06e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1876 on 4534 degrees of freedom
## Multiple R-squared:  0.02584,    Adjusted R-squared:  0.02519 
## F-statistic: 40.09 on 3 and 4534 DF,  p-value: < 2.2e-16
## general box plot
theme_set(theme_gray(base_size = 12)) ###This sets the font sizes of anything writt
m<-ggplot(new_sim, aes(x = cropping_systems, y = maize_grain))+ geom_boxplot(size=1.5,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6,fill=c("grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Cropping Systems")
m

####################
theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
ggplot(new_sim, aes(x = cropping_systems, y = maize_grain))+ geom_boxplot(size=1.5,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6,fill=c("grey","grey","grey","grey","grey","grey","grey","grey")) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Cropping Systems")+facet_wrap(.~agroecoregion)

########


### violin + boxplot

theme_set(theme_gray(base_size =12))
m <- ggplot(data=new_sim,aes(x=cropping_systems, y=maize_grain))
m + geom_violin(size=0.9,shape=8) + geom_boxplot(width=.3, outlier.size=0,fill=c("grey","grey","grey","grey"))+ylab("Maize Grain Yield [kg/ha]") + xlab("Cropping Systems")
## Warning: Ignoring unknown parameters: shape

######
theme_set(theme_gray(base_size =10))
m <- ggplot(data=new_sim,aes(x=cropping_systems, y=maize_grain))
m + geom_violin(size=0.7,shape=8) + geom_boxplot(width=.3, outlier.size=0,fill=c("grey","grey","grey","grey","grey","grey","grey","grey"))+ylab("Maize Grain Yield [kg/ha]") + xlab("Cropping Systems")+facet_wrap(.~agroecoregion)
## Warning: Ignoring unknown parameters: shape

#####

##cropping system and agroecological regions 
theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
m<-ggplot(new_sim, aes(x = cropping_systems, y = maize_grain, fill= agroecoregion))+geom_boxplot(size=0.8,outlier.colour = "red")+ ylab("Maize Grain Yield [kg/ha]") + xlab("Cropping Systems")+theme(legend.position = c(0.90, 0.90))
m

Rainfall Classes

#### model
rnfall<-lm(maize_grain~agroecoregion+rainfall_clas+cropping_systems)
anova(rnfall)
## Analysis of Variance Table
## 
## Response: maize_grain
##                    Df     Sum Sq   Mean Sq F value    Pr(>F)    
## agroecoregion       1 2.3384e+08 233835624  67.782 2.365e-16 ***
## rainfall_clas       2 1.3195e+08  65975066  19.124 5.362e-09 ***
## cropping_systems    3 3.7550e+08 125167004  36.282 < 2.2e-16 ***
## Residuals        4531 1.5631e+10   3449832                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(rnfall)
## 
## Call:
## lm(formula = maize_grain ~ agroecoregion + rainfall_clas + cropping_systems)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3489.2 -1387.8  -326.6   971.8  9246.2 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3153.67      65.11  48.436  < 2e-16 ***
## agroecoregionLowlands  -471.25      57.21  -8.237 2.29e-16 ***
## rainfall_clas.L         135.17     108.11   1.250  0.21124    
## rainfall_clas.Q        -175.16      67.14  -2.609  0.00911 ** 
## cropping_systems.L      501.53      61.95   8.096 7.22e-16 ***
## cropping_systems.Q      -46.08      58.62  -0.786  0.43189    
## cropping_systems.C      349.44      55.89   6.252 4.43e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1857 on 4531 degrees of freedom
## Multiple R-squared:  0.04528,    Adjusted R-squared:  0.04401 
## F-statistic: 35.81 on 6 and 4531 DF,  p-value: < 2.2e-16
######################
theme_set(theme_gray(base_size = 12)) ###This sets the font sizes of anything writt
m<-ggplot(new_sim, aes(x = rainfall_clas, y = maize_grain,fill=systems))+ geom_boxplot(size=1,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain yield [kg/ha]") + xlab("Seasonal Rainfall [mm]")+theme(legend.position = c(0.85, 0.85))
m

theme_set(theme_gray(base_size = 11)) ###This sets the font sizes of anything writt
m<-ggplot(new_sim, aes(x = rainfall_clas, y = maize_grain,fill=cropping_systems))+ geom_boxplot(size=1,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain yield [kg/ha]") + xlab("Seasonal Rainfall [mm]")+theme(legend.position = c(0.85, 0.85))
m

Seasonal Classes

#### model
season<-lm(maize_grain~agroecoregion+season_clas,data = new_sim)
anova(season)
## Analysis of Variance Table
## 
## Response: maize_grain
##                 Df     Sum Sq   Mean Sq F value    Pr(>F)    
## agroecoregion    1 2.3384e+08 233835624  66.399 4.718e-16 ***
## season_clas      2 1.7147e+08  85733713  24.345 3.045e-11 ***
## Residuals     4534 1.5967e+10   3521652                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(season)
## 
## Call:
## lm(formula = maize_grain ~ agroecoregion + season_clas, data = new_sim)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3266.0 -1398.4  -348.6   991.4  9361.4 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3236.14      62.30  51.948  < 2e-16 ***
## agroecoregionLowlands  -486.55      58.05  -8.382  < 2e-16 ***
## season_clas.L           220.67      93.35   2.364  0.01812 *  
## season_clas.Q          -208.07      66.65  -3.122  0.00181 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1877 on 4534 degrees of freedom
## Multiple R-squared:  0.02476,    Adjusted R-squared:  0.02411 
## F-statistic: 38.36 on 3 and 4534 DF,  p-value: < 2.2e-16
######################
theme_set(theme_gray(base_size = 12)) ###This sets the font sizes of anything writt
m<-ggplot(new_sim, aes(x = season_clas, y = maize_grain,fill=systems))+ geom_boxplot(size=1,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain yield [kg/ha]") + xlab("Cropping Season [Yrs]")+theme(legend.position = c(0.85, 0.85))
m

theme_set(theme_gray(base_size = 11)) ###This sets the font sizes of anything writt
m<-ggplot(new_sim, aes(x = season_clas, y = maize_grain,fill=cropping_systems))+ geom_boxplot(size=1,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain yield [kg/ha]") + xlab("Cropping Season [Yrs]")+theme(legend.position = c(0.85, 0.85))
m

legume grown

library(readr)
legume <- read_csv("~/Dr Nyagumbo/Final data set/legume.csv")
## Parsed with column specification:
## cols(
##   country = col_character(),
##   site = col_character(),
##   agroecoregion = col_character(),
##   farmer = col_character(),
##   system = col_character(),
##   legume_cultivar = col_character(),
##   cropping_system = col_character(),
##   season = col_character(),
##   season_since = col_double(),
##   season_class = col_character(),
##   rainfall = col_double(),
##   rainfall_class = col_character(),
##   textureclass = col_character(),
##   sand = col_double(),
##   silt = col_double(),
##   clay = col_double(),
##   drainage_class = col_logical(),
##   maize_grain = col_double()
## )
## Warning: 1190 parsing failures.
##  row            col           expected                  actual                                      file
## 1388 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/legume.csv'
## 1389 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/legume.csv'
## 1390 drainage_class 1/0/T/F/TRUE/FALSE Moderately well drained '~/Dr Nyagumbo/Final data set/legume.csv'
## 1391 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/legume.csv'
## 1392 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/legume.csv'
## .... .............. .................. ....................... .........................................
## See problems(...) for more details.
View(legume)
attach(legume)
## The following objects are masked _by_ .GlobalEnv:
## 
##     agroecoregion, country, legume_cultivar, rainfall_class,
##     season, season_class, site, system, textureclass
## The following objects are masked from new_sim:
## 
##     agroecoregion, clay, country, cropping_system, drainage_class,
##     farmer, legume_cultivar, maize_grain, rainfall,
##     rainfall_class, sand, season, season_class, season_since,
##     silt, site, system, textureclass
legume_cultivar=as.factor(legume_cultivar)

md<-lm(maize_grain~legume_cultivar,data = legume)
anova(md)
## Analysis of Variance Table
## 
## Response: maize_grain
##                   Df     Sum Sq  Mean Sq F value    Pr(>F)    
## legume_cultivar    7  477627168 68232453  20.207 < 2.2e-16 ***
## Residuals       2569 8674845546  3376740                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(md)
## 
## Call:
## lm(formula = maize_grain ~ legume_cultivar, data = legume)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3587.4 -1386.8  -410.6   960.4  9717.4 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)                         2617.01      56.82  46.060  < 2e-16
## legume_cultivarCow_pea               476.82     125.81   3.790 0.000154
## legume_cultivarDesmodium             -44.12     162.84  -0.271 0.786442
## legume_cultivarGround_nuts          1250.37     159.97   7.816 7.88e-15
## legume_cultivarPigeon_pea           -464.38      89.28  -5.201 2.13e-07
## legume_cultivarPigeon_pea_ g_nuts    490.75     222.45   2.206 0.027464
## legume_cultivarSoya_ g_nut_Cow_pea    19.96     289.18   0.069 0.944967
## legume_cultivarSoya_bean             271.76     165.37   1.643 0.100439
##                                       
## (Intercept)                        ***
## legume_cultivarCow_pea             ***
## legume_cultivarDesmodium              
## legume_cultivarGround_nuts         ***
## legume_cultivarPigeon_pea          ***
## legume_cultivarPigeon_pea_ g_nuts  *  
## legume_cultivarSoya_ g_nut_Cow_pea    
## legume_cultivarSoya_bean              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1838 on 2569 degrees of freedom
## Multiple R-squared:  0.05219,    Adjusted R-squared:  0.0496 
## F-statistic: 20.21 on 7 and 2569 DF,  p-value: < 2.2e-16
mod<-aov(maize_grain~legume_cultivar,data = legume)

x<- LSD.test(mod,"legume_cultivar",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Maize Grain Yield [kg/ha]",ylab="Maize Grain Yield [kg/ha]")

theme_set(theme_gray(base_size =7))
m <- ggplot(data=legume,aes(x=legume_cultivar, y=maize_grain))
m + geom_violin(size=0.8,shape=8) + geom_boxplot(width=.2, outlier.size=0,fill=c("grey","grey","grey","grey","grey","grey","grey","grey"))+ylab("Maize Grain Yield [kg/ha]") + xlab("Legumes Grown")
## Warning: Ignoring unknown parameters: shape

Drainage and Textural classification Analysis

library(readr)
drainage <- read_csv("~/Dr Nyagumbo/Final data set/drainage.csv")
## Parsed with column specification:
## cols(
##   country = col_character(),
##   site = col_character(),
##   agroecoregion = col_character(),
##   system = col_character(),
##   legume_cultivar = col_character(),
##   cropping_system = col_character(),
##   season = col_character(),
##   season_since = col_double(),
##   season_class = col_character(),
##   rainfall = col_double(),
##   rainfall_class = col_character(),
##   textureclass = col_character(),
##   sand = col_double(),
##   silt = col_double(),
##   clay = col_double(),
##   drainage_class = col_character(),
##   maize_grain = col_double()
## )
View(drainage)
attach(drainage)
## The following objects are masked _by_ .GlobalEnv:
## 
##     agroecoregion, country, legume_cultivar, rainfall_class,
##     season, season_class, site, system, textureclass
## The following objects are masked from legume:
## 
##     agroecoregion, clay, country, cropping_system, drainage_class,
##     legume_cultivar, maize_grain, rainfall, rainfall_class, sand,
##     season, season_class, season_since, silt, site, system,
##     textureclass
## The following objects are masked from new_sim:
## 
##     agroecoregion, clay, country, cropping_system, drainage_class,
##     legume_cultivar, maize_grain, rainfall, rainfall_class, sand,
##     season, season_class, season_since, silt, site, system,
##     textureclass
#defining factor variables 

country=as.factor(country)
site=as.factor(site)
agroecoregion=as.factor(agroecoregion)
system=as.factor(system)
systems=factor(system,levels = c("Conv","CA",order=TRUE))
legume_cultivar=as.factor(legume_cultivar)
cropping_systems=as.factor(cropping_systems)
cropping_systems=factor(cropping_system,levels = c("Conv","CA_sole","CA_intercrop","CA_rotation"),ordered = TRUE)
season_class=as.factor(season_class)
season_clas=factor(season_class,levels = c("(0-2)","(3-5)","(>5)"),ordered = TRUE)
rainfall_class=as.factor(rainfall_class)
rainfall_clas=factor(rainfall_class,levels = c("(<700)","(700-1300)","(>1300)"),ordered = TRUE)
textureclass=as.factor(textureclass)
drainage_class=as.factor(drainage_class)
drainage_clas=factor(drainage_class,levels =c("Well drained","Moderately well drained","Somewhat poorly drained","Poorly drained"),ordered=TRUE)

Drainage classification analysis

## full model
thandeka<-lm(maize_grain~drainage_class+cropping_systems,data = drainage)
anova(thandeka)
## Analysis of Variance Table
## 
## Response: maize_grain
##                    Df     Sum Sq  Mean Sq F value    Pr(>F)    
## drainage_class      3   63795507 21265169  6.5493 0.0002081 ***
## cropping_systems    3   36014957 12004986  3.6973 0.0113583 *  
## Residuals        2436 7909557758  3246945                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(thandeka)
## 
## Call:
## lm(formula = maize_grain ~ drainage_class + cropping_systems, 
##     data = drainage)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3070.8 -1297.5  -321.2   901.3  8938.5 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                            3141.13      58.44  53.753  < 2e-16
## drainage_classPoorly drained          -1082.35     277.74  -3.897    1e-04
## drainage_classSomewhat poorly drained   -13.20      89.91  -0.147  0.88333
## drainage_classWell drained             -208.38      88.56  -2.353  0.01870
## cropping_systems.L                      255.25      78.57   3.248  0.00118
## cropping_systems.Q                       36.04      74.86   0.481  0.63026
## cropping_systems.C                       60.13      71.44   0.842  0.40004
##                                          
## (Intercept)                           ***
## drainage_classPoorly drained          ***
## drainage_classSomewhat poorly drained    
## drainage_classWell drained            *  
## cropping_systems.L                    ** 
## cropping_systems.Q                       
## cropping_systems.C                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1802 on 2436 degrees of freedom
## Multiple R-squared:  0.01246,    Adjusted R-squared:  0.01003 
## F-statistic: 5.123 on 6 and 2436 DF,  p-value: 3.059e-05
############################# mean comparison
tha<-aov(maize_grain~drainage_class+textureclass+rainfall_class+cropping_systems,data = drainage)
x<- LSD.test(tha,"drainage_class",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Maize Grain Yield [kg/ha]",ylab="Maize Grain Yield [kg/ha]")

#####
x<- LSD.test(tha,"textureclass",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Maize Grain Yield [kg/ha]",ylab="Maize Grain Yield [kg/ha]")

h<-HSD.test(tha,"textureclass",group =TRUE)
h
## $statistics
##   MSerror   Df     Mean       CV
##   3199342 2429 3047.017 58.70234
## 
## $parameters
##    test       name.t ntr StudentizedRange alpha
##   Tukey textureclass   6         4.033318  0.05
## 
## $means
##                 maize_grain      std    r  Min   Max     Q25    Q50
## Clay               2583.296 1308.452   27 1187  6547 1653.00 2082.0
## Clay Loam          3081.217 1980.616  240  225 11870 1712.25 2686.0
## Loamy sandy        3112.951 1877.160  407  148 10923 1682.50 2818.0
## Sandy              2466.729 1756.877  129  497  9487 1218.00 1861.0
## Sandy clay loam    3274.919 1398.473   74  989  7233 2171.75 3085.0
## Sandy Loam         3069.667 1787.937 1566  132 10908 1780.50 2702.5
##                     Q75
## Clay            3279.50
## Clay Loam       4023.00
## Loamy sandy     4035.50
## Sandy           3212.00
## Sandy clay loam 4377.75
## Sandy Loam      3994.75
## 
## $comparison
## NULL
## 
## $groups
##                 maize_grain groups
## Sandy clay loam    3274.919      a
## Loamy sandy        3112.951      a
## Clay Loam          3081.217      a
## Sandy Loam         3069.667      a
## Clay               2583.296     ab
## Sandy              2466.729      b
## 
## attr(,"class")
## [1] "group"
#################

x<- LSD.test(tha,"drainage_class",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=0.8,xlab="Maize Grain Yield [kg/ha]",ylab="Maize Grain Yield [kg/ha]")

h<-HSD.test(tha,"drainage_class",group =TRUE)
h
## $statistics
##   MSerror   Df     Mean       CV
##   3199342 2429 3047.017 58.70234
## 
## $parameters
##    test         name.t ntr StudentizedRange alpha
##   Tukey drainage_class   4         3.635676  0.05
## 
## $means
##                         maize_grain      std   r  Min   Max     Q25    Q50
## Moderately well drained    3126.441 1965.794 979  273 10923 1680.50 2663.0
## Poorly drained             2055.455  675.063  44 1192  3876 1547.75 1923.0
## Somewhat poorly drained    3123.419 1618.903 690  225  8773 1863.00 2949.5
## Well drained               2928.052 1791.951 730  132 11870 1732.25 2583.5
##                            Q75
## Moderately well drained 4088.5
## Poorly drained          2354.5
## Somewhat poorly drained 4083.0
## Well drained            3776.5
## 
## $comparison
## NULL
## 
## $groups
##                         maize_grain groups
## Moderately well drained    3126.441      a
## Somewhat poorly drained    3123.419      a
## Well drained               2928.052      a
## Poorly drained             2055.455      b
## 
## attr(,"class")
## [1] "group"

Model with possible interactions

intera<-lm(maize_grain~drainage_class+silt+clay+sand+textureclass+rainfall_class+cropping_systems+drainage_class*cropping_systems+textureclass*cropping_systems+textureclass*drainage_class,data = drainage)
anova(intera)
## Analysis of Variance Table
## 
## Response: maize_grain
##                                   Df     Sum Sq  Mean Sq F value    Pr(>F)
## drainage_class                     3   58802274 19600758  6.1419 0.0003715
## silt                               1   32803130 32803130 10.2789 0.0013643
## clay                               1        208      208  0.0001 0.9935560
## sand                               1    4402205  4402205  1.3794 0.2403206
## textureclass                       4   75913264 18978316  5.9469 9.282e-05
## rainfall_class                     2   17986733  8993366  2.8181 0.0599291
## cropping_systems                   3   32267644 10755881  3.3704 0.0178090
## drainage_class:cropping_systems    9   52564640  5840516  1.8301 0.0582969
## textureclass:cropping_systems     12   56916061  4743005  1.4862 0.1217679
## drainage_class:textureclass        7  110610326 15801475  4.9514 1.427e-05
## Residuals                       2276 7263430265  3191314                  
##                                    
## drainage_class                  ***
## silt                            ** 
## clay                               
## sand                               
## textureclass                    ***
## rainfall_class                  .  
## cropping_systems                *  
## drainage_class:cropping_systems .  
## textureclass:cropping_systems      
## drainage_class:textureclass     ***
## Residuals                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plots silt clay and sand

######################################### silt
ggplot(drainage, aes(silt,maize_grain))+geom_point(color="firebrick")+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("Silt [%]")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).
## Warning: Removed 67 rows containing missing values (geom_point).

###################
theme_set(theme_gray(base_size = 11))
ggplot(drainage, aes(silt,maize_grain,color=cropping_systems))+geom_point()+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("Silt [%]")+theme(legend.position = c(0.90, 0.85))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).

## Warning: Removed 67 rows containing missing values (geom_point).

######################

ggplot(drainage, aes(silt,maize_grain))+geom_point()+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("Silt [%]")+facet_wrap(.~cropping_system)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).

## Warning: Removed 67 rows containing missing values (geom_point).

#################
theme_set(theme_gray(base_size = 10))
ggplot(drainage, aes(silt,maize_grain,color=agroecoregion))+geom_point(size=1.7,aes(shape=agroecoregion))+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("Silt [%]")+facet_wrap(.~cropping_system)+theme(legend.position = c(0.90, 0.94))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 6
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.9755e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 6
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 2.9755e-016
## Warning: Removed 67 rows containing missing values (geom_point).

######################################### sand
ggplot(drainage, aes(sand,maize_grain))+geom_point(color="firebrick")+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("Sand [%]")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).
## Warning: Removed 67 rows containing missing values (geom_point).

###################
theme_set(theme_gray(base_size = 11))
ggplot(drainage, aes(sand,maize_grain,color=cropping_systems))+geom_point()+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("Sand [%]")+theme(legend.position = c(0.90, 0.85))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).

## Warning: Removed 67 rows containing missing values (geom_point).

######################

ggplot(drainage, aes(sand,maize_grain))+geom_point()+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("Sand [%]")+facet_wrap(.~cropping_system)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).

## Warning: Removed 67 rows containing missing values (geom_point).

#################

theme_set(theme_gray(base_size = 10))
ggplot(drainage, aes(sand,maize_grain,color=agroecoregion))+geom_point(size=1.7,aes(shape=agroecoregion))+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("Sand [%]")+facet_wrap(.~cropping_system)+theme(legend.position = c(0.90, 0.94))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).

## Warning: Removed 67 rows containing missing values (geom_point).

######################################### clay
ggplot(drainage, aes(sand,maize_grain))+geom_point(color="firebrick")+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("clay [%]")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).
## Warning: Removed 67 rows containing missing values (geom_point).

###################
theme_set(theme_gray(base_size = 11))
ggplot(drainage, aes(clay,maize_grain,color=cropping_systems))+geom_point()+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("clay [%]")+theme(legend.position = c(0.90, 0.85))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 123 rows containing non-finite values (stat_smooth).
## Warning: Removed 123 rows containing missing values (geom_point).

######################

ggplot(drainage, aes(clay,maize_grain))+geom_point()+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("clay [%]")+facet_wrap(.~cropping_system)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 123 rows containing non-finite values (stat_smooth).

## Warning: Removed 123 rows containing missing values (geom_point).

#################
theme_set(theme_gray(base_size = 10))
ggplot(drainage, aes(clay,maize_grain,color=agroecoregion))+geom_point(size=1.7,aes(shape=agroecoregion))+
  stat_smooth()+ ylab("Maize Grain yield [kg/ha]") + xlab("clay [%]")+facet_wrap(.~cropping_system)+theme(legend.position = c(0.90, 0.94))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 123 rows containing non-finite values (stat_smooth).

## Warning: Removed 123 rows containing missing values (geom_point).

Drainage Classification

dr<-lm(maize_grain~drainage_clas+agroecoregion,data = drainage)
anova(dr)
## Analysis of Variance Table
## 
## Response: maize_grain
##                 Df     Sum Sq   Mean Sq  F value    Pr(>F)    
## drainage_clas    3   63795507  21265169   7.4086 6.131e-05 ***
## agroecoregion    1  947648014 947648014 330.1501 < 2.2e-16 ***
## Residuals     2438 6997924700   2870355                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(dr)
## 
## Call:
## lm(formula = maize_grain ~ drainage_clas + agroecoregion, data = drainage)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3677.2 -1149.9  -279.6   807.6  9349.4 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3424.63      76.92  44.522  < 2e-16 ***
## drainage_clas.L       -1165.57     180.27  -6.466 1.21e-10 ***
## drainage_clas.Q        -968.63     139.30  -6.953 4.56e-12 ***
## drainage_clas.C        -460.47      82.86  -5.557 3.04e-08 ***
## agroecoregionLowlands -1304.57      71.80 -18.170  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1694 on 2438 degrees of freedom
## Multiple R-squared:  0.1263, Adjusted R-squared:  0.1248 
## F-statistic: 88.09 on 4 and 2438 DF,  p-value: < 2.2e-16
#########################
d<-aov(maize_grain~drainage_clas,data = drainage)
x<- LSD.test(d,"drainage_clas",alpha=0.05,group=FALSE)
diffograph(x,cex.axis=1,xlab="Maize Grain Yield [kg/ha]",ylab="Maize Grain Yield [kg/ha]")

h<-HSD.test(tha,"drainage_clas",group =TRUE)
h
## $statistics
##   MSerror   Df     Mean       CV
##   3199342 2429 3047.017 58.70234
## 
## $parameters
##    test         name.t ntr StudentizedRange alpha
##   Tukey drainage_class   4         3.635676  0.05
## 
## $means
##                         maize_grain      std   r  Min   Max     Q25    Q50
## Moderately well drained    3126.441 1965.794 979  273 10923 1680.50 2663.0
## Poorly drained             2055.455  675.063  44 1192  3876 1547.75 1923.0
## Somewhat poorly drained    3123.419 1618.903 690  225  8773 1863.00 2949.5
## Well drained               2928.052 1791.951 730  132 11870 1732.25 2583.5
##                            Q75
## Moderately well drained 4088.5
## Poorly drained          2354.5
## Somewhat poorly drained 4083.0
## Well drained            3776.5
## 
## $comparison
## NULL
## 
## $groups
##                         maize_grain groups
## Moderately well drained    3126.441      a
## Somewhat poorly drained    3123.419      a
## Well drained               2928.052      a
## Poorly drained             2055.455      b
## 
## attr(,"class")
## [1] "group"
################### plots
## general box plot
theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
m<-ggplot(drainage, aes(x = drainage_clas, y = maize_grain))+ geom_boxplot(size=1.5,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Drainage Classification")
m

#####################

########### or
theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
m<-ggplot(drainage, aes(x = drainage_clas, y = maize_grain,fill=agroecoregion))+ geom_boxplot(size=1.5,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Drainage Classification")+theme(legend.position = c(0.90, 0.94))
m

######################
###model 
dr<-lm(maize_grain~drainage_clas+cropping_systems,data = drainage)
anova(dr)
## Analysis of Variance Table
## 
## Response: maize_grain
##                    Df     Sum Sq  Mean Sq F value    Pr(>F)    
## drainage_clas       3   63795507 21265169  6.5493 0.0002081 ***
## cropping_systems    3   36014957 12004986  3.6973 0.0113583 *  
## Residuals        2436 7909557758  3246945                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(dr)
## 
## Call:
## lm(formula = maize_grain ~ drainage_clas + cropping_systems, 
##     data = drainage)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3070.8 -1297.5  -321.2   901.3  8938.5 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2815.15      73.96  38.063  < 2e-16 ***
## drainage_clas.L     -589.23     188.78  -3.121  0.00182 ** 
## drainage_clas.Q     -638.77     147.14  -4.341 1.47e-05 ***
## drainage_clas.C     -186.57      86.87  -2.148  0.03184 *  
## cropping_systems.L   255.25      78.57   3.248  0.00118 ** 
## cropping_systems.Q    36.04      74.86   0.481  0.63026    
## cropping_systems.C    60.13      71.44   0.842  0.40004    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1802 on 2436 degrees of freedom
## Multiple R-squared:  0.01246,    Adjusted R-squared:  0.01003 
## F-statistic: 5.123 on 6 and 2436 DF,  p-value: 3.059e-05
theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
m<-ggplot(drainage, aes(x = drainage_clas, y = maize_grain,fill=cropping_systems))+ geom_boxplot(size=1,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Drainage Classification")+theme(legend.position = c(0.85, 0.85))
m

#################

theme_set(theme_gray(base_size = 6)) ###This sets the font sizes of anything writt
m<-ggplot(drainage, aes(x = drainage_clas, y = maize_grain,fill=cropping_systems))+ geom_boxplot(size=0.8,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Drainage Classification")+theme(legend.position = c(0.85, 0.85))+facet_wrap(.~agroecoregion)
m

#########
theme_set(theme_gray(base_size = 6)) ###This sets the font sizes of anything writt
m<-ggplot(drainage, aes(x = drainage_clas, y = maize_grain,fill=system))+ geom_boxplot(size=1,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Drainage Classification")+theme(legend.position = c(0.90, 0.94))+facet_wrap(.~agroecoregion)
m

Texture analysis

library(readr)
texture <- read_csv("~/Dr Nyagumbo/Final data set/texture.csv")
## Parsed with column specification:
## cols(
##   country = col_character(),
##   site = col_character(),
##   agroecoregion = col_character(),
##   farmer = col_character(),
##   system = col_character(),
##   legume_cultivar = col_character(),
##   cropping_system = col_character(),
##   season = col_character(),
##   season_since = col_double(),
##   season_class = col_character(),
##   rainfall = col_double(),
##   rainfall_class = col_character(),
##   textureclass = col_character(),
##   sand = col_double(),
##   silt = col_double(),
##   clay = col_double(),
##   drainage_class = col_logical(),
##   maize_grain = col_double()
## )
## Warning: 2443 parsing failures.
##  row            col           expected                  actual                                       file
## 2026 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/texture.csv'
## 2027 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/texture.csv'
## 2028 drainage_class 1/0/T/F/TRUE/FALSE Moderately well drained '~/Dr Nyagumbo/Final data set/texture.csv'
## 2029 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/texture.csv'
## 2030 drainage_class 1/0/T/F/TRUE/FALSE Somewhat poorly drained '~/Dr Nyagumbo/Final data set/texture.csv'
## .... .............. .................. ....................... ..........................................
## See problems(...) for more details.
View(texture)
attach(texture)
## The following objects are masked _by_ .GlobalEnv:
## 
##     agroecoregion, country, drainage_class, legume_cultivar,
##     rainfall_class, season, season_class, site, system,
##     textureclass
## The following objects are masked from drainage:
## 
##     agroecoregion, clay, country, cropping_system, drainage_class,
##     legume_cultivar, maize_grain, rainfall, rainfall_class, sand,
##     season, season_class, season_since, silt, site, system,
##     textureclass
## The following objects are masked from legume:
## 
##     agroecoregion, clay, country, cropping_system, drainage_class,
##     farmer, legume_cultivar, maize_grain, rainfall,
##     rainfall_class, sand, season, season_class, season_since,
##     silt, site, system, textureclass
## The following objects are masked from new_sim:
## 
##     agroecoregion, clay, country, cropping_system, drainage_class,
##     farmer, legume_cultivar, maize_grain, rainfall,
##     rainfall_class, sand, season, season_class, season_since,
##     silt, site, system, textureclass
country=as.factor(country)
site=as.factor(site)
agroecoregion=as.factor(agroecoregion)
system=as.factor(system)
systems=factor(system,levels = c("Conv","CA",order=TRUE))
legume_cultivar=as.factor(legume_cultivar)
cropping_systems=factor(cropping_system,levels = c("Conv","CA_sole","CA_intercrop","CA_rotation"),ordered = TRUE)
season_class=as.factor(season_class)
season_clas=factor(season_class,levels = c("(0-2)","(3-5)","(>5)"),ordered = TRUE)
rainfall_class=as.factor(rainfall_class)
rainfall_clas=factor(rainfall_class,levels = c("(<700)","(700-1300)","(>1300)"),ordered = TRUE)
textureclass=as.factor(textureclass)
#drainage_class=c(drainage_class)
#drainage_clas=factor(drainage_class,levels = c("Well drained",
#"Moderately well drained","Somewhat poorly drained","Poorly drained"),ordered=TRUE)
#### model

tex<-lm(maize_grain~textureclass+cropping_system+agroecoregion,data = texture)
anova(tex)
## Analysis of Variance Table
## 
## Response: maize_grain
##                   Df     Sum Sq   Mean Sq F value Pr(>F)    
## textureclass       2 1.6227e+07   8113357  2.3363 0.0968 .  
## cropping_system    3 4.1234e+08 137445633 39.5791 <2e-16 ***
## agroecoregion      1 2.4896e+08 248955515 71.6897 <2e-16 ***
## Residuals       4461 1.5492e+10   3472682                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(tex)
## 
## Call:
## lm(formula = maize_grain ~ textureclass + cropping_system + agroecoregion, 
##     data = texture)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3469.2 -1368.1  -303.0   942.5  9424.1 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3290.30      72.24  45.545  < 2e-16 ***
## textureclassLoam            -259.29      63.31  -4.096 4.28e-05 ***
## textureclassSand            -188.39      97.15  -1.939   0.0525 .  
## cropping_systemCA_rotation   566.33      91.41   6.195 6.34e-10 ***
## cropping_systemCA_sole       319.94      78.92   4.054 5.12e-05 ***
## cropping_systemConv         -332.52      70.52  -4.715 2.49e-06 ***
## agroecoregionLowlands       -488.55      57.70  -8.467  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1864 on 4461 degrees of freedom
## Multiple R-squared:  0.0419, Adjusted R-squared:  0.04061 
## F-statistic: 32.52 on 6 and 4461 DF,  p-value: < 2.2e-16
## general box plot
theme_set(theme_gray(base_size = 10)) ###This sets the font sizes of anything writt
m<-ggplot(texture, aes(x = textureclass, y = maize_grain))+ geom_boxplot(size=1.5,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Texture Classification")
m

#####################
theme_set(theme_gray(base_size = 7)) ###This sets the font sizes of anything writt
m<-ggplot(texture, aes(x = textureclass, y = maize_grain))+ geom_boxplot(size=1,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Texture Classification")+facet_wrap(.~cropping_system)
m

########################
theme_set(theme_gray(base_size = 7)) ###This sets the font sizes of anything writt
m<-ggplot(texture, aes(x = textureclass, y = maize_grain))+ geom_boxplot(size=1,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Texture Classification")+facet_wrap(agroecoregion~cropping_system)
m

######
theme_set(theme_gray(base_size = 7)) ###This sets the font sizes of anything writt
m<-ggplot(texture, aes(x = textureclass, y = maize_grain,fill=agroecoregion))+ geom_boxplot(size=0.8,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Texture Classification")+theme(legend.position = c(0.90, 0.94))
m

###################

m<-ggplot(texture, aes(x = textureclass, y = maize_grain,fill=agroecoregion))+ geom_boxplot(size=0.8,varwidth = TRUE,outlier.colour = "red",outlier.shape = 1, shape=6) + geom_smooth(method=lm)+ ylab("Maize Grain Yield [kg/ha]") + xlab("Texture Classification")+theme(legend.position = c(0.90, 0.94))+facet_wrap(.~cropping_system)
m