# get ready
# Load Necessary Libraries
library(RSQLite)
library(plyr)
library(dplyr)
library(ggplot2)
library(knitr)
library(xtable)
library(ggthemes)

#Define Custom Multiplot function
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  plots <- c(list(...), plotlist)
  numPlots = length(plots)
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  if (numPlots==1) {
    print(plots[[1]])
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

Summary

Seasonal Farming Indicators

Step 1. Load data source

Use RSQLite Package to connect to database and load Seasonal Farming table. Ensure database file is in Working Directory. Read in lookup code csv.

con <- dbConnect(drv=RSQLite::SQLite(), dbname="Burkina_HHDatabase_Revision_1.db")

#Pull Tables
hh_sf <- dbGetQuery(conn=con, statement="SELECT * FROM hh_seasonal_farming")

#Read In Lookup Codes for Crops and Weight Conversions. Important this file exists in the working directory
codes <- read.csv("LookupCodes.csv")

Step 2. Deep data cleaning

  1. Add in missing crop codes and factorize. Factroize year_collected
hh_sf$b7_a <- mapvalues(hh_sf$b7_a, 
                         as.character(codes$crop_code), 
                         as.character(codes$crop), 
                         warn_missing = FALSE)

#Crop Names: No coding for numeric > 100, add to "Other", convert all names to uppercase, then factorize
hh_sf$b7_a <- toupper(hh_sf$b7_a)
hh_sf$b7_a <- mapvalues(hh_sf$b7_a,c('YAM/SWEET POTATO', 'GROUNDNUT'),c("SWEET POTATO/YAM", "GROUND NUT"))
hh_sf$b7_a <- as.factor(hh_sf$b7_a)
hh_sf$year_collected <- as.factor(hh_sf$year_collected)
  1. look at plot size variable
levels(factor(hh_sf$b7_d))
##  [1] "0"        "1.2"      "10"       "15"       "3.5"      "9"       
##  [7] "924"      "acres"    "hectares" "limas"    "manzanas"
  1. create new variable that converts all plot sizes to hectares (NA for unrecognized)
hh_sf$hectares <- with(hh_sf,ifelse(b7_d == 'acres', b7_c*0.4,
                                    ifelse(b7_d == 'hectares', b7_c*1, 
                                           ifelse(b7_d == 'manzanas', b7_c*1,
                                                  ifelse(b7_d == 'limas', b7_c*0.25, NA)))))

4.Look harvested amount variable

levels(factor(hh_sf$b7_f))
##  [1] "0"             "0.25"          "1"             "100"          
##  [5] "100 kg bags"   "100kg bags"    "15"            "16"           
##  [9] "200"           "22"            "30"            "40 kg bags"   
## [13] "50 kg bags"    "50kg bags"     "9"             "grammes"      
## [17] "kilogrammes"   "metric tonnes" "Other"         "Others"
  1. create new variable that converts all amounts to kilograms (NA for unrecognized)
#Add Multiplier Variable based on lookup codes
hh_sf$kg_mult <- hh_sf$b7_f

hh_sf$kg_mult <- mapvalues(hh_sf$kg_mult, 
                           as.character(codes$weight_lab), 
                           as.character(codes$multiplier), 
                           warn_missing = FALSE)

#Convert qty sold and kg_mult variables to numeric, create kg_sold variable based on this
hh_sf$b7_e <- as.numeric(hh_sf$b7_e)
hh_sf$kg_mult <- as.numeric(hh_sf$kg_mult)
hh_sf$kilograms <- hh_sf$b7_e * hh_sf$kg_mult
  1. create new variable of hired manhours
hh_sf$days_hired_labor <- hh_sf$b7_p + hh_sf$b7_r
  1. Subset variables for analysis
sfsub <- hh_sf[c("year_collected","new_hh_code","b7_a","hectares","kilograms","days_hired_labor")]
#rename crop variable
sfsub <- rename(sfsub, crop_type = b7_a)

Step 3. Summary Analysis

  1. Prepare Data for Summary Analysis - Farm Size in Hectacres, Crops Harvested in Kg, Outside Man Hours Employed
#Summary by year
sfg <- group_by(sfsub, year_collected)

#Farm Size in hectares
sfsum1 <- summarise(sfg,
                    Total_Rows = n(),
                    Complete_Cases = sum(complete.cases(hectares)),
                    Mean = mean(hectares, na.rm = TRUE),
                    Variance = var(hectares, na.rm = TRUE),
                    Max = max(hectares, na.rm = TRUE),
                    Min = min(hectares, na.rm = TRUE))


#Crops Harvested in Kg
sfsum2 <- summarise(sfg,
                    Total_Rows = n(),
                    Complete_Cases = sum(complete.cases(kilograms)),
                    Mean = mean(kilograms, na.rm = TRUE),
                    Variance = var(kilograms, na.rm = TRUE),
                    Max = max(kilograms, na.rm = TRUE),
                    Min = min(kilograms, na.rm = TRUE))


#Outside Man Hours Employes
sfsum3 <- summarise(sfg,
                    Total_Rows = n(),
                    Complete_Cases = sum(complete.cases(days_hired_labor)),
                    Mean = mean(days_hired_labor, na.rm = TRUE),
                    Variance = var(days_hired_labor, na.rm = TRUE),
                    Max = max(days_hired_labor, na.rm = TRUE),
                    Min = min(days_hired_labor, na.rm = TRUE))
  1. Print Summary Statistics for Indicators
  1. Farm Size in Hectares
kable(sfsum1)
year_collected Total_Rows Complete_Cases Mean Variance Max Min
2009 10782 2777 1.355027 5.054484 88.88 0.00
2011 3360 1802 1.305938 2.145922 25.00 0.04
2013 3180 1630 1.149248 1.273464 15.00 0.00
  1. Crops Harvested in Kg
kable(sfsum2)
year_collected Total_Rows Complete_Cases Mean Variance Max Min
2009 10782 2610 830.4398 19378692.6 173000 0e+00
2011 3360 1604 494.2297 674414.6 12000 0e+00
2013 3180 1585 609.6020 778405.7 15000 5e-04
  1. Outside Man Hours Employed
kable(sfsum3)
year_collected Total_Rows Complete_Cases Mean Variance Max Min
2009 10782 1029 28.39456 314726.9006 17776 0
2011 3360 472 11.23835 353.6793 235 0
2013 3180 461 119.98123 3233971.7096 35002 0

Step 4. Reportable Indicator Analysis

Look at Change in Farm Size, Outside Man Hours, and Crops Harvested over years in study

fplot <- qplot(x=year_collected, y=Mean, fill = year_collected, data=sfsum1, geom = "bar", stat="identity") + 
  labs(y = "Avg Farm Size(ha)", x = "Year") + 
  ggtitle("Avg Farm Size(Hectares)") +
  theme_igray() +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10))

cplot <- qplot(x=year_collected, y=Mean, fill = year_collected, data=sfsum2, geom = "bar", stat="identity") + 
  labs(y = "Avg Harvest(Kg)", x = "Year") + 
  ggtitle("Avg Crops Harvested(kg)") +
  theme_igray() +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10))

mplot <- qplot(x=year_collected, y=Mean, fill = year_collected, data=sfsum3, geom = "bar", stat="identity") + 
  labs(y = "Avg Outside Man-Hours", x = "Year") + 
  ggtitle("Avg Outside Man Hours") +
  theme_igray() +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10))

multiplot(fplot,cplot,mplot,cols = 2)

Look at Crop Output by type across years of the survey

#Group Data - Harvest by crop type and year
sfgc <- group_by(sfsub, year_collected, crop_type)
sfsum4 <- summarise(sfgc, mean(kilograms, na.rm = TRUE))
sfsum4 <- sfsum4[complete.cases(sfsum4),]
colnames(sfsum4)[3] <- "kilograms"
#reorder largest to smallest
sfsum4$crop_type <- factor(sfsum4$crop_type, levels = sfsum4$crop_type[order(sfsum4$kilograms)])

#Plot
ggplot(sfsum4, aes(x = crop_type, y = kilograms, fill = crop_type)) + geom_bar(stat = "identity") + 
  coord_flip() + 
  facet_wrap( ~ year_collected)+ theme(legend.position="none") + 
  ggtitle("Mean Crop Output by Type and Year") +
  labs(x = "Crop Type") +
  theme_igray() +
  theme(legend.position="none") +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10))

Look at Crop Output by type across years of the survey, ignoring cotton for additional granularity in other crops

#Plot
ggplot(sfsum4, aes(x = crop_type, y = kilograms, fill = crop_type)) + 
  geom_bar(subset = .(crop_type != 'COTTON'), stat = "identity") + 
  coord_flip() + facet_wrap( ~ year_collected)+ theme(legend.position="none") + 
  ggtitle("Mean Crop Output by Type and Year") +
  theme(plot.title = element_text(face = "bold")) +
  labs(x = "Crop Type") +
  theme_igray() +
  theme(legend.position="none") +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10))