# get ready
# Load Necessary Libraries
library(RSQLite)
library(plyr)
library(dplyr)
library(ggplot2)
library(knitr)
library(xtable)
library(ggthemes)
#Define Custom Multiplot function
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
plots <- c(list(...), plotlist)
numPlots = length(plots)
if (is.null(layout)) {
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
for (i in 1:numPlots) {
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
Summary
Seasonal Farming Indicators
Step 1. Load data source
Use RSQLite Package to connect to database and load Seasonal Farming table. Ensure database file is in Working Directory. Read in lookup code csv.
con <- dbConnect(drv=RSQLite::SQLite(), dbname="Burkina_HHDatabase_Revision_1.db")
#Pull Tables
hh_sf <- dbGetQuery(conn=con, statement="SELECT * FROM hh_seasonal_farming")
#Read In Lookup Codes for Crops and Weight Conversions. Important this file exists in the working directory
codes <- read.csv("LookupCodes.csv")
Step 2. Deep data cleaning
- Add in missing crop codes and factorize. Factroize year_collected
hh_sf$b7_a <- mapvalues(hh_sf$b7_a,
as.character(codes$crop_code),
as.character(codes$crop),
warn_missing = FALSE)
#Crop Names: No coding for numeric > 100, add to "Other", convert all names to uppercase, then factorize
hh_sf$b7_a <- toupper(hh_sf$b7_a)
hh_sf$b7_a <- mapvalues(hh_sf$b7_a,c('YAM/SWEET POTATO', 'GROUNDNUT'),c("SWEET POTATO/YAM", "GROUND NUT"))
hh_sf$b7_a <- as.factor(hh_sf$b7_a)
hh_sf$year_collected <- as.factor(hh_sf$year_collected)
- look at plot size variable
levels(factor(hh_sf$b7_d))
## [1] "0" "1.2" "10" "15" "3.5" "9"
## [7] "924" "acres" "hectares" "limas" "manzanas"
- create new variable that converts all plot sizes to hectares (NA for unrecognized)
hh_sf$hectares <- with(hh_sf,ifelse(b7_d == 'acres', b7_c*0.4,
ifelse(b7_d == 'hectares', b7_c*1,
ifelse(b7_d == 'manzanas', b7_c*1,
ifelse(b7_d == 'limas', b7_c*0.25, NA)))))
4.Look harvested amount variable
levels(factor(hh_sf$b7_f))
## [1] "0" "0.25" "1" "100"
## [5] "100 kg bags" "100kg bags" "15" "16"
## [9] "200" "22" "30" "40 kg bags"
## [13] "50 kg bags" "50kg bags" "9" "grammes"
## [17] "kilogrammes" "metric tonnes" "Other" "Others"
- create new variable that converts all amounts to kilograms (NA for unrecognized)
#Add Multiplier Variable based on lookup codes
hh_sf$kg_mult <- hh_sf$b7_f
hh_sf$kg_mult <- mapvalues(hh_sf$kg_mult,
as.character(codes$weight_lab),
as.character(codes$multiplier),
warn_missing = FALSE)
#Convert qty sold and kg_mult variables to numeric, create kg_sold variable based on this
hh_sf$b7_e <- as.numeric(hh_sf$b7_e)
hh_sf$kg_mult <- as.numeric(hh_sf$kg_mult)
hh_sf$kilograms <- hh_sf$b7_e * hh_sf$kg_mult
- create new variable of hired manhours
hh_sf$days_hired_labor <- hh_sf$b7_p + hh_sf$b7_r
- Subset variables for analysis
sfsub <- hh_sf[c("year_collected","new_hh_code","b7_a","hectares","kilograms","days_hired_labor")]
#rename crop variable
sfsub <- rename(sfsub, crop_type = b7_a)
Step 3. Summary Analysis
- Prepare Data for Summary Analysis - Farm Size in Hectacres, Crops Harvested in Kg, Outside Man Hours Employed
#Summary by year
sfg <- group_by(sfsub, year_collected)
#Farm Size in hectares
sfsum1 <- summarise(sfg,
Total_Rows = n(),
Complete_Cases = sum(complete.cases(hectares)),
Mean = mean(hectares, na.rm = TRUE),
Variance = var(hectares, na.rm = TRUE),
Max = max(hectares, na.rm = TRUE),
Min = min(hectares, na.rm = TRUE))
#Crops Harvested in Kg
sfsum2 <- summarise(sfg,
Total_Rows = n(),
Complete_Cases = sum(complete.cases(kilograms)),
Mean = mean(kilograms, na.rm = TRUE),
Variance = var(kilograms, na.rm = TRUE),
Max = max(kilograms, na.rm = TRUE),
Min = min(kilograms, na.rm = TRUE))
#Outside Man Hours Employes
sfsum3 <- summarise(sfg,
Total_Rows = n(),
Complete_Cases = sum(complete.cases(days_hired_labor)),
Mean = mean(days_hired_labor, na.rm = TRUE),
Variance = var(days_hired_labor, na.rm = TRUE),
Max = max(days_hired_labor, na.rm = TRUE),
Min = min(days_hired_labor, na.rm = TRUE))
- Print Summary Statistics for Indicators
- Farm Size in Hectares
kable(sfsum1)
| 2009 |
10782 |
2777 |
1.355027 |
5.054484 |
88.88 |
0.00 |
| 2011 |
3360 |
1802 |
1.305938 |
2.145922 |
25.00 |
0.04 |
| 2013 |
3180 |
1630 |
1.149248 |
1.273464 |
15.00 |
0.00 |
- Crops Harvested in Kg
kable(sfsum2)
| 2009 |
10782 |
2610 |
830.4398 |
19378692.6 |
173000 |
0e+00 |
| 2011 |
3360 |
1604 |
494.2297 |
674414.6 |
12000 |
0e+00 |
| 2013 |
3180 |
1585 |
609.6020 |
778405.7 |
15000 |
5e-04 |
- Outside Man Hours Employed
kable(sfsum3)
| 2009 |
10782 |
1029 |
28.39456 |
314726.9006 |
17776 |
0 |
| 2011 |
3360 |
472 |
11.23835 |
353.6793 |
235 |
0 |
| 2013 |
3180 |
461 |
119.98123 |
3233971.7096 |
35002 |
0 |
Step 4. Reportable Indicator Analysis
Look at Change in Farm Size, Outside Man Hours, and Crops Harvested over years in study
fplot <- qplot(x=year_collected, y=Mean, fill = year_collected, data=sfsum1, geom = "bar", stat="identity") +
labs(y = "Avg Farm Size(ha)", x = "Year") +
ggtitle("Avg Farm Size(Hectares)") +
theme_igray() +
theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
axis.title.y = element_text(color="#666666", face="bold", size=10))
cplot <- qplot(x=year_collected, y=Mean, fill = year_collected, data=sfsum2, geom = "bar", stat="identity") +
labs(y = "Avg Harvest(Kg)", x = "Year") +
ggtitle("Avg Crops Harvested(kg)") +
theme_igray() +
theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
axis.title.y = element_text(color="#666666", face="bold", size=10))
mplot <- qplot(x=year_collected, y=Mean, fill = year_collected, data=sfsum3, geom = "bar", stat="identity") +
labs(y = "Avg Outside Man-Hours", x = "Year") +
ggtitle("Avg Outside Man Hours") +
theme_igray() +
theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
axis.title.y = element_text(color="#666666", face="bold", size=10))
multiplot(fplot,cplot,mplot,cols = 2)

Look at Crop Output by type across years of the survey
#Group Data - Harvest by crop type and year
sfgc <- group_by(sfsub, year_collected, crop_type)
sfsum4 <- summarise(sfgc, mean(kilograms, na.rm = TRUE))
sfsum4 <- sfsum4[complete.cases(sfsum4),]
colnames(sfsum4)[3] <- "kilograms"
#reorder largest to smallest
sfsum4$crop_type <- factor(sfsum4$crop_type, levels = sfsum4$crop_type[order(sfsum4$kilograms)])
#Plot
ggplot(sfsum4, aes(x = crop_type, y = kilograms, fill = crop_type)) + geom_bar(stat = "identity") +
coord_flip() +
facet_wrap( ~ year_collected)+ theme(legend.position="none") +
ggtitle("Mean Crop Output by Type and Year") +
labs(x = "Crop Type") +
theme_igray() +
theme(legend.position="none") +
theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
axis.title.y = element_text(color="#666666", face="bold", size=10))

Look at Crop Output by type across years of the survey, ignoring cotton for additional granularity in other crops
#Plot
ggplot(sfsum4, aes(x = crop_type, y = kilograms, fill = crop_type)) +
geom_bar(subset = .(crop_type != 'COTTON'), stat = "identity") +
coord_flip() + facet_wrap( ~ year_collected)+ theme(legend.position="none") +
ggtitle("Mean Crop Output by Type and Year") +
theme(plot.title = element_text(face = "bold")) +
labs(x = "Crop Type") +
theme_igray() +
theme(legend.position="none") +
theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
axis.title.y = element_text(color="#666666", face="bold", size=10))
