Load Libraries

library(RSQLite)
library(plyr)
library(dplyr)
library(ggplot2)
library(reshape2)
library(knitr)
library(xtable)
library(ggthemes)

Step 1. Load data sources

#Connect to Database and get tables
con1 <- dbConnect(drv=RSQLite::SQLite(), dbname="Burkina_HHDatabase_Revision_1.db")
con2 <- dbConnect(drv=RSQLite::SQLite(), dbname="Burkina_FODatabase_Revision_2.sqlite3")

hh_serv <- dbGetQuery(conn=con1, statement="SELECT * FROM hh_support_services")
fo_serv <- dbGetQuery(conn=con2, statement="SELECT * FROM fo_services")
dbDisconnect(con1)
## [1] TRUE
dbDisconnect(con2)
## [1] TRUE

Step 2. Deep data cleaning

FO Data

  1. Subset FO data to necessary columns
#################Clean fo_services############################
fo_serc <- select(fo_serv, year_collected, fo_code, b3_VALUE, b3_a, b3_b)

#Add Readable Column Names
colnames(fo_serc) <- c("year", "fo", "service_type", "other_provide", "fo_provide")
  1. create numeric varaible for if FO provided Service
fo_serc$fo_yes <- with(fo_serc,ifelse(fo_provide == "Yes", 1 , NA))
  1. Create Summary Statistic for fo_servies proived, convert to proportion data for accurate comparison given unequal n
fo_serc <- group_by(fo_serc, year, service_type)
fosg <- summarise(fo_serc, 
                  n = n(),
                  fo_yes = sum(fo_yes, na.rm = TRUE))
fosg$p_offer <- with(fosg,fo_yes/n)
  1. Analyze Summary Dataset
kable(head(fosg), format="pandoc",caption="Summary")
Summary
year service_type n fo_yes p_offer
2009 Access to subsidized inputs (seed, fertilizer, etc.) 6 3 0.500
2009 Aggregating members’ commodities for sale to buyers 8 7 0.875
2009 Cleaning commodities of foreign matter 8 4 0.500
2009 Corn threshing/maize shelling 8 5 0.625
2009 Draught power (animals/tractors) 8 0 0.000
2009 Drying commodities for long-term storage 8 3 0.375

HH Data

  1. Subset Variables
###############Clean hh_services_offered######################## 
hh_serc <- select(hh_serv, year_collected, a2_VALUE, a2_e, a2_f, a2_h)

#Add Readable Column Names
hh_serc <- rename(hh_serc, service_type = a2_VALUE, service_available = a2_e, service_used = a2_f, provider = a2_h)
  1. Create boolean nominal Variable if service is available to HH
hh_serc$hh_available <- with(hh_serc,ifelse(service_available == "Yes", 1 , 0))
  1. Create boolean nominal Variable if HH used service
hh_serc$hh_use <- with(hh_serc,ifelse(service_used == "Yes", 1 , 0))
  1. Create boolean nominal variable if HH used service and it was provided by FO
hh_serc$hh_use_fo <- with(hh_serc,ifelse(provider == "Farmers organization", 1 , 0))
  1. Group By Year and Service Type for Analysis
hh_serc <- group_by(hh_serc, year_collected)
hhsg <- summarise(hh_serc, 
                  n = n(),
                  hh_use = sum(hh_use, na.rm = TRUE))
  1. Upon analyzing summary data we see that there is only data available for 2013
kable(hhsg, format="pandoc",caption="Summary")
Summary
year_collected n hh_use
2009 7188 0
2013 3816 763
  1. Only year with any recorded answers is 2013, so subset this year and group by service type for analysis
hh_serc <- filter(hh_serc, year_collected == 2013)

#Group By Year and Service Type for Analysis
hh_serc <- group_by(hh_serc, service_type)
hhsg <- summarise(hh_serc, 
                  n = n(),
                  hh_available = sum(hh_available, na.rm = TRUE),
                  hh_use = sum(hh_use, na.rm = TRUE),
                  hh_use_fo = sum(hh_use_fo, na.rm = TRUE))
hhsg$p_available <- with(hhsg, hh_available/n)
hhsg$p_use <- with(hhsg, hh_use/n)
hhsg$p_use_fo <- with(hhsg, hh_use_fo/n)
  1. Analyze summary dataset
kable(hhsg, format="pandoc",caption="Summary")
Summary
service_type n hh_available hh_use hh_use_fo p_available p_use p_use_fo
Access to agricultural equipment 318 176 22 4 0.5534591 0.0691824 0.0125786
Access to cleaning services for commodities 318 50 13 7 0.1572327 0.0408805 0.0220126
Access to drying services for commodities 318 31 6 3 0.0974843 0.0188679 0.0094340
Agricultural inputs on credit 318 180 64 29 0.5660377 0.2012579 0.0911950
Cash loans for agricultural purposes 318 80 15 6 0.2515723 0.0471698 0.0188679
Cash loans for non-agriculture uses 318 129 31 20 0.4056604 0.0974843 0.0628931
Chemical treatment of commodities to control insect pests 318 198 95 18 0.6226415 0.2987421 0.0566038
Crop insurance 318 5 2 0 0.0157233 0.0062893 0.0000000
Help selling agricultural products 318 255 155 125 0.8018868 0.4874214 0.3930818
Storage for agricultural commodities 318 134 69 64 0.4213836 0.2169811 0.2012579
Subsidized (or free) inputs 318 169 78 53 0.5314465 0.2452830 0.1666667
Training or technical assistance in agricultural practices or technology 318 282 213 159 0.8867925 0.6698113 0.5000000

Step 3. Plot Analysis

Analysis of Services Offered by FOs

Here we analyze what services are offered most by FOs across the years of the survey

fosg$service_type <- factor(fosg$service_type, levels = fosg$service_type[order(fosg$p_offer)])

ggplot(fosg, aes(x = service_type, y = p_offer, fill = service_type)) + geom_bar(stat = "identity") + 
  coord_flip() +
  facet_wrap(~ year) +
  ggtitle("Proportion of FOs That Offer Service") +
  labs(x = "Type of Service Offering", y = "Proportion of FO's Offering Service") +
  theme_igray() +
  theme(legend.position="none") +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=12),
        axis.title.y = element_text(color="#666666", face="bold", size=12))

Household Service Usage Analysis

Here we analyze the proportion of HHs where services are availabe, utilized, and utilized directly from an FO

  1. Plot proportion of HHs where service is available
hhsg$service_type <- factor(hhsg$service_type, levels = hhsg$service_type[order(hhsg$p_available)])

ggplot(hhsg, aes(x = service_type, y = p_available, fill = service_type)) + geom_bar(stat = "identity") + 
  coord_flip() +
  ggtitle("Proportion of HHs where service is available (2013)") +
  labs(x = "Type of Service Offering", y = "Proportion of HH Where Available") +
  theme_igray() +
  theme(legend.position="none") +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10))

  1. Plot proportion of HHs that use service from any provider
hhsg$service_type <- factor(hhsg$service_type, levels = hhsg$service_type[order(hhsg$p_use)])

ggplot(hhsg, aes(x = service_type, y = p_use, fill = service_type)) + geom_bar(stat = "identity") + 
  coord_flip() +
  ggtitle("Proportion of HHs that use service (2013)") +
  labs(x = "Type of Service Offering", y = "Proportion of HH Using Service") +
  theme_igray() +
  theme(legend.position="none") +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10))

  1. Plot proportion of HHs that use service directly from FO
hhsg$service_type <- factor(hhsg$service_type, levels = hhsg$service_type[order(hhsg$p_use_fo)])

ggplot(hhsg, aes(x = service_type, y = p_use_fo, fill = service_type)) + geom_bar(stat = "identity") + 
  coord_flip() +
  theme(legend.position="none") + 
  ggtitle("Proportion of HHs that use FO (2013)") +
  labs(x = "Type of Service Offering", y = "Proportion of HH Using Service Directly From an FO") +
  theme_igray() +
  theme(legend.position="none") +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10))

  1. Comparison of serivces used The below plots HH service categories on the same axis for comparison
#Melt data to see comparison
hhsm <- melt(hhsg, id.vars = 1:5)
hhsm$variable <- factor(hhsm$variable, levels = c("p_use_fo", "p_use", "p_available"))

#Plot proportion of HHs using Services from an FO
ggplot(hhsm, aes(x = service_type, y = value, fill = variable)) + geom_bar(stat = "identity", position="dodge") + 
  coord_flip() +
  ggtitle("Proportion of HHs using Service Comparison (2013)") +
  labs(x = "Type of Service Offering", y = "Proportion of HH Using Service Directly From an FO") +
  theme_igray() +
  theme(axis.title.x = element_text(color="#666666", face="bold", size=10),
        axis.title.y = element_text(color="#666666", face="bold", size=10)) + 
  scale_fill_discrete(guide = guide_legend(reverse=TRUE),
                      labels = c("Used from FO", "Used", "Available"))