# Required libraries
require(plyr)
FALSE Loading required package: plyr
require(rmongodb)
FALSE Loading required package: rmongodb
FALSE WARNING!
FALSE There are some quite big changes in this version of rmongodb.
FALSE mongo.bson.to.list, mongo.bson.from.list (which are workhorses of many other rmongofb high-level functions) are rewritten.
FALSE Please,
FALSE TEST IT BEFORE PRODUCTION USAGE.
FALSE Also there are some other important changes, please see NEWS file and release notes at
FALSE https://github.com/mongosoup/rmongodb/releases/
# Fetch the survey documents for one SurveyGizmo survey from MongoDB.
#
# SECURITY NOTE(review): the connection credentials were hard-coded in this
# script. They can now be overridden through the FBC_MONGO_HOST,
# FBC_MONGO_USER and FBC_MONGO_PASSWORD environment variables; the literals
# below remain only as backward-compatible fallbacks and should be rotated and
# removed from source control.
mongo_conn <- mongo.create(
  db = "fbc",
  host = Sys.getenv("FBC_MONGO_HOST", unset = "50.97.79.169:29883"),
  username = Sys.getenv("FBC_MONGO_USER", unset = "fbcommons_user"),
  password = Sys.getenv("FBC_MONGO_PASSWORD", unset = "&sm1rKs4A")
)
# Fail early with a readable message instead of a low-level driver error.
if (!mongo.is.connected(mongo_conn)) {
  stop("Could not connect to the 'fbc' MongoDB database.", call. = FALSE)
}
# tryCatch(..., finally = ) releases the connection even if the query fails.
data_list <- tryCatch(
  mongo.find.all(
    mongo = mongo_conn,
    ns = "fbc.results",
    query = '{"survey_id": 2198999}', # SurveyGizmo survey id
    fields = list( # Rmongodb shorthand for projection query
      # q407 = 1L, # Date
      # q415 = 1L, # Occasion
      # -------------------
      q534 = 1L, # Number of dairy cows
      q539 = 1L, # County
      q575 = 1L, # sex
      q590 = 1L, # CIG
      q604 = 1L, # Age
      # -------------------
      q576 = 1L, # Farm size
      q577 = 1L, # Technology
      q578 = 1L, # How easy was it to understand and use {the technology}
      q579 = 1L, # How easy was it to buy the inputs (such as fertiliser, pesticides, tools) that you needed?
      q580 = 1L, # How easy was it to get the labour that you need?
      q581 = 1L, # How satisfied are you with the quantity produced?
      q582 = 1L, # How satisfied are you with the quality of what was produced?
      q583 = 1L, # How satisfied are you with the quantity that you could sell?
      q584 = 1L, # How satisfied are you with the price that you received for your produce?
      q602 = 1L # Please tell us what changes or improvements would help you benefit more from this {technology}
    )
  ),
  finally = mongo.destroy(mongo_conn)
)
# Flatten the query result into a data frame: one row per document (one
# survey), one column per field, columns sorted by name.
# ASSUMPTION: the value of each document field has length 1 (i.e. data_list
# is not nested). The first document defines the set of columns.
if (length(data_list) == 0L) {
  stop("The query returned no documents; cannot build df_from_db.",
       call. = FALSE)
}
field_names <- names(data_list[[1]])
# Align every document to the first document's fields BY NAME, so a document
# that lacks a field gets NA in that column instead of shifting all of the
# following values one cell to the left.
aligned_rows <- lapply(data_list, function(doc) {
  flat <- unlist(doc)
  if (is.null(flat)) {
    return(rep(NA, length(field_names)))
  }
  unname(flat[field_names])
})
# stringsAsFactors = TRUE is spelled out because the recoding steps that
# follow modify factor levels; since R 4.0 the default is FALSE.
df_from_db <- data.frame(
  matrix(unlist(aligned_rows), nrow = length(data_list), byrow = TRUE),
  stringsAsFactors = TRUE
)
colnames(df_from_db) <- field_names
# drop = FALSE keeps a data frame even if only one column came back.
df_from_db <- df_from_db[, order(colnames(df_from_db)), drop = FALSE]
# Strip leading and trailing whitespace from every element of x.
# x is coerced to character; the result is a plain character vector.
trim <- function(x) {
  without_leading <- sub("^\\s+", "", x)
  sub("\\s+$", "", without_leading)
}
- ‘county’ data recoding
# County (q539) cleaning: blank answers and the "test" placeholder become NA,
# and the "MURANG'a" spelling is normalised to "MURANG'A".
df_from_db$q539 <- as.character(df_from_db$q539)
df_from_db$q539[df_from_db$q539 %in% c("", "test")] <- NA
df_from_db$q539[df_from_db$q539 %in% "MURANG'a"] <- "MURANG'A"
df_from_db$q539 <- factor(df_from_db$q539)
- ‘sex’ data recoding
# Sex (q575): relabel the raw answers, then turn the blank answer into NA.
# The three labels are applied to the sorted distinct raw values, so this
# assumes exactly three of them (blank, female, male, in that sort order) --
# TODO confirm against the raw data.
df_from_db$q575 <- as.character(
  factor(df_from_db$q575, labels = c("", "FEMALE", "MALE"))
)
df_from_db$q575[df_from_db$q575 %in% ""] <- NA
df_from_db$q575 <- factor(df_from_db$q575)
- ‘technology’ data recoding
# Technology (q577): collapse the eight raw answer levels into
# "TISSUE CONTROL" / "TISSUE CULTURE"; the first four levels are treated as
# blank and become NA.
# Coerce to factor first: `levels<-` on a plain character column (the
# data.frame default since R 4.0) would only attach an attribute and leave
# the values un-recoded. For a column that is already a factor this is a no-op.
df_from_db$q577 <- factor(df_from_db$q577)
# levels(df_from_db$q577)
# ASSUMPTION: the column has exactly these eight levels, in this (sorted)
# order; assigning duplicate labels merges the corresponding levels.
levels(df_from_db$q577) <- c(
  "", "", "", "",
  "TISSUE CONTROL", "TISSUE CONTROL",
  "TISSUE CULTURE", "TISSUE CULTURE"
)
df_from_db$q577 <- factor(
  ifelse(df_from_db$q577 == "", NA, as.character(df_from_db$q577))
)
- ‘number of cows’ data recoding
# Number of dairy cows (q534) and the wealth band derived from it.
# ASSUMPTION: There are no fractional cows. Coercing the values to numeric
# is good enough.
# Non-numeric answers become NA here (R warns "NAs introduced by coercion").
df_from_db$q534 <- as.numeric(as.character(df_from_db$q534))
# -1 is recoded to missing.
df_from_db$q534[df_from_db$q534 %in% -1] <- NA

# Wealth band: unknown count -> "Unknown", 0 cows -> "Poorer",
# 1-2 cows -> "Middle", anything else -> "Richer".
df_from_db$wealth <- as.character(df_from_db$q534)
df_from_db$wealth[is.na(df_from_db$wealth)] <- "Unknown"
df_from_db$wealth[df_from_db$wealth == "0"] <- "Poorer"
df_from_db$wealth[df_from_db$wealth %in% c("1", "2")] <- "Middle"
# Use a logical mask rather than -which(...): when no row is in one of the
# three bands above, which() is empty and x[-integer(0)] selects nothing, so
# no row would ever be labelled "Richer".
df_from_db$wealth[
  !(df_from_db$wealth %in% c("Unknown", "Poorer", "Middle"))
] <- "Richer"
df_from_db$wealth <- as.factor(df_from_db$wealth)
- Scaling answers from the 0-10 scale to a categorical scale (Negative, Neutral, Positive)
# Convert 0-10 answers into an ordered three-level category.
#
# x: vector of answers on the 0-10 scale (numeric, character or factor);
#    it is converted with as.numeric(as.character(x)).
# Returns an ordered factor of the same length as x with levels
# "Negative" < "Neutral" < "Positive". -1, NA, non-numeric values and anything
# outside the three bands below yield NA.
DPP_transform <- function(x) {
  x <- as.numeric(as.character(x))
  x[x %in% -1] <- NA
  answered <- !is.na(x)
  DPP <- rep(NA_character_, length(x))
  # ---- LEON'S -----------------------
  # 0-6 'Negative', 7-8 'Neutral', 9-10 'Positive'
  # ---- Ahmed'S ----------------------
  DPP[answered & x >= 0 & x <= 4] <- "Negative"
  DPP[answered & x >= 5 & x <= 6] <- "Neutral"
  DPP[answered & x >= 7 & x <= 10] <- "Positive"
  factor(DPP, levels = c("Negative", "Neutral", "Positive"), ordered = TRUE)
}
# Recode the seven 0-10 answer columns (q578 ... q584) into
# Negative / Neutral / Positive with DPP_transform().
# Each column is first restricted to the valid answers "0".."10": "-1" and
# any other value become NA before the transformation.
dpp_cols <- paste0("q", 578:584)
df_from_db[dpp_cols] <- lapply(df_from_db[dpp_cols], function(answers) {
  answers <- as.character(answers)
  answers[answers %in% "-1"] <- NA
  DPP_transform(
    factor(answers, levels = as.character(0:10), ordered = TRUE)
  )
})
- Renaming variables
# Replace the survey question codes with descriptive column names.
df_from_db <- plyr::rename(
  df_from_db,
  replace = c(
    q590 = "cig",
    q539 = "county",
    q575 = "sex",
    q577 = "technology",
    q534 = "numDairycows",
    q604 = "age"
  )
)
- Saving cleaned Data to table
# Build a second copy of every row whose cig and county are both "AVERAGE",
# stack it on top of the cleaned rows, and export the combined table as CSV.
df <- df_from_db
df[["cig"]] <- rep("AVERAGE", nrow(df_from_db))
df[["county"]] <- rep("AVERAGE", nrow(df_from_db))
df_from_db <- rbind(df, df_from_db)
write.csv(x = df_from_db, file = "bgak_technology_table.csv")