LoadingData

# Required libraries
require(plyr)

FALSE Loading required package: plyr

require(rmongodb)

FALSE Loading required package: rmongodb
FALSE WARNING!
FALSE There are some quite big changes in this version of rmongodb.
FALSE mongo.bson.to.list, mongo.bson.from.list (which are workhorses of many other rmongofb high-level functions) are rewritten.
FALSE Please, 
FALSE TEST IT BEFORE PRODUCTION USAGE.
FALSE Also there are some other important changes, please see NEWS file and release notes at
FALSE https://github.com/mongosoup/rmongodb/releases/

# Fetch the survey documents for SurveyGizmo survey 2198999 from the
# "fbc.results" collection, projecting only the questions used below.
#
# SECURITY NOTE(review): the database credentials were committed in plain text.
# They can now be overridden through the FBC_MONGO_USER / FBC_MONGO_PASSWORD
# environment variables; the literal fallbacks are kept only so the script
# keeps running unchanged. Rotate the password and drop the fallbacks.
data_list <- local({
  conn <- mongo.create(
    db = "fbc",
    host = "50.97.79.169:29883",
    username = Sys.getenv("FBC_MONGO_USER", unset = "fbcommons_user"),
    password = Sys.getenv("FBC_MONGO_PASSWORD", unset = "&sm1rKs4A")
  )

  # Fail early with a clear message instead of a confusing query error.
  if (!mongo.is.connected(conn)) {
    stop("Could not connect to the 'fbc' MongoDB database.", call. = FALSE)
  }

  # Always release the connection, even when the query itself fails.
  tryCatch(
    mongo.find.all(
      mongo = conn,
      ns = "fbc.results",
      query = '{"survey_id": 2198999}', # SurveyGizmo survey id
      fields = list( # rmongodb shorthand for a projection query
        # q407=1L, #  Date
        # q415=1L, #  Occasion
        # -------------------
        q534 = 1L, # Number of dairy cows
        q539 = 1L, # County
        q575 = 1L, # sex
        q590 = 1L, # CIG
        q604 = 1L, # Age
        # -------------------
        q576 = 1L, # Farm size
        q577 = 1L, # Technology
        q578 = 1L, # How easy was it to understand and use {the technology}
        q579 = 1L, # How easy was it to buy the inputs (such as fertiliser, pesticides, tools) that you needed?
        q580 = 1L, # How easy was it to get the labour that you need?
        q581 = 1L, # How satisfied are you with the quantity produced?
        q582 = 1L, # How satisfied are you with the quality of what was produced?
        q583 = 1L, # How satisfied are you with the quantity that you could sell?
        q584 = 1L, # How satisfied are you with the price that you received for your produce?
        q602 = 1L # Please tell us what changes or improvements would help you benefit more from this {technology}
      )
    ),
    finally = mongo.destroy(conn)
  )
})

# Each data_list element is one document (one survey response).
# ASSUMPTION: every value inside a data_list element has length 1 (i.e. data_list is not nested)





# Flatten the list of survey documents into a data frame with one row per
# document and one column per field, with columns sorted by name.
df_from_db <- local({
  if (length(data_list) == 0) {
    stop("The survey query returned no documents.", call. = FALSE)
  }

  # Column names are taken from the first document; every other document is
  # expected to carry the same fields in the same order.
  field_names <- names(data_list[[1]])
  flat_values <- unlist(data_list)

  # A missing or nested field would silently shift values into the wrong
  # columns when the flat vector is folded into a matrix, so stop instead.
  if (length(flat_values) != length(data_list) * length(field_names)) {
    stop(
      "Survey documents do not all have the same length-1 fields; ",
      "cannot build a rectangular table.",
      call. = FALSE
    )
  }

  # stringsAsFactors is spelled out because the recoding steps further down
  # manipulate factor levels; R >= 4.0.0 no longer converts strings by default.
  wide <- data.frame(
    matrix(flat_values, nrow = length(data_list), byrow = TRUE),
    stringsAsFactors = TRUE
  )
  colnames(wide) <- field_names
  wide[, order(colnames(wide)), drop = FALSE]
})

# Strip leading and trailing whitespace from every element of x.
# x is coerced to character by gsub(); the result is a character vector.
trim <- function(x) {
  edge_whitespace <- "^\\s+|\\s+$"
  gsub(pattern = edge_whitespace, replacement = "", x = x)
}

‘county’ data recoding

# County (q539) cleaning: blank answers and the "test" placeholder become NA,
# and the "MURANG'a" spelling is folded into "MURANG'A". The column is stored
# as a factor whose levels are the remaining distinct county names.
df_from_db$q539 <- local({
  county <- as.character(df_from_db$q539)
  county[county %in% ""] <- NA
  county[county %in% "MURANG'a"] <- "MURANG'A"
  county[county %in% "test"] <- NA
  factor(county)
})

‘sex’ data recoding

# Sex (q575) recoding: relabel the levels as "", "FEMALE", "MALE" and then
# turn the blank level into NA.
# NOTE(review): the relabelling is positional, so it presumes the raw column
# has exactly three distinct values that sort as blank, female, male - confirm
# against the data.
df_from_db$q575 <- local({
  sex <- factor(df_from_db$q575, labels = c("", "FEMALE", "MALE"))
  sex <- as.character(sex)
  sex[sex %in% ""] <- NA
  factor(sex)
})

‘technology’ data recoding

# Technology (q577) recoding: collapse the eight raw levels into blank,
# "TISSUE CONTROL" and "TISSUE CULTURE", then turn blanks into NA.
# Inspect the raw levels with levels(df_from_db$q577) before changing this.
# NOTE(review): the mapping is positional - it presumes exactly eight raw
# levels, the first four of which are blank/unusable answers; confirm.
df_from_db$q577 <- local({
  technology <- df_from_db$q577
  levels(technology) <- c(
    rep("", 4),
    rep("TISSUE CONTROL", 2),
    rep("TISSUE CULTURE", 2)
  )
  technology <- as.character(technology)
  technology[technology %in% ""] <- NA
  factor(technology)
})

‘number of cows’ data recoding

# ASSUMPTION: There are no fractional cows. Coercing the values to numeric
# is good enough

# Number of dairy cows (q534): convert the raw answers to numbers. Answers
# that are not numeric become NA (R emits "NAs introduced by coercion"), and
# -1 is also mapped to NA (presumably the survey's "no answer" code - confirm).
df_from_db$q534 <- local({
  cows <- as.numeric(as.character(df_from_db$q534))
  cows[is.na(cows) | cows == -1] <- NA
  cows
})

# Wealth band derived from the number of cows:
#   NA -> "Unknown", 0 -> "Poorer", 1 or 2 -> "Middle", anything else -> "Richer".
# Every row starts as "Richer" and is then overwritten by the narrower bands.
# The previous `x[-which(cond)] <- "Richer"` form assigned nothing when no row
# matched `cond`, leaving raw numbers in the column.
df_from_db$wealth <- local({
  cows <- df_from_db$q534
  wealth <- rep("Richer", length(cows))
  wealth[is.na(cows)] <- "Unknown"
  wealth[cows %in% 0] <- "Poorer"
  wealth[cows %in% c(1, 2)] <- "Middle"
  as.factor(wealth)
})

Scaling answers from the 0-10 scale to a categorical scale (Negative, Neutral, Positive)

# Bucket 0-10 ratings into an ordered three-level sentiment factor.
#
# x: vector of ratings (numeric, character or factor); it is converted with
#    as.numeric(as.character(x)), and -1 is treated as missing.
# Returns an ordered factor of the same length as x with levels
# Negative < Neutral < Positive. Values outside the bands below (including
# NA and non-integers that fall between bands) come back as NA.
#
# Cut-offs in use (Ahmed's): 0-4 Negative, 5-6 Neutral, 7-10 Positive.
# Alternative cut-offs considered earlier (Leon's): 0-6 Negative,
# 7-8 Neutral, 9-10 Positive.
DPP_transform <- function(x) {
  sentiment_levels <- c("Negative", "Neutral", "Positive")

  scores <- as.numeric(as.character(x))
  scores[scores %in% -1] <- NA

  sentiment <- rep(NA_character_, length(x))
  sentiment[which(scores >= 0 & scores <= 4)] <- "Negative"
  sentiment[which(scores >= 5 & scores <= 6)] <- "Neutral"
  sentiment[which(scores >= 7 & scores <= 10)] <- "Positive"

  factor(sentiment, levels = sentiment_levels, ordered = TRUE)
}


# Recode the seven rating questions (q578-q584) into Negative / Neutral /
# Positive. For each column: "-1" becomes NA, answers other than the strings
# "0".."10" are dropped to NA by the factor() call, and the remaining scores
# are bucketed with DPP_transform().
df_from_db <- local({
  rating_cols <- c("q578", "q579", "q580", "q581", "q582", "q583", "q584")
  valid_scores <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10")

  recoded <- df_from_db
  recoded[rating_cols] <- lapply(recoded[rating_cols], function(answers) {
    answers <- as.character(answers)
    answers[answers %in% "-1"] <- NA
    DPP_transform(factor(answers, levels = valid_scores, ordered = TRUE))
  })
  recoded
})

Renaming variables

# Replace question ids with descriptive column names (old name = "new name").
# Columns not listed here keep their question id.
df_from_db <- plyr::rename(
  df_from_db,
  replace = c(
    q590 = "cig",
    q539 = "county",
    q575 = "sex",
    q577 = "technology",
    q534 = "numDairycows",
    q604 = "age"
  )
)

Saving cleaned Data to table

# Build the export table: a copy of every row with cig and county set to
# "AVERAGE" is stacked on top of the original rows, and the combined table is
# written to CSV in the working directory (row names included, the
# write.csv() default).
df <- df_from_db
df[["cig"]] <- rep("AVERAGE", nrow(df))
df[["county"]] <- rep("AVERAGE", nrow(df))

df_from_db <- rbind(df, df_from_db)
write.csv(x = df_from_db, file = "bgak_technology_table.csv")

LoadingData

Ahmed Tadde

September 27, 2015