## This script downloads data that has been extracted by coding teams during a data extraction activity. Minimal data cleaning is performed to allow for analysis.
## Import data from Google Sheets
## Register the Google Sheet by its key.
## NOTE(review): gs_key()/gs_read() come from googlesheets and cols()/col_*()
## from readr; neither package is attached in the visible script -- confirm
## they are loaded before this point.
key <- gs_key("1HOnqlfCHHo8NAQFeh8EzIIUr56fYaMsb4DHigZafNig")
## Console output: Worksheets feed constructed with public visibility

## Read the sheet, pinning the type of each extracted column so that
## means/SDs are parsed as doubles and sample sizes as integers.
data_import <- gs_read(
  key,
  col_types = cols(
    `Study ID` = col_character(),
    `Intervention Mean` = col_double(),
    `Intervention N` = col_integer(),
    `Intervention SD` = col_double(),
    `Control Mean` = col_double(),
    `Control N` = col_integer(),
    `Control SD` = col_double()
  )
)
## Console output: Accessing worksheet titled 'Form Responses 1'.

## This shows the data as entered into Google Sheets
## Clean data and create standard column names
## Copy the imported data under syntactically valid column names
## (e.g. `Study ID` becomes Study.ID), leaving data_import untouched.
data_cleaned <- setNames(data_import, make.names(names(data_import)))
## Upper-case the study identifiers so entries that differ only by letter
## case are treated as the same study.
data_cleaned[["Study.ID"]] <- toupper(data_cleaned[["Study.ID"]])
## Identify duplicate cases: remove exact duplicate entries, but keep duplicate entries for the same study whose data are inconsistent
## Keep one row per unique combination of study ID and the six extracted
## statistics, then number the surviving rows so they can be referred to
## by row.ID.
data_exact <- data_cleaned %>%
  distinct(
    Study.ID,
    Intervention.Mean, Intervention.SD, Intervention.N,
    Control.Mean, Control.SD, Control.N,
    .keep_all = TRUE
  ) %>%
  mutate(row.ID = row_number())
## Move row.ID to the front; all other columns keep their order.
data_exact <- data_exact[, union("row.ID", names(data_exact))]

## One row per study ID (the first occurrence is kept).
## Needed for the logic about inconsistent duplicates below
data_fuzzy <- distinct(data_cleaned, Study.ID, .keep_all = TRUE)

## This data has had exact duplicates removed. However, there are duplicate
## study entries with inconsistent data, manual data checking is required
## You can manually specify which row(s) to drop below. You should specify the row.IDs for the rows that you want to discard. If you don't wish to discard any rows keep rows_drop<-NULL
## Default: discard nothing. `%in% NULL` is FALSE for every row, so leaving
## rows_drop as NULL keeps all rows.
rows_drop <- NULL
## Manual override: row.IDs to discard. Remove or comment out this line to
## fall back to the NULL default above.
rows_drop <- c(2, 3)

## Keep every row whose row.ID is not listed in rows_drop.
data_manually <- data_exact[!(data_exact$row.ID %in% rows_drop), ]
## Specified rows (2, 3) have been discarded. This data should be ready to go

## Final dataset used for export.
data <- data_manually
## Export the final dataset to data.csv in the working directory.
## Row names are written as a leading unnamed column (write.csv default).
write.csv(x = data, file = "data.csv")