QUALITY CONTROL 3.2 R version 3.5.1 (2018-07-02) – “Feather Spray” Original script developed in August, 2016
This script is set up to run for user-assigned market and segment with minimal manual adjustment. The following files should be in the same folder: reference Excel file, csv files for each market.
### Set working directory. The path will vary from user to user.
setwd(choose.dir())
### Load required packages. The script checks and installs the packages, if necessary.
### Function borrowed from Steven Worthington (https://gist.github.com/stevenworthington/3178163)
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
packages <- c("rio", "reshape", "reshape2", "data.table", "dplyr", "svDialogs")
ipak(packages)
### Load the Excel file used to check the data entered in SPSS
reference.file <- file.choose()
REF.DATA.QC <- suppressWarnings(import(reference.file))
head(REF.DATA.QC)
### Remove punctuation from column names
names(REF.DATA.QC) <- gsub(pattern = "[[:punct:]]", names(REF.DATA.QC), replacement = "")
### Trim to select the relevant data: Segment, Brand, KPI, and Quarter
NameList <- c("Segment","BrandCategory ", "KPI1", "Q2 2006")
idx <- match(NameList, names(REF.DATA.QC))
idx <- sort(idx)
QC.Q2.2006 <- REF.DATA.QC[,idx]
### Assign user-friendly column/variable names
BetterNames <- c("Segment", "Product", "Measure", "Value")
colnames(QC.Q2.2006) <- BetterNames
head(QC.Q2.2006)
### Subset data broken down by segment
seg.lab <- dlgInput(message = "Enter a product label exactly as it appears in the reference file", default = "")
SEGMENT.Subset <- QC.Q2.2006[grep(seg.lab$res, QC.Q2.2006$Segment), ]
head(SEGMENT.Subset)
### Convert "long" data to "wide" data; clean up column names; remove
SEGMENT.refdat <- dcast(SEGMENT.Subset, Product ~ Measure)
head(SEGMENT.refdat)
summary(SEGMENT.refdat)
### Clean up column names
ColumnNames <- names(SEGMENT.refdat)
colnames(SEGMENT.refdat) <- gsub("I - ", "", ColumnNames)
ColumnNames <- names(SEGMENT.refdat)
colnames(SEGMENT.refdat) <- gsub("KM - ", "", ColumnNames)
names(SEGMENT.refdat) <- tolower(names(SEGMENT.refdat))
### Remove unnecessary rows
SEGMENT.refdat <- SEGMENT.refdat[!grepl("vs.", SEGMENT.refdat$product),]
product <- SEGMENT.refdat[,1]
SEGMENT.refdat <- SEGMENT.refdat[,-1]
rownames(SEGMENT.refdat) <- product
SEGMENT.refdat <- SEGMENT.refdat[ , order(names(SEGMENT.refdat))]
### Read SPSS Exported Excel File and remove last 10 rows of unnecessary data.
SPSS.file <- file.choose()
SPSS.SEGMENT <- import(SPSS.file)
n<-dim(SPSS.SEGMENT)[1]
SPSS.SEGMENT<-SPSS.SEGMENT[1:(n-10),]
head(SPSS.SEGMENT)
### Clean up data frame
SPSS.SEGMENT <- SPSS.SEGMENT[-2,]
colnames(SPSS.SEGMENT) <- SPSS.SEGMENT[1,]
SPSS.SEGMENT <- SPSS.SEGMENT[-1,]
names(SPSS.SEGMENT) <- tolower(names(SPSS.SEGMENT))
head(SPSS.SEGMENT)
tail(SPSS.SEGMENT)
dim(SPSS.SEGMENT)
### Assign row names and convert column names to lower case
SPSS.SEGMENT <- subset(SPSS.SEGMENT, select=colMeans(is.na(SPSS.SEGMENT)) == 0)
names(SPSS.SEGMENT)[1] <- "product"
SPSS.SEGMENT <- sort_df(SPSS.SEGMENT, vars = "product")
product.names <- SPSS.SEGMENT[,1]
SPSS.SEGMENT <- SPSS.SEGMENT[-1]
rownames(SPSS.SEGMENT) <- product.names
names(SPSS.SEGMENT) <- tolower(names(SPSS.SEGMENT))
### Convert data to numeric.
SPSS.SEGMENT <- as.matrix(SPSS.SEGMENT)
mode(SPSS.SEGMENT) <- "numeric"
SPSS.SEGMENT <- as.data.frame(SPSS.SEGMENT)
### Calculate average. The reference data average excludes the reference vehicles.
SPSS.SEGMENT1 <- SPSS.SEGMENT
average <- colMeans((SPSS.SEGMENT1),0)
SPSS.SEGMENT <- rbind(SPSS.SEGMENT, average)
n <- length(row.names(SPSS.SEGMENT))
row.names(SPSS.SEGMENT)[n] <- "Average"
### Round to 0.
SPSS.SEGMENT <- round(SPSS.SEGMENT, 0)
SPSS.SEGMENT <- SPSS.SEGMENT[ , order(names(SPSS.SEGMENT))]
### Compare product names. No need to update names unless row order is different
reference.products <- row.names(SEGMENT.refdat)
SPSS.products <- row.names(SPSS.SEGMENT)
SPSS.products <- sort(SPSS.products)
which(reference.products != SPSS.products)
### Fix different column names
SEGMENT.names <- names(SEGMENT.refdat)
SPSS.names <- names(SPSS.SEGMENT)
SEGMENT.names
SPSS.names
SEGMENT.names <- gsub(pattern = "[[:punct:]]", SEGMENT.names, replacement = "")
SEGMENT.names <- gsub("awareness", "total awareness", SEGMENT.names)
SEGMENT.names <- gsub("familiarity2", "familiarity", SEGMENT.names)
SEGMENT.names <- gsub("versatile", "practical", SEGMENT.names)
SPSS.names <- gsub(pattern = "[[:punct:]]", SPSS.names, replacement = "")
SPSS.names <- gsub("mean", SPSS.names, replacement = "familiarity")
SPSS.names <- gsub("versatile", "practical", SPSS.names)
SPSS.names <- gsub("definitely not consider 1", "notconsider", SPSS.names)
SPSS.names <- gsub("total count", "totalcount", SPSS.names)
names(SEGMENT.refdat) <- SEGMENT.names
names(SPSS.SEGMENT) <- SPSS.names
### Remove extra columns "notconsider,", "none,", "totalcount"
SPSS.SEGMENT <- subset(SPSS.SEGMENT, select = -c(notconsider, none, totalcount))
names(SPSS.SEGMENT) ; names(SEGMENT.refdat)
### Resort by column and row names
SEGMENT.refdat <- SEGMENT.refdat[ , order(names(SEGMENT.refdat))]
SPSS.SEGMENT <- SPSS.SEGMENT[ , order(names(SPSS.SEGMENT))]
rn <- rownames(SPSS.SEGMENT)
SPSS.SEGMENT <- SPSS.SEGMENT[order(rn),]
View(SEGMENT.refdat) ; View(SPSS.SEGMENT)
accuracy.test <- anti_join(SEGMENT.refdat, SPSS.SEGMENT)
### Export comparison data in "long" format
invisible(setDT(SEGMENT.refdat, keep.rownames = TRUE)[])
invisible(setDT(SPSS.SEGMENT, keep.rownames = TRUE)[])
ref.data.melt <- melt.data.frame(SEGMENT.refdat)
SPSS.data.melt <- melt.data.frame(SPSS.SEGMENT)
comp.data <- merge(ref.data.melt, SPSS.data.melt, by = c("rn", "variable"))
comp.column <- comp.data$value.x - comp.data$value.y
comp.column <- ifelse(comp.column == 0, "SAME", "DIFFERENT")
comp.data <- cbind(comp.data, comp.column)
comp.data.names <- c("Brand/Product", "KPI", "Reference Data", "SPSS Data", "Comparison")
names(comp.data) <- comp.data.names
write.csv (comp.data, file=paste0("Comparison ", gsub(pattern = "[[:punct:]]", seg.lab$res, replacement = ""), ".csv"))
final.message <- ifelse(length(accuracy.test[,1]) == 0, "REFERENCE DATA AND SPSS DATA ARE IDENTICAL",
"DIFFERENCES BETWEEN REFERENCE DATA AND SPSS DATA. REVIEW RELEVANT COMPARISON.csv")
dlgMessage(final.message)$res
Complete quality control review or take corrective action depending on the results from the test. The output COMPARISON.csv contains frequency columns with the reference data and the data imported into SPSS. Each row is marked as “SAME” or “DIFFERENT” depending on whether the frequency match. If there are no differences, the quality control process is complete. If “DIFFERENT” appears anywhere in the file, corrective action need to take place in the data processing and this script can be run again.