Write a data frame to an SPSS file using the foreign package
Quck Plots
Diagnostic Plots
Recode variables (Incomplete)

Write a data frame to an SPSS file using the foreign package

write.SPSS <- function (fileout) {
  wd <- getwd()
  txt <- paste(wd,"/",fileout,".txt",sep="")
  sps <- paste(wd,"/",fileout,".sps",sep="")
  foreign::write.foreign(eval(parse(text=fileout)),txt,sps,package="SPSS")
  print(paste("SPSS file written to: ",wd))
}

Quck Plots

#Load functions and libraries
g.hist <- function(var,bw=1){
  library(ggplot2)
  data <- as.data.frame(var)
  p1 <- ggplot(data,aes(x=data[,1]))
  p1 <- p1 + geom_histogram(aes(y = ..density..),binwidth=bw)
  p1 <- p1 + stat_function(fun = dnorm, 
                           args = list(mean = mean(data[,1]), sd = sd(data[,1])), 
                           lwd = 1, 
                           col = 'red')
  p1 <- p1 + geom_density(alpha=.2, fill="#FF6666") 
  p1
}

g.freq <- function(var) {
  var <-dat.abq$Beds
  var <- as.data.frame(var)
  library(ggplot2)
  p <- ggplot(data=var)
  p + geom_freqpoly()
  print(p)
}

Diagnostic Plots

lm.diag.plots <- function(fit, rc.mfrow=NA, which.plot=c(1,4,6), outliers=3, sw.order.of.data = FALSE) {
  ## lm.diag.plots() is a general function for plotting residual diagnostics for an lm() object
  ## Arguments:
  # fit          linear model object returned by lm()
  # rc.mfrow     number of rows and columns for the graphic plot, e.g., c(2,3)
  # which.plot   default plot numbers for lm()
  # outliers     number to identify in plots from lm() and qqPlot()
  # sw.order.of.data T/F for whether to show residuals by order of data

  # variable names
  var.names <- names(fit$model)[-1]
  # display settings
  if (is.na(rc.mfrow[1])) {
    rc.mfrow <- c(ceiling((length(var.names) + 3 + length(which.plot) + sw.order.of.data) / 3), 3)
  }
  op <- par(no.readonly = TRUE) # the whole list of settable par
  par(mfrow = rc.mfrow)

  # default: Fitted, Cook's distance (with cutoff), and Leverage (with cutoffs)
  for(i.plot in which.plot) {
    plot(fit, which = i.plot, id.n = outliers)
    if (i.plot == 4) {
      Di.large <- 4 / (dim(fit$model)[1] - dim(fit$model)[2] - 1)
      abline(h = Di.large, col = "blue", lty = 3)  # horizontal line
    }
    if (i.plot == 6) {
      lev.large <- c(2, 3) * dim(fit$model)[2] / dim(fit$model)[1]
      abline(v = lev.large[1], col = "blue", lty = 3)  # horizontal line
      abline(v = lev.large[2], col = "blue", lty = 2)  # horizontal line
    }
  }

  # Evaluate homoscedasticity
  library(car)
  # non-constant error variance test
  print(ncvTest(fit))
  # plot studentized residuals vs. fitted values
  try(spreadLevelPlot(fit, sub = "(Homoscedasticity)"))

  # Evaluate Collinearity
  library(car)
  vif.val <- vif(fit) # variance inflation factors
  dotchart(vif.val, main = "Collinearity", xlab = "Variance Inflation Factor (VIF)", sub = "Not as useful with interactions")
  abline(v = 2^2, col = "blue", lty = 2)  # vertical line

  # Normal quantile plot (QQ-plot)
  library(car)
  qqPlot(fit$residuals, las = 1, id.n = outliers, main="QQ Plot", ylab = "Residuals")

  # Box-Cox transformation suggestion
  # only if all values are positive
  if(min(fit$model[,1] > 0)){
    library(car)
    boxCox(lm.full, lambda = seq(-3,3,length=101), main = "Box-Cox power transformation")
  }

  # residuals vs order of data
  if(sw.order.of.data) {
    # order of data (not always interesting)
    plot(fit$residuals, main="Residuals vs Order of data", ylab = "Residuals")
    abline(h = 0, col = "gray75", lty = 3)  # horizontal line at zero
  }

  # residuals plotted vs each main effect
  for(i.plot in 1:length(var.names)) {
    m.lab <- paste("Residuals vs.", var.names[i.plot])
    plot(fit$model[,var.names[i.plot]], fit$residuals, main=m.lab, ylab = "Residuals", xlab = var.names[i.plot])
    abline(h = 0, col = "gray75", lty = 3)  # horizontal line at zero
  }

  par(op) # reset plotting options

  ## Useful list of diags: http://www.statmethods.net/stats/rdiagnostics.html
} # end of reg.diag.plots()

Recode variables (Incomplete)

#recode variable
recode2 <- function(data,var,isvalue=uniquefx,newvalue,as.n=TRUE,as.f=FALSE) {
  ###debug###
  #data <- CP
  #var <- "partnergender"
  #isvalue <- unique(data[,var])
  #newvalue <- c("female","male",NA,NA)
  #as.n <- FALSE
  #as.f <- TRUE
  ######
  uniquefx <- unique(data[,var])
  string.out <- NULL
    if(as.n==TRUE){
      for (i in 1:length(isvalue)){
      string.out <- c(string.out, paste("'",isvalue[i],"'=",newvalue[i],";",sep=""))
      }
    }
    if(as.n==FALSE){
      for (i in 1:length(isvalue)) {
        string.out <- c(string.out, paste("'",isvalue[i],"'=","'",newvalue[i],"'",";",sep=""))
      }
    }
  s2 <- paste(string.out,sep="", collapse="")
  data[,var] <-car::recode(data[,var],s2, as.numeric.result=as.n, as.factor.result=as.f)
  data
}

##create a list with unique values for each variable
ls.unique <- function(data) {
  ##Debug
  #data <- CP[,147:180]
  ##
  c.name <- colnames(data)
  l1 <- list()[1:length(c.name)]
  names(l1)[1:length(c.name)] <- c.name
  for (i in 1:length(c.name)) {
    l1[[c.name[i]]] <- unique(data[,c.name[i]])
  }
  l1
}

##Converts a list to a data frame where the list has different values
Cor.ls.to.df <- function(ls,empty.cell.val="XXXX") {
  #Debug
  #List <- l1
  #i <- 1
  #empty.cell.val="XXXX"
  maxl <- max(as.data.frame(lapply(ls,length)))
  for(i in 1:length(ls)) {
    dif <- maxl-length(ls[[i]])
    ls[[i]] <- c(ls[[i]],rep(empty.cell.val,dif))
  }
  as.data.frame(ls,stringsAsFactors = FALSE)
}

#Converts data frame back to a list and removes placeholders for unequal 
Cor.df.to.ls <- function(df,empty.cel.val="XXXX") {
  #debug
  #df <- df.1
  ls.df <- as.list(df)
  for (i in 1:length(ls.df)){
    x <- ls.df[[i]] 
    x <- x[x!="XXXX"]
    ls.df[[i]] <- x
  }
  ls.df
}

##Bulk recoding based on Previous
recode.bulk <- function(ls,data,method="none") {
  #i <- 1
  #ls <- values.new
  #data <- CP[,147:180]
  #method<- "none"
  if(method=="none") {n=F ; f=F}
  if(method=="numeric") {n=T; f=F}
  if(method=="factor") {n=F; f=T}
  for(i in 1:ncol(data)){
    data <- recode2(data,colnames(data)[i],newvalue=ls[[i]],as.n=n,as.f=n)
  }
  data
}

#####recode function

#step1 create a list of unique values
#step2 append the list so each has the same length
#step3 convert the list to a data frame
#step4 place the structure of the output on top
#step5 double the data frame <- NULL values creating the empty space
#step6 open the fix window
#step7 save the key file
#step8 recode the data based on the new variables

CUSTOM FUNCTIONS

Eric Kruger

March 13, 2016

Write a data frame to an SPSS file using the foreign package

Quck Plots

Diagnostic Plots

Recode variables (Incomplete)