Write a data frame to an SPSS file using the foreign package
write.SPSS <- function (fileout) {
wd <- getwd()
txt <- paste(wd,"/",fileout,".txt",sep="")
sps <- paste(wd,"/",fileout,".sps",sep="")
foreign::write.foreign(eval(parse(text=fileout)),txt,sps,package="SPSS")
print(paste("SPSS file written to: ",wd))
}
Quck Plots
#Load functions and libraries
g.hist <- function(var,bw=1){
library(ggplot2)
data <- as.data.frame(var)
p1 <- ggplot(data,aes(x=data[,1]))
p1 <- p1 + geom_histogram(aes(y = ..density..),binwidth=bw)
p1 <- p1 + stat_function(fun = dnorm,
args = list(mean = mean(data[,1]), sd = sd(data[,1])),
lwd = 1,
col = 'red')
p1 <- p1 + geom_density(alpha=.2, fill="#FF6666")
p1
}
g.freq <- function(var) {
var <-dat.abq$Beds
var <- as.data.frame(var)
library(ggplot2)
p <- ggplot(data=var)
p + geom_freqpoly()
print(p)
}
Diagnostic Plots
lm.diag.plots <- function(fit, rc.mfrow=NA, which.plot=c(1,4,6), outliers=3, sw.order.of.data = FALSE) {
## lm.diag.plots() is a general function for plotting residual diagnostics for an lm() object
## Arguments:
# fit linear model object returned by lm()
# rc.mfrow number of rows and columns for the graphic plot, e.g., c(2,3)
# which.plot default plot numbers for lm()
# outliers number to identify in plots from lm() and qqPlot()
# sw.order.of.data T/F for whether to show residuals by order of data
# variable names
var.names <- names(fit$model)[-1]
# display settings
if (is.na(rc.mfrow[1])) {
rc.mfrow <- c(ceiling((length(var.names) + 3 + length(which.plot) + sw.order.of.data) / 3), 3)
}
op <- par(no.readonly = TRUE) # the whole list of settable par
par(mfrow = rc.mfrow)
# default: Fitted, Cook's distance (with cutoff), and Leverage (with cutoffs)
for(i.plot in which.plot) {
plot(fit, which = i.plot, id.n = outliers)
if (i.plot == 4) {
Di.large <- 4 / (dim(fit$model)[1] - dim(fit$model)[2] - 1)
abline(h = Di.large, col = "blue", lty = 3) # horizontal line
}
if (i.plot == 6) {
lev.large <- c(2, 3) * dim(fit$model)[2] / dim(fit$model)[1]
abline(v = lev.large[1], col = "blue", lty = 3) # horizontal line
abline(v = lev.large[2], col = "blue", lty = 2) # horizontal line
}
}
# Evaluate homoscedasticity
library(car)
# non-constant error variance test
print(ncvTest(fit))
# plot studentized residuals vs. fitted values
try(spreadLevelPlot(fit, sub = "(Homoscedasticity)"))
# Evaluate Collinearity
library(car)
vif.val <- vif(fit) # variance inflation factors
dotchart(vif.val, main = "Collinearity", xlab = "Variance Inflation Factor (VIF)", sub = "Not as useful with interactions")
abline(v = 2^2, col = "blue", lty = 2) # vertical line
# Normal quantile plot (QQ-plot)
library(car)
qqPlot(fit$residuals, las = 1, id.n = outliers, main="QQ Plot", ylab = "Residuals")
# Box-Cox transformation suggestion
# only if all values are positive
if(min(fit$model[,1] > 0)){
library(car)
boxCox(lm.full, lambda = seq(-3,3,length=101), main = "Box-Cox power transformation")
}
# residuals vs order of data
if(sw.order.of.data) {
# order of data (not always interesting)
plot(fit$residuals, main="Residuals vs Order of data", ylab = "Residuals")
abline(h = 0, col = "gray75", lty = 3) # horizontal line at zero
}
# residuals plotted vs each main effect
for(i.plot in 1:length(var.names)) {
m.lab <- paste("Residuals vs.", var.names[i.plot])
plot(fit$model[,var.names[i.plot]], fit$residuals, main=m.lab, ylab = "Residuals", xlab = var.names[i.plot])
abline(h = 0, col = "gray75", lty = 3) # horizontal line at zero
}
par(op) # reset plotting options
## Useful list of diags: http://www.statmethods.net/stats/rdiagnostics.html
} # end of reg.diag.plots()
Recode variables (Incomplete)
#recode variable
recode2 <- function(data,var,isvalue=uniquefx,newvalue,as.n=TRUE,as.f=FALSE) {
###debug###
#data <- CP
#var <- "partnergender"
#isvalue <- unique(data[,var])
#newvalue <- c("female","male",NA,NA)
#as.n <- FALSE
#as.f <- TRUE
######
uniquefx <- unique(data[,var])
string.out <- NULL
if(as.n==TRUE){
for (i in 1:length(isvalue)){
string.out <- c(string.out, paste("'",isvalue[i],"'=",newvalue[i],";",sep=""))
}
}
if(as.n==FALSE){
for (i in 1:length(isvalue)) {
string.out <- c(string.out, paste("'",isvalue[i],"'=","'",newvalue[i],"'",";",sep=""))
}
}
s2 <- paste(string.out,sep="", collapse="")
data[,var] <-car::recode(data[,var],s2, as.numeric.result=as.n, as.factor.result=as.f)
data
}
##create a list with unique values for each variable
ls.unique <- function(data) {
##Debug
#data <- CP[,147:180]
##
c.name <- colnames(data)
l1 <- list()[1:length(c.name)]
names(l1)[1:length(c.name)] <- c.name
for (i in 1:length(c.name)) {
l1[[c.name[i]]] <- unique(data[,c.name[i]])
}
l1
}
##Converts a list to a data frame where the list has different values
Cor.ls.to.df <- function(ls,empty.cell.val="XXXX") {
#Debug
#List <- l1
#i <- 1
#empty.cell.val="XXXX"
maxl <- max(as.data.frame(lapply(ls,length)))
for(i in 1:length(ls)) {
dif <- maxl-length(ls[[i]])
ls[[i]] <- c(ls[[i]],rep(empty.cell.val,dif))
}
as.data.frame(ls,stringsAsFactors = FALSE)
}
#Converts data frame back to a list and removes placeholders for unequal
Cor.df.to.ls <- function(df,empty.cel.val="XXXX") {
#debug
#df <- df.1
ls.df <- as.list(df)
for (i in 1:length(ls.df)){
x <- ls.df[[i]]
x <- x[x!="XXXX"]
ls.df[[i]] <- x
}
ls.df
}
##Bulk recoding based on Previous
recode.bulk <- function(ls,data,method="none") {
#i <- 1
#ls <- values.new
#data <- CP[,147:180]
#method<- "none"
if(method=="none") {n=F ; f=F}
if(method=="numeric") {n=T; f=F}
if(method=="factor") {n=F; f=T}
for(i in 1:ncol(data)){
data <- recode2(data,colnames(data)[i],newvalue=ls[[i]],as.n=n,as.f=n)
}
data
}
#####recode function
#step1 create a list of unique values
#step2 append the list so each has the same length
#step3 convert the list to a data frame
#step4 place the structure of the output on top
#step5 double the data frame <- NULL values creating the empty space
#step6 open the fix window
#step7 save the key file
#step8 recode the data based on the new variables