sjPlot R-package

Description

Collection of several plotting functions for data visualization using ggplot. Results of several statistical analyses (that are commonly used in social sciences) can be visualized using this package, including simple and cross tabulated frequencies, histograms, box plots, (generalized) linear models (forest plots), PCA, correlations etc.

Furthermore, this package contains some tools that are useful when carrying out data analysis or interpreting data, for instance data set import, variable recoding, determination of cluster groups, interpretation of interactions in linear models etc.

Requirements

The sjPlot package depends on the following R packages, which have to be installed prior to the installation of sjPlot.

car
foreign
ggplot2
lmtest
MASS
plyr
quantreg
reshape2
scales

Use the following script to install only those required packages that are missing:

# Install every required package that is not yet present in the library.
# The loop iterates over the package names themselves rather than over
# 1:length(depends), so it is a no-op when `depends` is empty.
depends <- c("MASS", "car", "foreign", "ggplot2", "plyr", "lmtest", "reshape2", 
    "scales", "quantreg")
for (pkg in depends) {
    # installed.packages() is queried on every pass so that a package pulled
    # in as a dependency of an earlier install is not installed twice.
    if (!(pkg %in% rownames(installed.packages()))) {
        cat(paste0("Installing missing package \"", pkg, "\"...\n"))
        install.packages(pkg)
    } else {
        cat(paste0("Package \"", pkg, "\" already installed...\n"))
    }
}

## Package "MASS" already installed...
## Package "car" already installed...
## Package "foreign" already installed...
## Package "ggplot2" already installed...
## Package "plyr" already installed...
## Package "lmtest" already installed...
## Package "reshape2" already installed...
## Package "scales" already installed...
## Package "quantreg" already installed...

install.packages("C:/Users/Mark/Downloads/sjPlot_0.7.tar.gz", repos = NULL, 
    type = "source")

library(sjPlot)
data(efc)

sji.convertToLabel Replaces variable values with their associated value labels

print(sji.getValueLabels(efc)["c161sex"])

## $c161sex
## [1] "Male"   "Female"

head(efc$c161sex)

## [1] 2 2 1 1 2 1

head(sji.convertToLabel(efc$c161sex))

## [1] Female Female Male   Male   Female Male  
## Levels: Female Male


print(sji.getValueLabels(efc)["e42dep"])

## $e42dep
## [1] "independent"          "slightly dependent"   "moderately dependent"
## [4] "severely dependent"

table(efc$e42dep)

## 
##   1   2   3   4 
##  66 225 306 304

table(sji.convertToLabel(efc$e42dep))

## 
##          independent moderately dependent   severely dependent 
##                   66                  306                  304 
##   slightly dependent 
##                  225

sji.convertToValue Converts factors to numeric variables

test <- sji.convertToLabel(efc$e42dep)
table(test)

## test
##          independent moderately dependent   severely dependent 
##                   66                  306                  304 
##   slightly dependent 
##                  225


table(sji.convertToValue(test))

## 
##   1   2   3   4 
##  66 306 304 225

hist(sji.convertToValue(test, 0))

plot of chunk unnamed-chunk-5

sjp.aov1 Plot One-Way-Anova tables

sjp.aov1(efc$c12hour, as.factor(efc$e42dep))

plot of chunk unnamed-chunk-6


data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.aov1(efc$c12hour, as.factor(efc$e42dep), axisLabels.y = efc.val["e42dep"], 
    axisTitle.x = efc.var[["c12hour"]])

plot of chunk unnamed-chunk-6


sjp.aov1(efc$c12hour, as.factor(efc$e42dep), axisLabels.y = efc.val["e42dep"], 
    title = efc.var[["c12hour"]], type = "bars", meansums = TRUE, hideErrorBars = TRUE, 
    theme = "minimal", minorGridColor = "white", showTickMarks = FALSE, showModelSummary = FALSE, 
    hideGrid.x = TRUE)

plot of chunk unnamed-chunk-6

sjp.chi2 Plot Pearsons Chi2-Test of multiple contingency tables

# create data frame with 5 dichotomous (dummy) variables
df <- data.frame(as.factor(sample(1:2, 100, replace = TRUE)), as.factor(sample(1:2, 
    100, replace = TRUE)), as.factor(sample(1:2, 100, replace = TRUE)), as.factor(sample(1:2, 
    100, replace = TRUE)), as.factor(sample(1:2, 100, replace = TRUE)))

str(df)

## 'data.frame':    100 obs. of  5 variables:
##  $ as.factor.sample.1.2..100..replace...TRUE..  : Factor w/ 2 levels "1","2": 1 2 1 2 1 2 1 1 1 2 ...
##  $ as.factor.sample.1.2..100..replace...TRUE...1: Factor w/ 2 levels "1","2": 2 1 1 1 1 1 2 1 1 2 ...
##  $ as.factor.sample.1.2..100..replace...TRUE...2: Factor w/ 2 levels "1","2": 2 1 2 1 1 1 2 2 1 2 ...
##  $ as.factor.sample.1.2..100..replace...TRUE...3: Factor w/ 2 levels "1","2": 2 2 1 1 2 2 1 2 1 2 ...
##  $ as.factor.sample.1.2..100..replace...TRUE...4: Factor w/ 2 levels "1","2": 1 1 1 1 2 1 2 2 1 1 ...

# create variable labels
items <- list(c("Item 1", "Item 2", "Item 3", "Item 4", "Item 5"))

# plot Chi2-contingency-table
sjp.chi2(df, axisLabels = items)

plot of chunk unnamed-chunk-7

sjp.corr Plot correlation matrix

# create data frame with 5 random variables
df <- as.data.frame(cbind(rnorm(10), rnorm(10), rnorm(10), rnorm(10), rnorm(10)))

# plot correlation matrix using circles
sjp.corr(df)

## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used

## [1] "Computing correlation using spearman-method with listwise-deletion..."

plot of chunk unnamed-chunk-8


# plot correlation matrix using square tiles without diagram background
sjp.corr(df, type = "tile", theme = "none")

## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used

## [1] "Computing correlation using spearman-method with listwise-deletion..."

plot of chunk unnamed-chunk-8


# ------------------------------- Data from the EUROFAMCARE sample dataset
# -------------------------------
data(efc)

# retrieve variable and value labels
varlabs <- sji.getVariableLabels(efc)

# receive position of first COPE-index item to include
start <- which(colnames(efc) == "c83cop2")
start

## [1] 7


# receive position of last COPE-index item to include
end <- which(colnames(efc) == "c88cop7")
end

## [1] 12


# create data frame with COPE-index scale
df <- as.data.frame(efc[, c(start:end)])
colnames(df) <- varlabs[c(start:end)]

# we have high correlations here, because all items belong to one factor.
# See example from 'sjp.pca'.
sjp.corr(df, type = "tile", theme = "none", outlineColor = "white", hideLegend = FALSE)

## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used

## [1] "Computing correlation using spearman-method with listwise-deletion..."

plot of chunk unnamed-chunk-8

sjp.frq Plot frequencies of (count) variables

# boxplot
sjp.frq(ChickWeight$weight, type = "box")

plot of chunk unnamed-chunk-9


# histogram
sjp.frq(discoveries, type = "hist", showMeanIntercept = TRUE)

plot of chunk unnamed-chunk-9


# histogram with minimal theme
sjp.frq(discoveries, type = "hist", showMeanIntercept = TRUE, theme = "minimal", 
    minorGridColor = "white", showTickMarks = FALSE, hideGrid.x = TRUE)

plot of chunk unnamed-chunk-9


# violin plot
sjp.frq(ChickWeight$weight, type = "v")

plot of chunk unnamed-chunk-9


# bar plot
sjp.frq(ChickWeight$Diet)

plot of chunk unnamed-chunk-9

sjp.frq(ChickWeight$Diet, maxYlim = TRUE)

plot of chunk unnamed-chunk-9


# bar plot with EUROFAMCARE sample dataset. The dataset was imported from an
# SPSS-file, using: efc <- sji.SPSS('efc.sav', enc='UTF-8')
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.frq(as.factor(efc$e15relat), title = efc.var[["e15relat"]], axisLabels.x = efc.val["e15relat"], 
    axisLabelAngle.x = 90)

plot of chunk unnamed-chunk-9


# bar plot with EUROFAMCARE sample dataset grouped variable
ageGrp <- sju.groupVar(efc$e17age)
ageGrpLab <- sju.groupVarLabels(efc$e17age)
sjp.frq(ageGrp, title = efc.var[["e17age"]], axisLabels.x = ageGrpLab)

plot of chunk unnamed-chunk-9


# minimal theme
sjp.frq(ageGrp, title = efc.var[["e17age"]], axisLabels.x = ageGrpLab, theme = "minimal", 
    minorGridColor = "white", showTickMarks = FALSE, hideGrid.x = TRUE)

plot of chunk unnamed-chunk-9


# box plots with interaction variable. The following example is equal to the
# function call sjp.grpfrq(efc$e17age, efc$e16sex, type='box')
sjp.frq(efc$e17age, title = paste(efc.var[["e17age"]], "by", efc.var[["e16sex"]]), 
    interactionVar = efc$e16sex, interactionVarLabels = efc.val["e16sex"], type = "box")

plot of chunk unnamed-chunk-9

sjp.glm Plot odds ratios (forest plots)

# prepare dichotomous dependent variable
y <- ifelse(swiss$Fertility < median(swiss$Fertility), 0, 1)

# fit model
fitOR <- glm(y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality + 
    swiss$Catholic, family = binomial)

# print Odds Ratios as dots
sjp.glm(fitOR)

## Waiting for profiling to be done...

## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50

plot of chunk unnamed-chunk-10


# print Odds Ratios as bars
sjp.glm(fitOR, type = "bars")

## Waiting for profiling to be done...

## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50

plot of chunk unnamed-chunk-10


# ------------------------------- Predictors for negative impact of care.
# Data from the EUROFAMCARE sample dataset -------------------------------
data(efc)

# retrieve predictor variable labels
labs <- sji.getVariableLabels(efc)
predlab <- c(labs[["c161sex"]], labs[["e42dep"]], paste0(labs[["c172code"]], 
    " (mid)"), paste0(labs[["c172code"]], " (high)"))

# create binary response
y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1)

# create dummy variables for educational status
edu.mid <- ifelse(efc$c172code == 2, 1, 0)
edu.high <- ifelse(efc$c172code == 3, 1, 0)

# create data frame for fitted model
# NOTE: cbind() discards the classes of its inputs, so the factors created
# below are stored as their internal integer codes. Each predictor therefore
# ends up as a single numeric column (four predictors in total, matching the
# four labels in `predlab`), and na.omit() then drops every row that has a
# missing value in any of these columns.
df <- na.omit(as.data.frame(cbind(y, as.factor(efc$c161sex), as.factor(efc$e42dep), 
    as.factor(edu.mid), as.factor(edu.high))))

# fit model
fit <- glm(y ~ ., data = df, family = binomial(link = "logit"))

# plot odds
sjp.glm(fit, title = labs[["neg_c_7"]], axisLabels.y = predlab)

## Waiting for profiling to be done...

## Intercept = 0.02
## R2[cs] = 0.161
## R2[n] = 0.215
## Lambda = 1000.04
## Chi2 = 0.00
## AIC = 1010.04

plot of chunk unnamed-chunk-10

sjp.glm.ma Plot model assumptions of glms

# prepare dichotomous dependent variable
y <- ifelse(swiss$Fertility < median(swiss$Fertility), 0, 1)

# fit model
fitOR <- glm(y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality + 
    swiss$Catholic, family = binomial)

# plot model assumptions
sjp.glm.ma(fitOR)

## 
## Removed 1 cases during 1 step(s).
## AIC-value of original model: 51.50
## AIC-value of updated model: 46.41

plot of chunk unnamed-chunk-11

## 
## --------------------
## Check significance of terms when they entered the model...
## Anova original model:
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: y
## 
## Terms added sequentially (first to last)
## 
## 
##                        Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
## NULL                                      46       65.1            
## swiss$Education         1     9.21        45       55.9   0.0024 **
## swiss$Examination       1     9.30        44       46.6   0.0023 **
## swiss$Infant.Mortality  1     4.56        43       42.1   0.0327 * 
## swiss$Catholic          1     0.56        42       41.5   0.4547   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Waiting for profiling to be done...

plot of chunk unnamed-chunk-11

## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50

plot of chunk unnamed-chunk-11

## 
## Call:  glm(formula = y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality + 
##     swiss$Catholic, family = binomial, subset = -c(vars))
## 
## Coefficients:
##            (Intercept)         swiss$Education       swiss$Examination  
##                -3.6914                 -0.0396                 -0.1818  
## swiss$Infant.Mortality          swiss$Catholic  
##                 0.3332                  0.0146  
## 
## Degrees of Freedom: 45 Total (i.e. Null);  41 Residual
## Null Deviance:       63.7 
## Residual Deviance: 36.4  AIC: 46.4

sjp.grpfrq Plot grouped or stacked frequencies

# histogram plot
sjp.grpfrq(discoveries, sample(1:3, length(discoveries), replace = TRUE), type = "hist", 
    showValueLabels = FALSE)