sjPlot R-package

sjPlot R-package

Description

Collection of several plotting functions for data visualization using ggplot. Results of several statistical analyses (that are commonly used in social sciences) can be visualized using this package, including simple and cross tabulated frequencies, histograms, box plots, (generalized) linear models (forest plots), PCA, correlations etc.

Furthermore, this package contains some tools that are useful when carrying out data analysis or interpreting data, for instance data set import, variable recoding, determination of cluster groups, interpretation of interactions in linear models etc.

Requirements

The sjPlot package depends on following R packages that have to be installed prior to the installtion of sjPlot.

Use following script to install only those required packages that are missing:

depends <- c("MASS", "car", "foreign", "ggplot2", "plyr", "lmtest", "reshape2", 
    "scales", "quantreg")
for (i in 1:length(depends)) {
    if ((depends[i] %in% rownames(installed.packages())) == FALSE) {
        cat(paste0("Installing missing package \"", depends[i], "\"...\n"))
        install.packages(depends[i])
    } else {
        cat(paste0("Package \"", depends[i], "\" already installed...\n"))
    }
}
## Package "MASS" already installed...
## Package "car" already installed...
## Package "foreign" already installed...
## Package "ggplot2" already installed...
## Package "plyr" already installed...
## Package "lmtest" already installed...
## Package "reshape2" already installed...
## Package "scales" already installed...
## Package "quantreg" already installed...
install.packages("C:/Users/Mark/Downloads/sjPlot_0.7.tar.gz", repos = NULL, 
    type = "source")
library(sjPlot)
data(efc)

sji.convertToLabel Replaces variable values with their associated value labels

print(sji.getValueLabels(efc)["c161sex"])
## $c161sex
## [1] "Male"   "Female"
head(efc$c161sex)
## [1] 2 2 1 1 2 1
head(sji.convertToLabel(efc$c161sex))
## [1] Female Female Male   Male   Female Male  
## Levels: Female Male

print(sji.getValueLabels(efc)["e42dep"])
## $e42dep
## [1] "independent"          "slightly dependent"   "moderately dependent"
## [4] "severely dependent"
table(efc$e42dep)
## 
##   1   2   3   4 
##  66 225 306 304
table(sji.convertToLabel(efc$e42dep))
## 
##          independent moderately dependent   severely dependent 
##                   66                  306                  304 
##   slightly dependent 
##                  225

sji.convertToValue Converts factors to numeric variables

test <- sji.convertToLabel(efc$e42dep)
table(test)
## test
##          independent moderately dependent   severely dependent 
##                   66                  306                  304 
##   slightly dependent 
##                  225

table(sji.convertToValue(test))
## 
##   1   2   3   4 
##  66 306 304 225
hist(sji.convertToValue(test, 0))

plot of chunk unnamed-chunk-5

sjp.aov1 Plot One-Way-Anova tables

sjp.aov1(efc$c12hour, as.factor(efc$e42dep))

plot of chunk unnamed-chunk-6


data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.aov1(efc$c12hour, as.factor(efc$e42dep), axisLabels.y = efc.val["e42dep"], 
    axisTitle.x = efc.var[["c12hour"]])

plot of chunk unnamed-chunk-6


sjp.aov1(efc$c12hour, as.factor(efc$e42dep), axisLabels.y = efc.val["e42dep"], 
    title = efc.var[["c12hour"]], type = "bars", meansums = TRUE, hideErrorBars = TRUE, 
    theme = "minimal", minorGridColor = "white", showTickMarks = FALSE, showModelSummary = FALSE, 
    hideGrid.x = TRUE)

plot of chunk unnamed-chunk-6

sjp.chi2 Plot Pearson’s Chi2-Test of multiple contingency tables

# create data frame with 5 dichotomous (dummy) variables
df <- data.frame(as.factor(sample(1:2, 100, replace = TRUE)), as.factor(sample(1:2, 
    100, replace = TRUE)), as.factor(sample(1:2, 100, replace = TRUE)), as.factor(sample(1:2, 
    100, replace = TRUE)), as.factor(sample(1:2, 100, replace = TRUE)))

str(df)
## 'data.frame':    100 obs. of  5 variables:
##  $ as.factor.sample.1.2..100..replace...TRUE..  : Factor w/ 2 levels "1","2": 1 2 1 2 1 2 1 1 1 2 ...
##  $ as.factor.sample.1.2..100..replace...TRUE...1: Factor w/ 2 levels "1","2": 2 1 1 1 1 1 2 1 1 2 ...
##  $ as.factor.sample.1.2..100..replace...TRUE...2: Factor w/ 2 levels "1","2": 2 1 2 1 1 1 2 2 1 2 ...
##  $ as.factor.sample.1.2..100..replace...TRUE...3: Factor w/ 2 levels "1","2": 2 2 1 1 2 2 1 2 1 2 ...
##  $ as.factor.sample.1.2..100..replace...TRUE...4: Factor w/ 2 levels "1","2": 1 1 1 1 2 1 2 2 1 1 ...
# create variable labels
items <- list(c("Item 1", "Item 2", "Item 3", "Item 4", "Item 5"))

# plot Chi2-contingency-table
sjp.chi2(df, axisLabels = items)

plot of chunk unnamed-chunk-7

sjp.corr Plot correlation matrix

# create data frame with 5 random variables
df <- as.data.frame(cbind(rnorm(10), rnorm(10), rnorm(10), rnorm(10), rnorm(10)))

# plot correlation matrix using circles
sjp.corr(df)
## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used
## [1] "Computing correlation using spearman-method with listwise-deletion..."

plot of chunk unnamed-chunk-8


# plot correlation matrix using square tiles without diagram background
sjp.corr(df, type = "tile", theme = "none")
## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used
## [1] "Computing correlation using spearman-method with listwise-deletion..."

plot of chunk unnamed-chunk-8


# ------------------------------- Data from the EUROFAMCARE sample dataset
# -------------------------------
data(efc)

# retrieve variable and value labels
varlabs <- sji.getVariableLabels(efc)

# recveive first item of COPE-index scale
start <- which(colnames(efc) == "c83cop2")
start
## [1] 7

# recveive first item of COPE-index scale
end <- which(colnames(efc) == "c88cop7")
end
## [1] 12

# create data frame with COPE-index scale
df <- as.data.frame(efc[, c(start:end)])
colnames(df) <- varlabs[c(start:end)]

# we have high correlations here, because all items belong to one factor.
# See example from 'sjp.pca'.
sjp.corr(df, type = "tile", theme = "none", outlineColor = "white", hideLegend = FALSE)
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used
## [1] "Computing correlation using spearman-method with listwise-deletion..."

plot of chunk unnamed-chunk-8

sjp.frq Plot frequencies of (count) variables

# boxplot
sjp.frq(ChickWeight$weight, type = "box")

plot of chunk unnamed-chunk-9


# histogram
sjp.frq(discoveries, type = "hist", showMeanIntercept = TRUE)

plot of chunk unnamed-chunk-9


# histogram with minimal theme
sjp.frq(discoveries, type = "hist", showMeanIntercept = TRUE, theme = "minimal", 
    minorGridColor = "white", showTickMarks = FALSE, hideGrid.x = TRUE)

plot of chunk unnamed-chunk-9


# violin plot
sjp.frq(ChickWeight$weight, type = "v")

plot of chunk unnamed-chunk-9


# bar plot
sjp.frq(ChickWeight$Diet)

plot of chunk unnamed-chunk-9

sjp.frq(ChickWeight$Diet, maxYlim = TRUE)

plot of chunk unnamed-chunk-9


# bar plot with EUROFAMCARE sample dataset dataset was importet from an
# SPSS-file, using: efc <- sji.SPSS('efc.sav', enc='UTF-8')
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.frq(as.factor(efc$e15relat), title = efc.var[["e15relat"]], axisLabels.x = efc.val["e15relat"], 
    axisLabelAngle.x = 90)

plot of chunk unnamed-chunk-9


# bar plot with EUROFAMCARE sample dataset grouped variable
ageGrp <- sju.groupVar(efc$e17age)
ageGrpLab <- sju.groupVarLabels(efc$e17age)
sjp.frq(ageGrp, title = efc.var[["e17age"]], axisLabels.x = ageGrpLab)

plot of chunk unnamed-chunk-9


# minimal theme
sjp.frq(ageGrp, title = efc.var[["e17age"]], axisLabels.x = ageGrpLab, theme = "minimal", 
    minorGridColor = "white", showTickMarks = FALSE, hideGrid.x = TRUE)

plot of chunk unnamed-chunk-9


# box plots with interaction variable the following example is equal to the
# function call sjp.grpfrq(efc$e17age, efc$e16sex, type='box')
sjp.frq(efc$e17age, title = paste(efc.var[["e17age"]], "by", efc.var[["e16sex"]]), 
    interactionVar = efc$e16sex, interactionVarLabels = efc.val["e16sex"], type = "box")

plot of chunk unnamed-chunk-9

sjp.glm Plot odds ratios (forest plots)

# prepare dichotomous dependent variable
y <- ifelse(swiss$Fertility < median(swiss$Fertility), 0, 1)

# fit model
fitOR <- glm(y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality + 
    swiss$Catholic, family = binomial)

# print Odds Ratios as dots
sjp.glm(fitOR)
## Waiting for profiling to be done...
## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50

plot of chunk unnamed-chunk-10


# print Odds Ratios as bars
sjp.glm(fitOR, type = "bars")
## Waiting for profiling to be done...
## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50

plot of chunk unnamed-chunk-10


# ------------------------------- Predictors for negative impact of care.
# Data from the EUROFAMCARE sample dataset -------------------------------
data(efc)

# retrieve predictor variable labels
labs <- sji.getVariableLabels(efc)
predlab <- c(labs[["c161sex"]], labs[["e42dep"]], paste0(labs[["c172code"]], 
    " (mid)"), paste0(labs[["c172code"]], " (high)"))

# create binary response
y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1)

# create dummy variables for educational status
edu.mid <- ifelse(efc$c172code == 2, 1, 0)
edu.high <- ifelse(efc$c172code == 3, 1, 0)

# create data frame for fitted model
df <- na.omit(as.data.frame(cbind(y, as.factor(efc$c161sex), as.factor(efc$e42dep), 
    as.factor(edu.mid), as.factor(edu.high))))

# fit model
fit <- glm(y ~ ., data = df, family = binomial(link = "logit"))

# plot odds
sjp.glm(fit, title = labs[["neg_c_7"]], axisLabels.y = predlab)
## Waiting for profiling to be done...
## Intercept = 0.02
## R2[cs] = 0.161
## R2[n] = 0.215
## Lambda = 1000.04
## Chi2 = 0.00
## AIC = 1010.04

plot of chunk unnamed-chunk-10

sjp.glm.ma Plot model assumptions of glm’s

# prepare dichotomous dependent variable
y <- ifelse(swiss$Fertility < median(swiss$Fertility), 0, 1)

# fit model
fitOR <- glm(y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality + 
    swiss$Catholic, family = binomial)

# plot model assumptions
sjp.glm.ma(fitOR)
## 
## Removed 1 cases during 1 step(s).
## AIC-value of original model: 51.50
## AIC-value of updated model: 46.41

plot of chunk unnamed-chunk-11 plot of chunk unnamed-chunk-11 plot of chunk unnamed-chunk-11

## 
## --------------------
## Check significance of terms when they entered the model...
## Anova original model:
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: y
## 
## Terms added sequentially (first to last)
## 
## 
##                        Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
## NULL                                      46       65.1            
## swiss$Education         1     9.21        45       55.9   0.0024 **
## swiss$Examination       1     9.30        44       46.6   0.0023 **
## swiss$Infant.Mortality  1     4.56        43       42.1   0.0327 * 
## swiss$Catholic          1     0.56        42       41.5   0.4547   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Waiting for profiling to be done...

plot of chunk unnamed-chunk-11

## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50

plot of chunk unnamed-chunk-11

## 
## Call:  glm(formula = y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality + 
##     swiss$Catholic, family = binomial, subset = -c(vars))
## 
## Coefficients:
##            (Intercept)         swiss$Education       swiss$Examination  
##                -3.6914                 -0.0396                 -0.1818  
## swiss$Infant.Mortality          swiss$Catholic  
##                 0.3332                  0.0146  
## 
## Degrees of Freedom: 45 Total (i.e. Null);  41 Residual
## Null Deviance:       63.7 
## Residual Deviance: 36.4  AIC: 46.4

sjp.grpfrq Plot grouped or stacked frequencies

# histogram plot
sjp.grpfrq(discoveries, sample(1:3, length(discoveries), replace = TRUE), type = "hist", 
    showValueLabels = FALSE)

plot of chunk unnamed-chunk-12


# histrogram with EUROFAMCARE sample dataset
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.grpfrq(efc$e17age, efc$e16sex, title = efc.var["e17age"], legendTitle = efc.var["e16sex"], 
    legendLabels = efc.val[["e16sex"]], type = "hist", showValueLabels = FALSE)
## Warning: Removed 128 rows containing missing values (geom_text).

plot of chunk unnamed-chunk-12


# boxplot
sjp.grpfrq(ChickWeight$weight, as.numeric(ChickWeight$Diet), type = "box")

plot of chunk unnamed-chunk-12


# violin plot
sjp.grpfrq(ChickWeight$weight, as.numeric(ChickWeight$Diet), type = "v")
## Warning: sum(weights) != 1  -- will not get true density
## Warning: sum(weights) != 1  -- will not get true density
## Warning: sum(weights) != 1  -- will not get true density
## Warning: sum(weights) != 1  -- will not get true density

plot of chunk unnamed-chunk-12


# grouped bars
sjp.grpfrq(sample(1:3, length(ChickWeight$Diet), replace = TRUE), as.numeric(ChickWeight$Diet), 
    barSpace = 0.2)
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-12


# grouped bars with EUROFAMCARE sample dataset dataset was importet from an
# SPSS-file, using: efc <- sji.SPSS('efc.sav', enc='UTF-8')
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.grpfrq(efc$e42dep, efc$e16sex, title = efc.var["e42dep"], axisLabels.x = efc.val[["e42dep"]], 
    legendTitle = efc.var["e16sex"], legendLabels = efc.val[["e16sex"]])
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-12


# grouped bars using the maximum y-limit
sjp.grpfrq(efc$e42dep, efc$e16sex, title = efc.var["e42dep"], axisLabels.x = efc.val[["e42dep"]], 
    legendTitle = efc.var["e16sex"], legendLabels = efc.val[["e16sex"]], maxYlim = TRUE)
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-12


# grouped bars using necessary y-limit
sjp.grpfrq(efc$e16sex, efc$e42dep, title = efc.var["e16sex"], axisLabels.x = efc.val[["e16sex"]], 
    legendTitle = efc.var["e42dep"], legendLabels = efc.val[["e42dep"]])
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-12


# box plots with interaction variable
sjp.grpfrq(efc$e17age, efc$e42dep, interactionVar = efc$e16sex, title = paste(efc.var["e17age"], 
    "by", efc.var["e42dep"], "and", efc.var["e16sex"]), axisLabels.x = efc.val[["e17age"]], 
    interactionVarLabels = efc.val[["e16sex"]], legendTitle = efc.var["e42dep"], 
    legendLabels = efc.val[["e42dep"]], type = "box")

plot of chunk unnamed-chunk-12

sjp.likert Plot likert scales as centered stacked bars

# prepare data for dichotomous likert scale, 5 items
likert_2 <- data.frame(as.factor(sample(1:2, 500, replace = TRUE, prob = c(0.3, 
    0.7))), as.factor(sample(1:2, 500, replace = TRUE, prob = c(0.6, 0.4))), 
    as.factor(sample(1:2, 500, replace = TRUE, prob = c(0.25, 0.75))), as.factor(sample(1:2, 
        500, replace = TRUE, prob = c(0.9, 0.1))), as.factor(sample(1:2, 500, 
        replace = TRUE, prob = c(0.35, 0.65))))

# create labels
levels_2 <- list(c("Disagree", "Agree"))

# prepare data for 4-category likert scale, 5 items
likert_4 <- data.frame(as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.2, 
    0.3, 0.1, 0.4))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.5, 
    0.25, 0.15, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.25, 
    0.1, 0.4, 0.25))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.1, 
    0.4, 0.4, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.35, 
    0.25, 0.15, 0.25))))

# create labels
levels_4 <- list(c("Strongly disagree", "Disagree", "Agree", "Strongly Agree"))

# prepare data for 6-category likert scale, 5 items
likert_6 <- data.frame(as.factor(sample(1:6, 500, replace = TRUE, prob = c(0.2, 
    0.1, 0.1, 0.3, 0.2, 0.1))), as.factor(sample(1:6, 500, replace = TRUE, prob = c(0.15, 
    0.15, 0.3, 0.1, 0.1, 0.2))), as.factor(sample(1:6, 500, replace = TRUE, 
    prob = c(0.2, 0.25, 0.05, 0.2, 0.2, 0.2))), as.factor(sample(1:6, 500, replace = TRUE, 
    prob = c(0.2, 0.1, 0.1, 0.4, 0.1, 0.1))), as.factor(sample(1:6, 500, replace = TRUE, 
    prob = c(0.1, 0.4, 0.1, 0.3, 0.05, 0.15))))

# create labels
levels_6 <- list(c("Very strongly disagree", "Strongly disagree", "Disagree", 
    "Agree", "Strongly Agree", "Very strongly disagree"))

# create item labels
items <- list(c("Q1", "Q2", "Q3", "Q4", "Q5"))

# plot dichotomous likert scale, ordered by 'negative' values
sjp.likert(likert_2, legendLabels = levels_2, axisLabels.y = items, orderBy = "neg")
## Warning: Stacking not well defined when ymin != 0

plot of chunk unnamed-chunk-13


# plot 4-category-likert-scale, no order
sjp.likert(likert_4, legendLabels = levels_4, axisLabels.y = items)
## Warning: Stacking not well defined when ymin != 0

plot of chunk unnamed-chunk-13


# plot 4-category-likert-scale, ordered by positive values and in brown
# color scale
sjp.likert(likert_6, legendLabels = levels_6, barColor = "brown", axisLabels.y = items, 
    orderBy = "pos")
## Warning: Stacking not well defined when ymin != 0

plot of chunk unnamed-chunk-13

sjp.lm Plot beta coefficients of lm

# fit linear model
fit <- lm(airquality$Ozone ~ airquality$Wind + airquality$Temp + airquality$Solar.R)

# plot estimates with CI and standardized beta-values
sjp.lm(fit, gridBreaksAt = 2)

plot of chunk unnamed-chunk-14


# plot estimates with CI without standardized beta-values and with ugly
# narrow tick marks (because 'gridBreaksAt' was not specified)
sjp.lm(fit, showStandardBeta = FALSE)

plot of chunk unnamed-chunk-14

sjp.lm.int Plot interaction terms of linear models

# Note that the data sets used in this example may not be perfectly suitable
# for fitting linear models. I just used them because they are part of the
# R-software.

# fit 'dummy' model.
fit <- lm(weight ~ Time * Diet, data = ChickWeight, x = TRUE)

# show summary to see significant interactions
summary(fit)
## 
## Call:
## lm(formula = weight ~ Time * Diet, data = ChickWeight, x = TRUE)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -135.43  -13.76   -1.31   11.07  130.39 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   30.931      4.247    7.28  1.1e-12 ***
## Time           6.842      0.341   20.08  < 2e-16 ***
## Diet2         -2.297      7.267   -0.32   0.7520    
## Diet3        -12.681      7.267   -1.74   0.0815 .  
## Diet4         -0.139      7.286   -0.02   0.9848    
## Time:Diet2     1.767      0.572    3.09   0.0021 ** 
## Time:Diet3     4.581      0.572    8.01  6.3e-15 ***
## Time:Diet4     2.873      0.578    4.97  8.9e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 34.1 on 570 degrees of freedom
## Multiple R-squared:  0.773,  Adjusted R-squared:  0.77 
## F-statistic:  277 on 7 and 570 DF,  p-value: <2e-16

# plot regression line of interaction terms
sjp.lm.int(fit)

plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15


# plot regression line of interaction terms, including value labels
sjp.lm.int(fit, showValueLabels = TRUE)

plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15


# fit 'dummy' model
fit <- lm(Fertility ~ . * ., data = swiss, na.action = na.omit, x = TRUE)

# show summary to see significant interactions
summary(fit)
## 
## Call:
## lm(formula = Fertility ~ . * ., data = swiss, na.action = na.omit, 
##     x = TRUE)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -8.76  -3.89  -0.68   3.14  14.10 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  253.97615   67.99721    3.74  0.00076 ***
## Agriculture                   -2.10867    0.70163   -3.01  0.00522 ** 
## Examination                   -5.58074    2.75010   -2.03  0.05109 .  
## Education                     -3.47089    2.68377   -1.29  0.20547    
## Catholic                      -0.17693    0.40653   -0.44  0.66642    
## Infant.Mortality              -5.95748    3.08963   -1.93  0.06303 .  
## Agriculture:Examination        0.02137    0.01377    1.55  0.13091    
## Agriculture:Education          0.01906    0.01523    1.25  0.22009    
## Agriculture:Catholic           0.00263    0.00285    0.92  0.36387    
## Agriculture:Infant.Mortality   0.06370    0.02981    2.14  0.04060 *  
## Examination:Education          0.07517    0.03634    2.07  0.04703 *  
## Examination:Catholic          -0.00153    0.01079   -0.14  0.88791    
## Examination:Infant.Mortality   0.17101    0.12907    1.33  0.19485    
## Education:Catholic            -0.00713    0.01018   -0.70  0.48865    
## Education:Infant.Mortality     0.03359    0.12420    0.27  0.78863    
## Catholic:Infant.Mortality      0.00992    0.01617    0.61  0.54409    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.47 on 31 degrees of freedom
## Multiple R-squared:  0.819,  Adjusted R-squared:  0.731 
## F-statistic: 9.35 on 15 and 31 DF,  p-value: 1.08e-07

# plot regression line of interaction terms
sjp.lm.int(fit)

plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15


# plot smoothes regression line of interaction terms
sjp.lm.int(fit, smooth = "loess")

plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15


# plot linear regression line of interaction terms
sjp.lm.int(fit, smooth = "lm")

plot of chunk unnamed-chunk-15 plot of chunk unnamed-chunk-15

sjp.lm.ma Plot model assumptions of lm’s

# fit linear model
fit <- lm(airquality$Ozone ~ airquality$Wind + airquality$Temp + airquality$Solar.R)
fit.updated <- sjp.lm.ma(fit)
## 
## Removed 1 cases during 1 step(s).
## R-square/adj. R-square of original model: 0.605895 / 0.594845
## R-square/adj. R-square of updated model: 0.636945 / 0.626670

plot of chunk unnamed-chunk-16 plot of chunk unnamed-chunk-16

## Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-16

## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-16 plot of chunk unnamed-chunk-16

sjp.pca Plot PCA results

# randomly create data frame with 7 items, each consisting of 4 categories
likert_4 <- data.frame(sample(1:4, 500, replace = TRUE, prob = c(0.2, 0.3, 0.1, 
    0.4)), sample(1:4, 500, replace = TRUE, prob = c(0.5, 0.25, 0.15, 0.1)), 
    sample(1:4, 500, replace = TRUE, prob = c(0.4, 0.15, 0.25, 0.2)), sample(1:4, 
        500, replace = TRUE, prob = c(0.25, 0.1, 0.4, 0.25)), sample(1:4, 500, 
        replace = TRUE, prob = c(0.1, 0.4, 0.4, 0.1)), sample(1:4, 500, replace = TRUE), 
    sample(1:4, 500, replace = TRUE, prob = c(0.35, 0.25, 0.15, 0.25)))

# Create variable labels
colnames(likert_4) <- c("V1", "V2", "V3", "V4", "V5", "V6", "V7")

# plot results from PCA as square-tiled 'heatmap'
sjp.pca(likert_4)
## Using  as id variables

plot of chunk unnamed-chunk-17

## [1] "Following items have been removed:"
## [1] "none."

# manually compute PCS
pca <- prcomp(na.omit(likert_4), retx = TRUE, center = TRUE, scale. = TRUE)

# plot results from PCA as circles, including Eigenvalue-diagnostic.  note
# that this plot does not compute the Cronbach's Alpha
sjp.pca(pca, plotEigenvalues = TRUE, type = "circle")
## [1] "--------------------------------------------"
## Importance of components:
##                         PC1   PC2   PC3   PC4   PC5   PC6   PC7
## Standard deviation     1.09 1.046 1.038 0.991 0.988 0.927 0.908
## Proportion of Variance 0.17 0.156 0.154 0.140 0.139 0.123 0.118
## Cumulative Proportion  0.17 0.326 0.480 0.620 0.760 0.882 1.000
## [1] "Eigenvalues:"
## [1] 1.1894 1.0938 1.0765 0.9822 0.9762 0.8584 0.8236
## [1] "--------------------------------------------"
## [1] "Cronbach's Alpha can only be calculated when having a data frame with each component / variable as column"
## Using  as id variables

plot of chunk unnamed-chunk-17 plot of chunk unnamed-chunk-17

## $loadings
## 
## Loadings:
##    [,1]   [,2]   [,3]  
## V1         0.247  0.742
## V2        -0.480       
## V3  0.360 -0.536  0.360
## V4  0.262  0.675       
## V5         0.194 -0.499
## V6  0.690        -0.339
## V7  0.705  0.125  0.207
## 
##                 [,1]  [,2]  [,3]
## SS loadings    1.176 1.093 1.091
## Proportion Var 0.168 0.156 0.156
## Cumulative Var 0.168 0.324 0.480
## 
## $rotmat
##         [,1]    [,2]    [,3]
## [1,]  0.9313 -0.1872  0.3124
## [2,]  0.3180  0.8361 -0.4470
## [3,] -0.1775  0.5157  0.8382

# ------------------------------- Data from the EUROFAMCARE sample dataset
# -------------------------------
data(efc)

# retrieve variable and value labels
varlabs <- sji.getVariableLabels(efc)

# recveive first item of COPE-index scale
start <- which(colnames(efc) == "c82cop1")

# recveive first item of COPE-index scale
end <- which(colnames(efc) == "c90cop9")

# create data frame with COPE-index scale
df <- as.data.frame(efc[, c(start:end)])
colnames(df) <- varlabs[c(start:end)]
sjp.pca(df)
## Using  as id variables

plot of chunk unnamed-chunk-17

## [1] "Following items have been removed:"
## [1] "none."

sjp.reglin Plot regression lines for each predictor

data(efc)
fit <- lm(tot_sc_e ~ c12hour + e17age + e42dep, data = efc)

# reression line and scatter plot
sjp.reglin(fit, efc)
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-18

## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-18

## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: pseudoinverse used at 4.015
## Warning: neighborhood radius 2.015
## Warning: reciprocal condition number  1.676e-015
## Warning: There are other near singularities as well. 1
## Warning: Removed 7 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-18


# reression line w/o scatter plot
sjp.reglin(fit, efc, showScatterPlot = FALSE)
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (stat_smooth).

plot of chunk unnamed-chunk-18

## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (stat_smooth).

plot of chunk unnamed-chunk-18

## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: pseudoinverse used at 4.015
## Warning: neighborhood radius 2.015
## Warning: reciprocal condition number  1.676e-015
## Warning: There are other near singularities as well. 1

plot of chunk unnamed-chunk-18


# reression line w/o CI
sjp.reglin(fit, efc, showCI = FALSE)
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-18

## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-18

## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: pseudoinverse used at 4.015
## Warning: neighborhood radius 2.015
## Warning: reciprocal condition number  1.676e-015
## Warning: There are other near singularities as well. 1
## Warning: Removed 7 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-18

sjp.stackfrq Plot stacked proportional bars

# ------------------------------- random sample
# ------------------------------- prepare data for 4-category likert scale,
# 5 items
likert_4 <- data.frame(as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.2, 
    0.3, 0.1, 0.4))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.5, 
    0.25, 0.15, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.25, 
    0.1, 0.4, 0.25))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.1, 
    0.4, 0.4, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.35, 
    0.25, 0.15, 0.25))))

# create labels
levels_4 <- list(c("Independent", "Slightly dependent", "Dependent", "Severely dependent"))

# create item labels
items <- list(c("Q1", "Q2", "Q3", "Q4", "Q5"))

# plot stacked frequencies of 5 (ordered) item-scales
sjp.stackfrq(likert_4, legendLabels = levels_4, axisLabels.y = items)

plot of chunk unnamed-chunk-19


# ------------------------------- Data from the EUROFAMCARE sample dataset
# -------------------------------
data(efc)

# recveive first item of COPE-index scale
start <- which(colnames(efc) == "c82cop1")

# recveive first item of COPE-index scale
end <- which(colnames(efc) == "c90cop9")

# retrieve variable and value labels
varlabs <- sji.getVariableLabels(efc)
vallabs <- sji.getValueLabels(efc)

# create value labels. We need just one variable of the COPE-index scale
# because they have all the same level / categorie / value labels
levels <- vallabs["c82cop1"]

# create item labels
items <- list(varlabs[c(start:end)])
sjp.stackfrq(efc[, c(start:end)], legendLabels = levels, axisLabels.y = items)

plot of chunk unnamed-chunk-19

sjp.xtab Plot proportional crosstables

# create 4-category-items
x <- sample(1:4, 100, replace = TRUE)

# create 3-category-items
y <- sample(1:3, 100, replace = TRUE)

# plot 'cross tablulation' of x and y
sjp.xtab(y, x)
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-20


# plot 'cross tablulation' of x and y, including labels
sjp.xtab(y, x, axisLabels.x = c("low", "mid", "high"), legendLabels = c("Grp 1", 
    "Grp 2", "Grp 3", "Grp 4"))
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-20


# plot 'cross tablulation' of x and y as stacked proportional bars
sjp.xtab(y, x, tableIndex = "row", barPosition = "stack", flipCoordinates = TRUE)

plot of chunk unnamed-chunk-20


# grouped bars with EUROFAMCARE sample dataset dataset was importet from an
# SPSS-file, using: efc <- sji.SPSS('efc.sav', enc='UTF-8')
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.xtab(efc$e42dep, efc$e16sex, title = efc.var["e42dep"], axisLabels.x = efc.val[["e42dep"]], 
    legendTitle = efc.var["e16sex"], legendLabels = efc.val[["e16sex"]])
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-20


sjp.xtab(efc$e16sex, efc$e42dep, title = efc.var["e16sex"], axisLabels.x = efc.val[["e16sex"]], 
    legendTitle = efc.var["e42dep"], legendLabels = efc.val[["e42dep"]])
## ymax not defined: adjusting position using y instead

plot of chunk unnamed-chunk-20


sjp.xtab(efc$e16sex, efc$e42dep, title = efc.var["e16sex"], axisLabels.x = efc.val[["e16sex"]], 
    legendTitle = efc.var["e42dep"], legendLabels = efc.val[["e42dep"]], tableIndex = "row", 
    barPosition = "stack", flipCoordinates = F)

plot of chunk unnamed-chunk-20