Collection of several plotting functions for data visualization using ggplot. Results of several statistical analyses (that are commonly used in social sciences) can be visualized using this package, including simple and cross tabulated frequencies, histograms, box plots, (generalized) linear models (forest plots), PCA, correlations etc.
Furthermore, this package contains some tools that are useful when carrying out data analysis or interpreting data, for instance data set import, variable recoding, determination of cluster groups, interpretation of interactions in linear models etc.
The sjPlot package depends on following R packages that have to be installed prior to the installtion of sjPlot.
Use following script to install only those required packages that are missing:
depends <- c("MASS", "car", "foreign", "ggplot2", "plyr", "lmtest", "reshape2",
"scales", "quantreg")
for (i in 1:length(depends)) {
if ((depends[i] %in% rownames(installed.packages())) == FALSE) {
cat(paste0("Installing missing package \"", depends[i], "\"...\n"))
install.packages(depends[i])
} else {
cat(paste0("Package \"", depends[i], "\" already installed...\n"))
}
}
## Package "MASS" already installed...
## Package "car" already installed...
## Package "foreign" already installed...
## Package "ggplot2" already installed...
## Package "plyr" already installed...
## Package "lmtest" already installed...
## Package "reshape2" already installed...
## Package "scales" already installed...
## Package "quantreg" already installed...
install.packages("C:/Users/Mark/Downloads/sjPlot_0.7.tar.gz", repos = NULL,
type = "source")
library(sjPlot)
data(efc)
sji.convertToLabel
Replaces variable values with their associated value labels
print(sji.getValueLabels(efc)["c161sex"])
## $c161sex
## [1] "Male" "Female"
head(efc$c161sex)
## [1] 2 2 1 1 2 1
head(sji.convertToLabel(efc$c161sex))
## [1] Female Female Male Male Female Male
## Levels: Female Male
print(sji.getValueLabels(efc)["e42dep"])
## $e42dep
## [1] "independent" "slightly dependent" "moderately dependent"
## [4] "severely dependent"
table(efc$e42dep)
##
## 1 2 3 4
## 66 225 306 304
table(sji.convertToLabel(efc$e42dep))
##
## independent moderately dependent severely dependent
## 66 306 304
## slightly dependent
## 225
sji.convertToValue
Converts factors to numeric variables
test <- sji.convertToLabel(efc$e42dep)
table(test)
## test
## independent moderately dependent severely dependent
## 66 306 304
## slightly dependent
## 225
table(sji.convertToValue(test))
##
## 1 2 3 4
## 66 306 304 225
hist(sji.convertToValue(test, 0))
sjp.aov1
Plot One-Way-Anova tables
sjp.aov1(efc$c12hour, as.factor(efc$e42dep))
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.aov1(efc$c12hour, as.factor(efc$e42dep), axisLabels.y = efc.val["e42dep"],
axisTitle.x = efc.var[["c12hour"]])
sjp.aov1(efc$c12hour, as.factor(efc$e42dep), axisLabels.y = efc.val["e42dep"],
title = efc.var[["c12hour"]], type = "bars", meansums = TRUE, hideErrorBars = TRUE,
theme = "minimal", minorGridColor = "white", showTickMarks = FALSE, showModelSummary = FALSE,
hideGrid.x = TRUE)
sjp.chi2
Plot Pearsons Chi2-Test of multiple contingency tables
# create data frame with 5 dichotomous (dummy) variables
df <- data.frame(as.factor(sample(1:2, 100, replace = TRUE)), as.factor(sample(1:2,
100, replace = TRUE)), as.factor(sample(1:2, 100, replace = TRUE)), as.factor(sample(1:2,
100, replace = TRUE)), as.factor(sample(1:2, 100, replace = TRUE)))
str(df)
## 'data.frame': 100 obs. of 5 variables:
## $ as.factor.sample.1.2..100..replace...TRUE.. : Factor w/ 2 levels "1","2": 1 2 1 2 1 2 1 1 1 2 ...
## $ as.factor.sample.1.2..100..replace...TRUE...1: Factor w/ 2 levels "1","2": 2 1 1 1 1 1 2 1 1 2 ...
## $ as.factor.sample.1.2..100..replace...TRUE...2: Factor w/ 2 levels "1","2": 2 1 2 1 1 1 2 2 1 2 ...
## $ as.factor.sample.1.2..100..replace...TRUE...3: Factor w/ 2 levels "1","2": 2 2 1 1 2 2 1 2 1 2 ...
## $ as.factor.sample.1.2..100..replace...TRUE...4: Factor w/ 2 levels "1","2": 1 1 1 1 2 1 2 2 1 1 ...
# create variable labels
items <- list(c("Item 1", "Item 2", "Item 3", "Item 4", "Item 5"))
# plot Chi2-contingency-table
sjp.chi2(df, axisLabels = items)
sjp.corr
Plot correlation matrix
# create data frame with 5 random variables
df <- as.data.frame(cbind(rnorm(10), rnorm(10), rnorm(10), rnorm(10), rnorm(10)))
# plot correlation matrix using circles
sjp.corr(df)
## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used
## [1] "Computing correlation using spearman-method with listwise-deletion..."
# plot correlation matrix using square tiles without diagram background
sjp.corr(df, type = "tile", theme = "none")
## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used
## [1] "Computing correlation using spearman-method with listwise-deletion..."
# ------------------------------- Data from the EUROFAMCARE sample dataset
# -------------------------------
data(efc)
# retrieve variable and value labels
varlabs <- sji.getVariableLabels(efc)
# recveive first item of COPE-index scale
start <- which(colnames(efc) == "c83cop2")
start
## [1] 7
# recveive first item of COPE-index scale
end <- which(colnames(efc) == "c88cop7")
end
## [1] 12
# create data frame with COPE-index scale
df <- as.data.frame(efc[, c(start:end)])
colnames(df) <- varlabs[c(start:end)]
# we have high correlations here, because all items belong to one factor.
# See example from 'sjp.pca'.
sjp.corr(df, type = "tile", theme = "none", outlineColor = "white", hideLegend = FALSE)
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: Cannot compute exact p-value with ties
## Warning: the condition has length > 1 and only the first element will be used
## Warning: the condition has length > 1 and only the first element will be used
## [1] "Computing correlation using spearman-method with listwise-deletion..."
sjp.frq
Plot frequencies of (count) variables
# boxplot
sjp.frq(ChickWeight$weight, type = "box")
# histogram
sjp.frq(discoveries, type = "hist", showMeanIntercept = TRUE)
# histogram with minimal theme
sjp.frq(discoveries, type = "hist", showMeanIntercept = TRUE, theme = "minimal",
minorGridColor = "white", showTickMarks = FALSE, hideGrid.x = TRUE)
# violin plot
sjp.frq(ChickWeight$weight, type = "v")
# bar plot
sjp.frq(ChickWeight$Diet)
sjp.frq(ChickWeight$Diet, maxYlim = TRUE)
# bar plot with EUROFAMCARE sample dataset dataset was importet from an
# SPSS-file, using: efc <- sji.SPSS('efc.sav', enc='UTF-8')
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.frq(as.factor(efc$e15relat), title = efc.var[["e15relat"]], axisLabels.x = efc.val["e15relat"],
axisLabelAngle.x = 90)
# bar plot with EUROFAMCARE sample dataset grouped variable
ageGrp <- sju.groupVar(efc$e17age)
ageGrpLab <- sju.groupVarLabels(efc$e17age)
sjp.frq(ageGrp, title = efc.var[["e17age"]], axisLabels.x = ageGrpLab)
# minimal theme
sjp.frq(ageGrp, title = efc.var[["e17age"]], axisLabels.x = ageGrpLab, theme = "minimal",
minorGridColor = "white", showTickMarks = FALSE, hideGrid.x = TRUE)
# box plots with interaction variable the following example is equal to the
# function call sjp.grpfrq(efc$e17age, efc$e16sex, type='box')
sjp.frq(efc$e17age, title = paste(efc.var[["e17age"]], "by", efc.var[["e16sex"]]),
interactionVar = efc$e16sex, interactionVarLabels = efc.val["e16sex"], type = "box")
sjp.glm
Plot odds ratios (forest plots)
# prepare dichotomous dependent variable
y <- ifelse(swiss$Fertility < median(swiss$Fertility), 0, 1)
# fit model
fitOR <- glm(y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality +
swiss$Catholic, family = binomial)
# print Odds Ratios as dots
sjp.glm(fitOR)
## Waiting for profiling to be done...
## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50
# print Odds Ratios as bars
sjp.glm(fitOR, type = "bars")
## Waiting for profiling to be done...
## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50
# ------------------------------- Predictors for negative impact of care.
# Data from the EUROFAMCARE sample dataset -------------------------------
data(efc)
# retrieve predictor variable labels
labs <- sji.getVariableLabels(efc)
predlab <- c(labs[["c161sex"]], labs[["e42dep"]], paste0(labs[["c172code"]],
" (mid)"), paste0(labs[["c172code"]], " (high)"))
# create binary response
y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1)
# create dummy variables for educational status
edu.mid <- ifelse(efc$c172code == 2, 1, 0)
edu.high <- ifelse(efc$c172code == 3, 1, 0)
# create data frame for fitted model
df <- na.omit(as.data.frame(cbind(y, as.factor(efc$c161sex), as.factor(efc$e42dep),
as.factor(edu.mid), as.factor(edu.high))))
# fit model
fit <- glm(y ~ ., data = df, family = binomial(link = "logit"))
# plot odds
sjp.glm(fit, title = labs[["neg_c_7"]], axisLabels.y = predlab)
## Waiting for profiling to be done...
## Intercept = 0.02
## R2[cs] = 0.161
## R2[n] = 0.215
## Lambda = 1000.04
## Chi2 = 0.00
## AIC = 1010.04
sjp.glm.ma
Plot model assumptions of glms
# prepare dichotomous dependent variable
y <- ifelse(swiss$Fertility < median(swiss$Fertility), 0, 1)
# fit model
fitOR <- glm(y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality +
swiss$Catholic, family = binomial)
# plot model assumptions
sjp.glm.ma(fitOR)
##
## Removed 1 cases during 1 step(s).
## AIC-value of original model: 51.50
## AIC-value of updated model: 46.41
##
## --------------------
## Check significance of terms when they entered the model...
## Anova original model:
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: y
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 46 65.1
## swiss$Education 1 9.21 45 55.9 0.0024 **
## swiss$Examination 1 9.30 44 46.6 0.0023 **
## swiss$Infant.Mortality 1 4.56 43 42.1 0.0327 *
## swiss$Catholic 1 0.56 42 41.5 0.4547
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Waiting for profiling to be done...
## Intercept = 0.02
## R2[cs] = 0.395
## R2[n] = 0.527
## Lambda = 41.50
## Chi2 = 0.00
## AIC = 51.50
##
## Call: glm(formula = y ~ swiss$Education + swiss$Examination + swiss$Infant.Mortality +
## swiss$Catholic, family = binomial, subset = -c(vars))
##
## Coefficients:
## (Intercept) swiss$Education swiss$Examination
## -3.6914 -0.0396 -0.1818
## swiss$Infant.Mortality swiss$Catholic
## 0.3332 0.0146
##
## Degrees of Freedom: 45 Total (i.e. Null); 41 Residual
## Null Deviance: 63.7
## Residual Deviance: 36.4 AIC: 46.4
sjp.grpfrq
Plot grouped or stacked frequencies
# histogram plot
sjp.grpfrq(discoveries, sample(1:3, length(discoveries), replace = TRUE), type = "hist",
showValueLabels = FALSE)
# histrogram with EUROFAMCARE sample dataset
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.grpfrq(efc$e17age, efc$e16sex, title = efc.var["e17age"], legendTitle = efc.var["e16sex"],
legendLabels = efc.val[["e16sex"]], type = "hist", showValueLabels = FALSE)
## Warning: Removed 128 rows containing missing values (geom_text).
# boxplot
sjp.grpfrq(ChickWeight$weight, as.numeric(ChickWeight$Diet), type = "box")
# violin plot
sjp.grpfrq(ChickWeight$weight, as.numeric(ChickWeight$Diet), type = "v")
## Warning: sum(weights) != 1 -- will not get true density
## Warning: sum(weights) != 1 -- will not get true density
## Warning: sum(weights) != 1 -- will not get true density
## Warning: sum(weights) != 1 -- will not get true density
# grouped bars
sjp.grpfrq(sample(1:3, length(ChickWeight$Diet), replace = TRUE), as.numeric(ChickWeight$Diet),
barSpace = 0.2)
## ymax not defined: adjusting position using y instead
# grouped bars with EUROFAMCARE sample dataset dataset was importet from an
# SPSS-file, using: efc <- sji.SPSS('efc.sav', enc='UTF-8')
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.grpfrq(efc$e42dep, efc$e16sex, title = efc.var["e42dep"], axisLabels.x = efc.val[["e42dep"]],
legendTitle = efc.var["e16sex"], legendLabels = efc.val[["e16sex"]])
## ymax not defined: adjusting position using y instead
# grouped bars using the maximum y-limit
sjp.grpfrq(efc$e42dep, efc$e16sex, title = efc.var["e42dep"], axisLabels.x = efc.val[["e42dep"]],
legendTitle = efc.var["e16sex"], legendLabels = efc.val[["e16sex"]], maxYlim = TRUE)
## ymax not defined: adjusting position using y instead
# grouped bars using necessary y-limit
sjp.grpfrq(efc$e16sex, efc$e42dep, title = efc.var["e16sex"], axisLabels.x = efc.val[["e16sex"]],
legendTitle = efc.var["e42dep"], legendLabels = efc.val[["e42dep"]])
## ymax not defined: adjusting position using y instead
# box plots with interaction variable
sjp.grpfrq(efc$e17age, efc$e42dep, interactionVar = efc$e16sex, title = paste(efc.var["e17age"],
"by", efc.var["e42dep"], "and", efc.var["e16sex"]), axisLabels.x = efc.val[["e17age"]],
interactionVarLabels = efc.val[["e16sex"]], legendTitle = efc.var["e42dep"],
legendLabels = efc.val[["e42dep"]], type = "box")
sjp.likert
Plot likert scales as centered stacked bars
# prepare data for dichotomous likert scale, 5 items
likert_2 <- data.frame(as.factor(sample(1:2, 500, replace = TRUE, prob = c(0.3,
0.7))), as.factor(sample(1:2, 500, replace = TRUE, prob = c(0.6, 0.4))),
as.factor(sample(1:2, 500, replace = TRUE, prob = c(0.25, 0.75))), as.factor(sample(1:2,
500, replace = TRUE, prob = c(0.9, 0.1))), as.factor(sample(1:2, 500,
replace = TRUE, prob = c(0.35, 0.65))))
# create labels
levels_2 <- list(c("Disagree", "Agree"))
# prepare data for 4-category likert scale, 5 items
likert_4 <- data.frame(as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.2,
0.3, 0.1, 0.4))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.5,
0.25, 0.15, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.25,
0.1, 0.4, 0.25))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.1,
0.4, 0.4, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.35,
0.25, 0.15, 0.25))))
# create labels
levels_4 <- list(c("Strongly disagree", "Disagree", "Agree", "Strongly Agree"))
# prepare data for 6-category likert scale, 5 items
likert_6 <- data.frame(as.factor(sample(1:6, 500, replace = TRUE, prob = c(0.2,
0.1, 0.1, 0.3, 0.2, 0.1))), as.factor(sample(1:6, 500, replace = TRUE, prob = c(0.15,
0.15, 0.3, 0.1, 0.1, 0.2))), as.factor(sample(1:6, 500, replace = TRUE,
prob = c(0.2, 0.25, 0.05, 0.2, 0.2, 0.2))), as.factor(sample(1:6, 500, replace = TRUE,
prob = c(0.2, 0.1, 0.1, 0.4, 0.1, 0.1))), as.factor(sample(1:6, 500, replace = TRUE,
prob = c(0.1, 0.4, 0.1, 0.3, 0.05, 0.15))))
# create labels
levels_6 <- list(c("Very strongly disagree", "Strongly disagree", "Disagree",
"Agree", "Strongly Agree", "Very strongly disagree"))
# create item labels
items <- list(c("Q1", "Q2", "Q3", "Q4", "Q5"))
# plot dichotomous likert scale, ordered by 'negative' values
sjp.likert(likert_2, legendLabels = levels_2, axisLabels.y = items, orderBy = "neg")
## Warning: Stacking not well defined when ymin != 0
# plot 4-category-likert-scale, no order
sjp.likert(likert_4, legendLabels = levels_4, axisLabels.y = items)
## Warning: Stacking not well defined when ymin != 0
# plot 4-category-likert-scale, ordered by positive values and in brown
# color scale
sjp.likert(likert_6, legendLabels = levels_6, barColor = "brown", axisLabels.y = items,
orderBy = "pos")
## Warning: Stacking not well defined when ymin != 0
sjp.lm
Plot beta coefficients of lm
# fit linear model
fit <- lm(airquality$Ozone ~ airquality$Wind + airquality$Temp + airquality$Solar.R)
# plot estimates with CI and standardized beta-values
sjp.lm(fit, gridBreaksAt = 2)
# plot estimates with CI without standardized beta-values and with ugly
# narrow tick marks (because 'gridBreaksAt' was not specified)
sjp.lm(fit, showStandardBeta = FALSE)
sjp.lm.int
Plot interaction terms of linear models
# Note that the data sets used in this example may not be perfectly suitable
# for fitting linear models. I just used them because they are part of the
# R-software.
# fit 'dummy' model.
fit <- lm(weight ~ Time * Diet, data = ChickWeight, x = TRUE)
# show summary to see significant interactions
summary(fit)
##
## Call:
## lm(formula = weight ~ Time * Diet, data = ChickWeight, x = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -135.43 -13.76 -1.31 11.07 130.39
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.931 4.247 7.28 1.1e-12 ***
## Time 6.842 0.341 20.08 < 2e-16 ***
## Diet2 -2.297 7.267 -0.32 0.7520
## Diet3 -12.681 7.267 -1.74 0.0815 .
## Diet4 -0.139 7.286 -0.02 0.9848
## Time:Diet2 1.767 0.572 3.09 0.0021 **
## Time:Diet3 4.581 0.572 8.01 6.3e-15 ***
## Time:Diet4 2.873 0.578 4.97 8.9e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.1 on 570 degrees of freedom
## Multiple R-squared: 0.773, Adjusted R-squared: 0.77
## F-statistic: 277 on 7 and 570 DF, p-value: <2e-16
# plot regression line of interaction terms
sjp.lm.int(fit)
# plot regression line of interaction terms, including value labels
sjp.lm.int(fit, showValueLabels = TRUE)
# fit 'dummy' model
fit <- lm(Fertility ~ . * ., data = swiss, na.action = na.omit, x = TRUE)
# show summary to see significant interactions
summary(fit)
##
## Call:
## lm(formula = Fertility ~ . * ., data = swiss, na.action = na.omit,
## x = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.76 -3.89 -0.68 3.14 14.10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 253.97615 67.99721 3.74 0.00076 ***
## Agriculture -2.10867 0.70163 -3.01 0.00522 **
## Examination -5.58074 2.75010 -2.03 0.05109 .
## Education -3.47089 2.68377 -1.29 0.20547
## Catholic -0.17693 0.40653 -0.44 0.66642
## Infant.Mortality -5.95748 3.08963 -1.93 0.06303 .
## Agriculture:Examination 0.02137 0.01377 1.55 0.13091
## Agriculture:Education 0.01906 0.01523 1.25 0.22009
## Agriculture:Catholic 0.00263 0.00285 0.92 0.36387
## Agriculture:Infant.Mortality 0.06370 0.02981 2.14 0.04060 *
## Examination:Education 0.07517 0.03634 2.07 0.04703 *
## Examination:Catholic -0.00153 0.01079 -0.14 0.88791
## Examination:Infant.Mortality 0.17101 0.12907 1.33 0.19485
## Education:Catholic -0.00713 0.01018 -0.70 0.48865
## Education:Infant.Mortality 0.03359 0.12420 0.27 0.78863
## Catholic:Infant.Mortality 0.00992 0.01617 0.61 0.54409
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.47 on 31 degrees of freedom
## Multiple R-squared: 0.819, Adjusted R-squared: 0.731
## F-statistic: 9.35 on 15 and 31 DF, p-value: 1.08e-07
# plot regression line of interaction terms
sjp.lm.int(fit)
# plot smoothes regression line of interaction terms
sjp.lm.int(fit, smooth = "loess")
# plot linear regression line of interaction terms
sjp.lm.int(fit, smooth = "lm")
sjp.lm.ma
Plot model assumptions of lms
# fit linear model
fit <- lm(airquality$Ozone ~ airquality$Wind + airquality$Temp + airquality$Solar.R)
fit.updated <- sjp.lm.ma(fit)
##
## Removed 1 cases during 1 step(s).
## R-square/adj. R-square of original model: 0.605895 / 0.594845
## R-square/adj. R-square of updated model: 0.636945 / 0.626670
## Warning: position_stack requires constant width: output may be incorrect
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
sjp.pca
Plot PCA results
# randomly create data frame with 7 items, each consisting of 4 categories
likert_4 <- data.frame(sample(1:4, 500, replace = TRUE, prob = c(0.2, 0.3, 0.1,
0.4)), sample(1:4, 500, replace = TRUE, prob = c(0.5, 0.25, 0.15, 0.1)),
sample(1:4, 500, replace = TRUE, prob = c(0.4, 0.15, 0.25, 0.2)), sample(1:4,
500, replace = TRUE, prob = c(0.25, 0.1, 0.4, 0.25)), sample(1:4, 500,
replace = TRUE, prob = c(0.1, 0.4, 0.4, 0.1)), sample(1:4, 500, replace = TRUE),
sample(1:4, 500, replace = TRUE, prob = c(0.35, 0.25, 0.15, 0.25)))
# Create variable labels
colnames(likert_4) <- c("V1", "V2", "V3", "V4", "V5", "V6", "V7")
# plot results from PCA as square-tiled 'heatmap'
sjp.pca(likert_4)
## Using as id variables
## [1] "Following items have been removed:"
## [1] "none."
# manually compute PCS
pca <- prcomp(na.omit(likert_4), retx = TRUE, center = TRUE, scale. = TRUE)
# plot results from PCA as circles, including Eigenvalue-diagnostic. note
# that this plot does not compute the Cronbach's Alpha
sjp.pca(pca, plotEigenvalues = TRUE, type = "circle")
## [1] "--------------------------------------------"
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.09 1.046 1.038 0.991 0.988 0.927 0.908
## Proportion of Variance 0.17 0.156 0.154 0.140 0.139 0.123 0.118
## Cumulative Proportion 0.17 0.326 0.480 0.620 0.760 0.882 1.000
## [1] "Eigenvalues:"
## [1] 1.1894 1.0938 1.0765 0.9822 0.9762 0.8584 0.8236
## [1] "--------------------------------------------"
## [1] "Cronbach's Alpha can only be calculated when having a data frame with each component / variable as column"
## Using as id variables
## $loadings
##
## Loadings:
## [,1] [,2] [,3]
## V1 0.247 0.742
## V2 -0.480
## V3 0.360 -0.536 0.360
## V4 0.262 0.675
## V5 0.194 -0.499
## V6 0.690 -0.339
## V7 0.705 0.125 0.207
##
## [,1] [,2] [,3]
## SS loadings 1.176 1.093 1.091
## Proportion Var 0.168 0.156 0.156
## Cumulative Var 0.168 0.324 0.480
##
## $rotmat
## [,1] [,2] [,3]
## [1,] 0.9313 -0.1872 0.3124
## [2,] 0.3180 0.8361 -0.4470
## [3,] -0.1775 0.5157 0.8382
# ------------------------------- Data from the EUROFAMCARE sample dataset
# -------------------------------
data(efc)
# retrieve variable and value labels
varlabs <- sji.getVariableLabels(efc)
# recveive first item of COPE-index scale
start <- which(colnames(efc) == "c82cop1")
# recveive first item of COPE-index scale
end <- which(colnames(efc) == "c90cop9")
# create data frame with COPE-index scale
df <- as.data.frame(efc[, c(start:end)])
colnames(df) <- varlabs[c(start:end)]
sjp.pca(df)
## Using as id variables
## [1] "Following items have been removed:"
## [1] "none."
sjp.reglin
Plot regression lines for each predictor
data(efc)
fit <- lm(tot_sc_e ~ c12hour + e17age + e42dep, data = efc)
# reression line and scatter plot
sjp.reglin(fit, efc)
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: pseudoinverse used at 4.015
## Warning: neighborhood radius 2.015
## Warning: reciprocal condition number 1.676e-015
## Warning: There are other near singularities as well. 1
## Warning: Removed 7 rows containing missing values (geom_point).
# reression line w/o scatter plot
sjp.reglin(fit, efc, showScatterPlot = FALSE)
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: pseudoinverse used at 4.015
## Warning: neighborhood radius 2.015
## Warning: reciprocal condition number 1.676e-015
## Warning: There are other near singularities as well. 1
# reression line w/o CI
sjp.reglin(fit, efc, showCI = FALSE)
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: Removed 7 rows containing missing values (stat_smooth).
## Warning: pseudoinverse used at 4.015
## Warning: neighborhood radius 2.015
## Warning: reciprocal condition number 1.676e-015
## Warning: There are other near singularities as well. 1
## Warning: Removed 7 rows containing missing values (geom_point).
sjp.stackfrq
Plot stacked proportional bars
# ------------------------------- random sample
# ------------------------------- prepare data for 4-category likert scale,
# 5 items
likert_4 <- data.frame(as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.2,
0.3, 0.1, 0.4))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.5,
0.25, 0.15, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.25,
0.1, 0.4, 0.25))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.1,
0.4, 0.4, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.35,
0.25, 0.15, 0.25))))
# create labels
levels_4 <- list(c("Independent", "Slightly dependent", "Dependent", "Severely dependent"))
# create item labels
items <- list(c("Q1", "Q2", "Q3", "Q4", "Q5"))
# plot stacked frequencies of 5 (ordered) item-scales
sjp.stackfrq(likert_4, legendLabels = levels_4, axisLabels.y = items)
# ------------------------------- Data from the EUROFAMCARE sample dataset
# -------------------------------
data(efc)
# recveive first item of COPE-index scale
start <- which(colnames(efc) == "c82cop1")
# recveive first item of COPE-index scale
end <- which(colnames(efc) == "c90cop9")
# retrieve variable and value labels
varlabs <- sji.getVariableLabels(efc)
vallabs <- sji.getValueLabels(efc)
# create value labels. We need just one variable of the COPE-index scale
# because they have all the same level / categorie / value labels
levels <- vallabs["c82cop1"]
# create item labels
items <- list(varlabs[c(start:end)])
sjp.stackfrq(efc[, c(start:end)], legendLabels = levels, axisLabels.y = items)
sjp.xtab
Plot proportional crosstables
# create 4-category-items
x <- sample(1:4, 100, replace = TRUE)
# create 3-category-items
y <- sample(1:3, 100, replace = TRUE)
# plot 'cross tablulation' of x and y
sjp.xtab(y, x)
## ymax not defined: adjusting position using y instead
# plot 'cross tablulation' of x and y, including labels
sjp.xtab(y, x, axisLabels.x = c("low", "mid", "high"), legendLabels = c("Grp 1",
"Grp 2", "Grp 3", "Grp 4"))
## ymax not defined: adjusting position using y instead
# plot 'cross tablulation' of x and y as stacked proportional bars
sjp.xtab(y, x, tableIndex = "row", barPosition = "stack", flipCoordinates = TRUE)
# grouped bars with EUROFAMCARE sample dataset dataset was importet from an
# SPSS-file, using: efc <- sji.SPSS('efc.sav', enc='UTF-8')
data(efc)
efc.val <- sji.getValueLabels(efc)
efc.var <- sji.getVariableLabels(efc)
sjp.xtab(efc$e42dep, efc$e16sex, title = efc.var["e42dep"], axisLabels.x = efc.val[["e42dep"]],
legendTitle = efc.var["e16sex"], legendLabels = efc.val[["e16sex"]])
## ymax not defined: adjusting position using y instead
sjp.xtab(efc$e16sex, efc$e42dep, title = efc.var["e16sex"], axisLabels.x = efc.val[["e16sex"]],
legendTitle = efc.var["e42dep"], legendLabels = efc.val[["e42dep"]])
## ymax not defined: adjusting position using y instead
sjp.xtab(efc$e16sex, efc$e42dep, title = efc.var["e16sex"], axisLabels.x = efc.val[["e16sex"]],
legendTitle = efc.var["e42dep"], legendLabels = efc.val[["e42dep"]], tableIndex = "row",
barPosition = "stack", flipCoordinates = F)