# Andrew Heiss — Jan 28, 2014, 11:16 PM
# Load ggplot (install it first if you need to)
library(ggplot2)
# Load built-in data
# Copy the packaged datasets into the workspace under their own names.
# NOTE(review): `movies` is expected to be provided by the ggplot2 package
# loaded above; newer ggplot2 releases may no longer ship it - confirm.
mtcars <- mtcars
movies <- movies
# Select 1000 of the movies
# Fix the RNG seed first so the same 1000 rows are drawn on every run;
# without it, each run samples (and therefore plots) a different subset.
set.seed(1234)
movies <- movies[sample(nrow(movies), 1000), ]
# To use data from Stata instead, do the following:
# library(foreign) # Lets you load data saved by other programs
# read.dta(file.choose()) # read.dta() reads a .dta file; file.choose() opens
#                         # a file dialog so you can navigate to your .dta file.
# Alternatively, you can give the actual path to the file, like so:
# read.dta("data.dta")
# If your data is a CSV file, you don't need the foreign library at all; you
# can just do this:
# read.csv(file.choose())
# Univariate plots ----
# Continuous data
# Histogram of the `rating` column of `movies`
p <- ggplot(movies, mapping = aes(x = rating))
# No binwidth is supplied here, so ggplot2 picks its default bins
p + geom_histogram()
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning: position_stack requires constant width: output may be incorrect
# Coarse histogram: bins that are 2 units wide
p + geom_histogram(binwidth = 2)
# Fine histogram: bins that are 0.1 units wide
p + geom_histogram(binwidth = 0.1)
#> Warning: position_stack requires constant width: output may be incorrect
# Density curve for the plot object `p`, filled in dark grey
p + geom_density(fill = "darkgrey")
# Single boxplot of `rating`, flipped so it lies horizontally
# factor(0) is a constant, one-level x variable, so there is only one box
m <- ggplot(movies, mapping = aes(x = factor(0), y = rating))
m +
  geom_boxplot() +
  coord_flip()
# Bar charts (categorical)
# Bar chart of `cyl` (treated as a factor) in mtcars.
# The plot is stored as `cyl_bars` rather than `c`, so the object does not
# mask base::c() in the workspace.
cyl_bars <- ggplot(mtcars, aes(x = factor(cyl)))
cyl_bars + geom_bar()
# Bivariate plots ----
# Continuous + continuous: `wt` on the x axis, `mpg` on the y axis
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
p + geom_point(size = 4)
# Bring in two more variables: colour by factor(cyl), point size by qsec
p + geom_point(mapping = aes(colour = factor(cyl), size = qsec))
# Continuous + categorical: `hwy` for each level of factor(cyl) in mpg
p <- ggplot(data = mpg, mapping = aes(x = factor(cyl), y = hwy))
# Overlaid dots
p + geom_point(size = 4)
# Jittered dots
p + geom_point(size = 4, position = "jitter")
# Transparent dots
p + geom_point(size = 4, alpha = 0.2)
# Violin plots of `hwy` per factor(cyl), with the same factor as fill colour
p <- ggplot(
  data = mpg,
  mapping = aes(x = factor(cyl), y = hwy, fill = factor(cyl))
)
p + geom_violin(scale = "width")
# Same violins with jittered points layered on top, for fun
p +
  geom_violin(scale = "width") +
  geom_point(size = 2, position = "jitter")
# Categorical data ----
# Mosaic plot of Sex against Survived, taken from the Titanic data
mosaicplot(
  ~ Sex + Survived,
  data = Titanic,
  color = TRUE
)
# Mosaic plot of the whole Titanic object
mosaicplot(Titanic, color = TRUE)
# Coefficient plots
library(coefplot) # Install this package
# Linear model of mpg on wt, disp, hp and factor(cyl), fitted to mtcars
model1 <- lm(
  formula = mpg ~ wt + disp + hp + factor(cyl),
  data = mtcars
)
# Print the coefficient table and fit statistics
summary(model1)
#> Call:
#> lm(formula = mpg ~ wt + disp + hp + factor(cyl), data = mtcars)
#>
#> Residuals:
#>    Min     1Q Median     3Q    Max
#> -4.274 -1.035 -0.383  0.981  5.419
#>
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)
#> (Intercept)   36.0024     2.1307   16.90  1.5e-15 ***
#> wt            -3.4286     1.0555   -3.25   0.0032 **
#> disp           0.0042     0.0129    0.33   0.7477
#> hp            -0.0235     0.0122   -1.93   0.0652 .
#> factor(cyl)6  -3.4660     1.4630   -2.37   0.0255 *
#> factor(cyl)8  -3.7532     2.8140   -1.33   0.1938
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 2.48 on 26 degrees of freedom
#> Multiple R-squared:  0.858, Adjusted R-squared:  0.83
#> F-statistic: 31.4 on 5 and 26 DF,  p-value: 3.18e-10
# Plot the coefficients of model1 with no title, using the black-and-white theme
coefplot(model1) +
  labs(title = NULL) +
  theme_bw()
# Scatterplot matrix
library(car)
# Pairwise scatterplots of mpg, wt, disp and hp, conditioned on cyl, no title
scatterplotMatrix(
  ~ mpg + wt + disp + hp | cyl,
  data = mtcars,
  main = NULL
)