# Setup ----
# Install mosaicData only when it is missing, so that re-running the script
# does not reinstall the package (and need network access) every time.
if (!requireNamespace("mosaicData", quietly = TRUE)) {
  install.packages("mosaicData")
}
# library(mosaicData)

# Calling the CPS85 dataset from the mosaicData package
data(CPS85, package = "mosaicData")

# The data viewer and the help page only make sense in an interactive session.
# The package is named explicitly because mosaicData is not attached, so a
# bare `?CPS85` would not find the topic.
if (interactive()) {
  View(CPS85)
  help("CPS85", package = "mosaicData")
}

# Number of rows and columns in the data set
dim(CPS85)
# Using ggplot2 for data visualization
library(ggplot2)

# Step 1: map `exper` to x and `wage` to y. No geom has been added yet, so
# this draws only the empty axes.
ggplot(data = CPS85, mapping = aes(x = exper, y = wage))

# Step 2: add a point layer to turn the empty canvas into a scatterplot.
ggplot(data = CPS85, mapping = aes(x = exper, y = wage)) +
  geom_point()
library(dplyr)

# Keep only the rows whose wage is below 40, then check the new dimensions.
plotdata <- filter(CPS85, wage < 40)
dim(plotdata)

# Scatterplot of the filtered data
ggplot(data = plotdata, mapping = aes(x = exper, y = wage)) +
  geom_point()

# Constant (non-mapped) point properties are set outside aes().
# `shape` is the ggplot2 name for base graphics' `pch`.
ggplot(data = plotdata, mapping = aes(x = exper, y = wage)) +
  geom_point(color = "blue", alpha = .6, size = 2, shape = 18)

# Add a line of best fit: method = "lm" fits a linear model, and
# linetype = 1 (base graphics' `lty`) draws a solid line.
ggplot(data = plotdata, mapping = aes(x = exper, y = wage)) +
  geom_point(color = "cornflowerblue", alpha = .7, size = 3, shape = 8) +
  geom_smooth(method = "lm", linetype = 1, color = "red")
# Grouping ----
# Mapping `sex` to colour inside aes() splits both the points and the fitted
# lines by group. se = TRUE draws the confidence band around each line.
# Line thickness uses `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(data = plotdata, mapping = aes(x = exper, y = wage, color = sex)) +
  geom_point(alpha = .7, size = 3) +
  geom_smooth(method = "lm", se = TRUE, linewidth = 1.5)
# Scaling ----
library(scales)

# Custom axis breaks, dollar-formatted y labels, and manual group colours.
# `labels` is spelled out in full rather than relying on partial matching of
# `label`; `linewidth` replaces the deprecated `size` for the fitted lines.
ggplot(data = plotdata, mapping = aes(x = exper, y = wage, color = sex)) +
  geom_point(alpha = .7, size = 3) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1.5) +
  scale_x_continuous(breaks = seq(0, 60, 10)) +
  scale_y_continuous(breaks = seq(0, 30, 5), labels = scales::dollar) +
  scale_color_manual(values = c("indianred3", "cornflowerblue"))
# Facets ----
# Same scaled plot with default point and line sizes, split into one panel
# per level of `sector`. It is stored so the following steps can extend it
# instead of repeating the whole specification.
wage_by_sector <- ggplot(
  data = plotdata,
  mapping = aes(x = exper, y = wage, color = sex)
) +
  geom_point(alpha = .7) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_continuous(breaks = seq(0, 60, 10)) +
  scale_y_continuous(breaks = seq(0, 30, 5), labels = scales::dollar) +
  scale_color_manual(values = c("indianred3", "cornflowerblue")) +
  facet_wrap(~sector)
wage_by_sector

# Labels ----
# Title, subtitle, caption, axis titles and legend title.
wage_labelled <- wage_by_sector +
  labs(
    title = "Relationship between wages and experience",
    subtitle = "Current Population Survey",
    caption = "source: http://mosaic-web.org/",
    x = " Years of Experience",
    y = "Hourly Wage",
    color = "Gender"
  )
wage_labelled

# Themes ----
# Swap the default theme for the built-in dark theme.
wage_labelled +
  theme_dark()
# Store a plot as an object: scatterplot coloured by `sex`, with a quadratic
# (degree-2 polynomial) least-squares fit per group and no confidence band.
# `linewidth` replaces the deprecated `size` for the fitted lines.
myplot <- ggplot(plotdata, aes(x = exper, y = wage, color = sex)) +
  geom_point(alpha = .7, size = 3) +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    se = FALSE,
    linewidth = 1.5
  )

# Printing the object renders the plot.
myplot
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print per-column summary statistics for the `cars` data set
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.