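ggplot2 is based on a grammar of graphics, which means that every plot is built from the same components: a data set, aesthetic mappings from variables to visual properties, and geometric layers that draw them.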
The documentation provides details.
This cheatsheet is a handy reference guide.
This blogpost has a lot of nice examples.
# Load the religion survey. The factor-based steps later in this script
# (droplevels(), levels()) need the string columns to be factors, which
# read.csv() no longer produces by default since R 4.0.0, so ask for it
# explicitly. On older R versions this is the default and changes nothing.
religion <- read.csv("religion-survey-results.csv", stringsAsFactors = TRUE)
# names(religion)  # uncomment to inspect the original column names
# Give the two survey columns used below short, plot-friendly names.
names(religion)[1] <- "relig"
names(religion)[5] <- "prayMotion"
Before we can use ggplot2 to make plots, we need to load the library.
library(ggplot2)
First graphs
# Bar chart of respondent counts per religion: first upright, then flipped
# so the category labels run along the vertical axis.
relig_counts <- ggplot(data = religion, mapping = aes(x = relig)) +
  geom_bar()
relig_counts
relig_counts + coord_flip()

# Keep only four groups and discard the factor levels that are now empty.
groups_to_keep <- c("Roman Catholic", "Protestant", "Atheist", "Agnostic")
religion <- religion[religion$relig %in% groups_to_keep, , drop = FALSE]
religion <- droplevels(religion)

# The flipped bar chart again, now on the reduced data (shown twice).
relig_counts_subset <- ggplot(data = religion, mapping = aes(x = relig)) +
  geom_bar() +
  coord_flip()
relig_counts_subset
relig_counts_subset
# Lollipop-style chart: a point at each count, drawn with a very thin bar.
ggplot(data = religion, mapping = aes(x = prayMotion, fill = prayMotion)) +
  geom_point(stat = "count") +
  geom_bar(width = 0.02) +
  coord_flip()

# Bars rescaled to proportions: prayMotion answers within each religion.
ggplot(data = religion, mapping = aes(x = relig, fill = prayMotion)) +
  geom_bar(position = "fill") +
  coord_flip()

# Pie chart: one proportion-stacked bar wrapped around polar coordinates.
ggplot(data = religion, mapping = aes(x = factor(1), fill = relig)) +
  geom_bar(position = "fill") +
  coord_polar(theta = "y")
# Tiny two-category data set: two "C2" rows and four "C1" rows.
toyData <- data.frame(Cat = c("C2", "C2", "C1", "C1", "C1", "C1"))

# Full-width bars in polar coordinates with a square-root y scale.
ggplot(data = toyData, mapping = aes(x = Cat, fill = Cat)) +
  geom_bar(width = 1) +
  coord_polar() +
  scale_y_sqrt()

# The same construction for the survey data, one bar per religion.
ggplot(data = religion, mapping = aes(x = relig, fill = relig)) +
  geom_bar(width = 1, stat = "count") +
  scale_y_sqrt() +
  coord_polar()
Some additional data processing and graphs not presented in class:
# Short names for two more survey columns (positions 9 and 19).
names(religion)[c(9, 19)] <- c("offerPrayer", "comfOfferPrayer")

# Stacked counts of offerPrayer answers per religion, with flipped axes.
g <- ggplot(religion, aes(relig, fill = offerPrayer)) +
  geom_bar() +
  coord_flip()

# Draw the same plot once with each of these built-in ggplot2 themes.
theme_gallery <- list(
  theme_bw, theme_light, theme_dark,
  theme_minimal, theme_classic, theme_linedraw
)
for (theme_fn in theme_gallery) {
  print(g + theme_fn())
}
# Proportion of each offerPrayer answer within each religion.
ggplot(religion, aes(relig, fill = offerPrayer)) +
  geom_bar(position = "fill") +
  coord_flip()

# Drop the rows where the offerPrayer answer is an empty string.
religion <- subset(religion, offerPrayer != "")

# The same proportion plot on the filtered data (shown twice).
offer_prayer_share <- ggplot(religion, aes(relig, fill = offerPrayer)) +
  geom_bar(position = "fill") +
  coord_flip()
offer_prayer_share
offer_prayer_share

# Proportion of each comfOfferPrayer answer within each religion.
ggplot(religion, aes(relig, fill = comfOfferPrayer)) +
  geom_bar(position = "fill") +
  coord_flip()
This plot, showing the comfort level of those offering to pray with others, is not very informative, because a large proportion of respondents say they don’t do this. Let’s restrict our attention to those who do.
# Keep only rows with a real comfort answer: neither blank nor
# "I don't do this". Then drop the factor levels left without rows.
religion <- droplevels(
  subset(
    religion,
    !(comfOfferPrayer == "" | comfOfferPrayer == "I don't do this")
  )
)

# Proportion of each comfort answer within each religion.
ggplot(religion, aes(relig, fill = comfOfferPrayer)) +
  geom_bar(position = "fill") +
  coord_flip()
The order of these labels doesn’t make sense. Let’s change it so that it follows a logical order (instead of the alphabetical order that R assigns to factor levels by default). Before we change the labels, let’s look at the data.
# Current order of the comfort levels.
levels(religion[["comfOfferPrayer"]])
## [1] "Extremely comfortable" "Not at all comfortable"
## [3] "Not so comfortable" "Somewhat comfortably"
## [5] "Very comfortable"
# Row count per comfort level, before reordering.
table(religion[["comfOfferPrayer"]])
##
## Extremely comfortable Not at all comfortable Not so comfortable
## 83 16 43
## Somewhat comfortably Very comfortable
## 65 84
Now, let’s change the labels so they go from highest to lowest level of comfort.
# Comfort levels from most to least comfortable. The spellings (including
# "Somewhat comfortably") have to match the values in the data exactly;
# factor() turns any value not listed in `levels` into NA.
comfort_order <- c(
  "Extremely comfortable",
  "Very comfortable",
  "Somewhat comfortably",
  "Not so comfortable",
  "Not at all comfortable"
)
religion$comfOfferPrayer <- factor(
  religion$comfOfferPrayer,
  levels = comfort_order
)
After changing the labels, let’s look at the data again, to confirm that we only re-ordered the labels, without changing the underlying data.
# Row count per comfort level, after reordering.
table(religion[["comfOfferPrayer"]])
##
## Extremely comfortable Very comfortable Somewhat comfortably
## 83 84 65
## Not so comfortable Not at all comfortable
## 43 16
Looks good. Let’s graph.
# Proportion of each comfort answer within each religion, using the
# reordered levels.
ggplot(religion, aes(relig, fill = comfOfferPrayer)) +
  geom_bar(position = "fill") +
  coord_flip()
With the revised labels, it is much easier to see what proportion of each religious group is at least somewhat comfortable with offering a prayer.
# One bar per comfort level, filled by religion; the four plots below
# differ only in how geom_bar() positions the groups within a bar.
comfort_by_relig <- ggplot(religion, aes(comfOfferPrayer, fill = relig))

# Default position.
comfort_by_relig + geom_bar() + coord_flip()
# Groups overlaid at the same position, half-transparent.
comfort_by_relig + geom_bar(position = "identity", alpha = 0.5) + coord_flip()
# Groups side by side.
comfort_by_relig + geom_bar(position = "dodge") + coord_flip()
# Groups stacked and rescaled to proportions.
comfort_by_relig + geom_bar(position = "fill") + coord_flip()
Capital Bikeshare data is taken from Capital Bikeshare’s system data portal.
# Load the Capital Bikeshare data. String columns are read as factors
# explicitly: summary() then reports per-category counts and
# levels(bikes$Station.Name) works later in the script. Since R 4.0.0
# read.csv() would otherwise return plain character columns.
bikes <- read.csv("bikes.csv", stringsAsFactors = TRUE)
summary(bikes)
## Year Month Station.Name
## Min. :2014 Apr : 317 10th & E St NW : 12
## 1st Qu.:2015 Aug : 317 10th & Florida Ave NW : 12
## Median :2015 Jul : 317 10th & Monroe St NE : 12
## Mean :2015 Jun : 317 10th & U St NW : 12
## 3rd Qu.:2015 Mar : 317 10th St & Constitution Ave NW: 12
## Max. :2015 May : 317 11th & F St NW : 12
## (Other):1894 (Other) :3724
## Municipality Departures Arrivals
## Alexandria, VA : 96 Min. : 0.0 Min. : 0.0
## Arlington, VA : 867 1st Qu.: 100.0 1st Qu.: 92.0
## Rest of Montgomery County: 336 Median : 397.0 Median : 338.0
## Rockville/Shady Grove : 240 Mean : 765.7 Mean : 762.9
## Washington, DC :2257 3rd Qu.:1086.0 3rd Qu.:1079.2
## Max. :9994.0 Max. :9990.0
##
# Column names of the bikes data frame.
colnames(bikes)
## [1] "Year" "Month" "Station.Name" "Municipality"
## [5] "Departures" "Arrivals"
# Histogram of Departures with the bars filled by Month (default binning).
ggplot(data = bikes, mapping = aes(x = Departures, fill = Month)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Two half-transparent histograms on the same axes:
# Departures in green, Arrivals in blue.
ggplot(data = bikes) +
  geom_histogram(mapping = aes(x = Departures), fill = "green", alpha = 0.5) +
  geom_histogram(mapping = aes(x = Arrivals), fill = "blue", alpha = 0.5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# State outline polygons from map_data(), and the subset of three regions
# around Washington, DC.
all_states <- map_data("state")
dc_area_regions <- c("maryland", "district of columbia", "virginia")
dc <- subset(all_states, region %in% dc_area_regions)

# All state outlines: black fill, white borders.
ggplot() +
  geom_polygon(
    data = all_states,
    mapping = aes(x = long, y = lat, group = group),
    color = "white",
    fill = "black"
  ) +
  coord_map()

# The three-region subset, with the view limited to a fixed
# longitude/latitude window.
ggplot() +
  geom_polygon(
    data = dc,
    mapping = aes(x = long, y = lat, group = group),
    color = "white",
    fill = "black"
  ) +
  coord_map(xlim = c(-77.2, -76.9), ylim = c(38.8, 39.05))
# Peek at the first six station names.
head(bikes[["Station.Name"]], n = 6L)
## [1] Eads St & 15th St S 18th & Eads St.
## [3] 20th & Crystal Dr 15th & Crystal Dr
## [5] S Joyce & Army Navy Dr Crystal City Metro / 18th & Bell St
## 319 Levels: 10th & E St NW 10th & Florida Ave NW ... Wisconsin Ave & O St NW
# Make sure Station.Name is a factor BEFORE reading its levels. When the
# column is a plain character vector (the read.csv() default since
# R 4.0.0), levels() returns NULL and the exported address list would be
# a single ", Washington, DC" entry.
bikes$Station.Name <- as.factor(bikes$Station.Name)
StationNames <- levels(bikes$Station.Name)

# Build one address string per unique station and export it for geocoding.
# NOTE(review): every station gets the ", Washington, DC" suffix, although
# the Municipality column also lists places outside DC -- confirm the
# geocoder still resolves those stations correctly.
StationAddresses <- paste0(StationNames, ", Washington, DC")
write.csv(StationAddresses, "stations.csv")
#go to http://www.findlatitudeandlongitude.com/batch-geocode/ to convert addresses into coordinates
#save results as stationLatLong.csv
# Read the geocoded station coordinates.
coordData <- read.csv("stationLatLong.csv")

# Overwrite the address column with the raw station names and rename the
# first column to match bikes' third column name, so both tables share a
# merge key.
# NOTE(review): assumes the geocoder returned rows in the same order as
# StationNames and that original.address is the first column -- confirm.
coordData$original.address <- StationNames
names(coordData)[1] <- names(bikes)[3]

# Attach coordinates to the ridership rows, then keep only rows that have
# a longitude and belong to December 2014.
mapdata <- merge(bikes, coordData, by = "Station.Name")
mapdata <- subset(mapdata, !is.na(longitude) & Year == 2014 & Month == "Dec")

# Plot window: the stations' bounding box padded by 0.01 on every side.
longLim <- range(mapdata$longitude) + c(-0.01, 0.01)
latLim <- range(mapdata$latitude) + c(-0.01, 0.01)

# Region outlines with one point per station, coloured by Arrivals.
p <- ggplot() +
  geom_polygon(
    data = dc,
    mapping = aes(x = long, y = lat, group = group),
    colour = "white",
    fill = "grey10"
  ) +
  geom_point(
    data = mapdata,
    mapping = aes(x = longitude, y = latitude, color = Arrivals)
  ) +
  coord_map(xlim = longLim, ylim = latLim) +
  scale_color_gradient(low = "lightblue", high = "red")
p
We could restrict our attention to DC stations.
# Rebuild the merged table, this time keeping only rows that have a
# longitude, belong to December 2014, and are in Washington, DC.
mapdata <- merge(bikes, coordData, by = "Station.Name")
mapdata <- subset(
  mapdata,
  !is.na(longitude) &
    Year == 2014 &
    Month == "Dec" &
    Municipality == "Washington, DC"
)

# Plot window: the exact bounding box of the remaining stations.
longLim <- range(mapdata$longitude)
latLim <- range(mapdata$latitude)

# Region outlines (lighter grey) with one point per station, coloured by
# Arrivals.
p <- ggplot() +
  geom_polygon(
    data = dc,
    mapping = aes(x = long, y = lat, group = group),
    colour = "white",
    fill = "grey65"
  ) +
  geom_point(
    data = mapdata,
    mapping = aes(x = longitude, y = latitude, color = Arrivals)
  ) +
  coord_map(xlim = longLim, ylim = latLim) +
  scale_color_gradient(low = "lightblue", high = "red")
p