The main difference between these two functions is that we use the ggplot()
function when we plot data from a data frame, and the qplot()
function when we plot data from objects other than data frames (usually vectors).
# Bar chart of how many vehicles have each cylinder count, built directly
# from a vector. The vector passed to qplot() goes on the x axis and the bar
# geom counts its occurrences on the y axis, so the x title names the
# cylinders and the y title names the count.
qplot(
  mtcars$cyl,
  geom = "bar",
  colour = I("#000000"),
  fill = I("#000000"),
  xlab = "Cylinders",
  ylab = "Number of Vehicles",
  main = "Vehicles by Cylinders"
) + theme_minimal()
The “identity” stat leaves the data “as is”: the bar heights are taken directly from the y values instead of being counted.
# Fill colours used by the grouped bar charts.
bar_palette <- c("#1FAB89", "#FF8080", "#FFBA92", "#C6F1D6")

# One row per vehicle class, with the number of rows of `mpg` in column `n`.
class_count <- dplyr::count(mpg, class)

# The counts are already computed, so stat = "identity" draws every bar at
# the height stored in `n` rather than counting rows again.
ggplot(data = class_count, mapping = aes(x = class, y = n)) +
  geom_bar(fill = "#1a3e59", stat = "identity") +
  theme_classic()
geom_bar() by default uses a position adjustment of "stack", which makes each rectangle’s height proportional to its value and stacks the rectangles on top of each other.
# Stacked (the default position): bar chart of class, colored by drive
# (front, rear, 4-wheel)
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "stack") +
  scale_fill_manual(values = bar_palette) +
  theme_classic()

# position = "dodge": one bar per drive type, placed next to each other
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = bar_palette) +
  theme_classic()

# position = "fill": every bar is scaled to the same height, so the y axis
# reads as a share and is labelled in percent
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent, breaks = seq(0, 1, by = 0.2)) +
  scale_fill_manual(values = bar_palette) +
  theme_classic()
## Histograms
# Horsepower histogram drawn from a plain vector: 25-hp wide bins, black
# outlines, and a fully transparent fill (alpha = 0).
qplot(
  mtcars$hp,
  geom = "histogram",
  binwidth = 25,
  xlim = c(50, 350),
  main = "Histogram",
  xlab = "Horse Power",
  ylab = "Number of cars",
  colour = I("black"),
  alpha = I(0)
) + theme_classic()
## Warning: Removed 2 rows containing missing values (geom_bar).
Facets are a way of splitting a plot into multiple pieces (subplots), one for each group in the data.
## With more than one categorical variable
# One panel per (year, cyl) combination: years as rows, cylinders as columns.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(year ~ cyl)
ggplot2 does not include a dedicated pie chart, so we first need to create a stacked bar chart and then draw it in polar coordinates.
# Seven-colour palette; other plots in this file index into it as well.
my_palette2 <- c("#394a6d", "#3c9d9b", "#52de97", "#c0ffb3", "#8105d8",
                 "#e688a1", "#730068")

# Carburetor values in ascending order, held in their own data frame so the
# aesthetics can refer to the column by name instead of `mtcars$carb`.
carb_sorted <- data.frame(carb = sort(mtcars$carb))

# Stacked bar wrapped around the x axis (theta = "x").
# The palette is a set of discrete fill colours, so the fill is mapped to
# factor(carb) and applied with scale_fill_manual(); a colour scale has no
# effect here because no colour aesthetic is mapped.
ggplot(carb_sorted, aes(x = 1, y = carb, fill = factor(carb))) +
  geom_bar(stat = "identity") +
  coord_polar(theta = "x") +
  scale_fill_manual(values = my_palette2) +
  labs(fill = "carb") +
  theme_classic() +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    panel.background = element_blank()
  )

# Same stacked bar with the stacked heights mapped to the angle
# (theta = "y"), which produces the actual pie; the fill stays continuous.
ggplot(carb_sorted, aes(x = 1, y = carb, fill = carb)) +
  geom_bar(stat = "identity") +
  coord_polar(theta = "y") +
  theme_classic()
coord_cartesian
the default cartesian coordinate system, where you specify x and y values.
coord_flip
a cartesian system with x and y flipped.
coord_fixed
a cartesian system with a “fixed” aspect ratio.
coord_polar
a plot using polar coordinates.
coord_quickmap
a coordinate system that approximates a good aspect ratio for maps.
my_palette <- c("#512c96","#3c6f9c", "#dd6892", "#f9c6ba")
# Cylinder count as a factor, so plots can treat it as a discrete variable.
mtcars$cylFactor <- factor(mtcars$cyl)

# For every vehicle class keep the single row ranked first by descending
# highway mileage. The grouping is dropped afterwards so the result behaves
# like an ordinary tibble wherever it is used next.
best_in_class <- mpg %>%
  group_by(class) %>%
  filter(row_number(desc(hwy)) == 1) %>%
  ungroup()
# City vs. highway mileage, with both axes drawn in reverse order
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point(color = "#E6A400") +
  scale_y_reverse() +
  scale_x_reverse() +
  theme_classic()
## Scatterplot with factor variables
# Colour and shape both encode cylFactor. Giving the two legends the same
# title lets ggplot2 merge them into a single "Cylinders" legend instead of
# drawing one legend per aesthetic.
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = wt, shape = cylFactor, colour = cylFactor)
) +
  geom_point() +
  scale_color_manual(values = my_palette) +
  labs(colour = "Cylinders", shape = "Cylinders") +
  ggtitle("Scatterplot") +
  theme_classic()
## Scatterplot with numeric variables
# Here the colour is mapped to the numeric `cyl` column, while the shape
# still uses the factor version.
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = wt, colour = cyl, shape = cylFactor)
) +
  geom_point() +
  theme_classic()
# Specifying a color palette: points coloured by class with the "Set3"
# brewer palette and sized by city mileage; the rows held in
# `best_in_class` are labelled with their model name.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, color = class, size = cty)
) +
  geom_point() +
  geom_text_repel(data = best_in_class, aes(label = model)) +
  scale_color_brewer(palette = "Set3") +
  labs(
    title = "Fuel Efficiency by Engine Power",
    subtitle = "Fuel economy data from 1999 and 2008 for 38 popular models of cars",
    x = "Engine power (litres displacement)",
    y = "Fuel Efficiency (miles per gallon)",
    color = "Car Type"
  ) +
  theme_classic()
euStockDF <- as_tibble(EuStockMarkets)

# Row position used as the time axis; computed once and safe for zero rows.
stock_day <- seq_len(nrow(euStockDF))

# One colour per index. Mapping a constant label inside aes() and resolving
# it through scale_colour_manual() keeps the colours fixed while adding a
# legend that tells the four lines apart.
stock_colours <- c(
  DAX = my_palette2[3],
  FTSE = my_palette2[2],
  SMI = my_palette2[6],
  CAC = my_palette2[5]
)

ggplot(euStockDF, aes(x = stock_day)) +
  geom_line(aes(y = DAX, colour = "DAX"), size = 0.8) +
  geom_line(aes(y = FTSE, colour = "FTSE"), size = 0.8) +
  geom_line(aes(y = SMI, colour = "SMI"), size = 0.8) +
  geom_line(aes(y = CAC, colour = "CAC"), size = 0.8) +
  scale_colour_manual(
    name = "Index",
    values = stock_colours,
    breaks = names(stock_colours)
  ) +
  labs(x = "Time", y = "Stocks") +
  ggtitle("EU Stocks") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
## Linear regression
# Weight against mileage, points coloured by cylinder factor, with
# straight-line fits (method = "lm") drawn without a confidence band.
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt, color = cylFactor)) +
  geom_point(shape = 19) +
  geom_smooth(method = "lm", color = my_palette2[7], se = FALSE) +
  scale_color_manual(values = my_palette2) +
  labs(
    title = "Linear Regression",
    x = "Miles per Gallon",
    y = "Weight"
  ) +
  theme_minimal()
## Gaussian regression
# Same scatter, but the smoother is chosen automatically (method = "auto")
# and drawn together with its confidence band.
ggplot(data = mtcars, mapping = aes(x = mpg, y = wt, color = cylFactor)) +
  geom_point(shape = 19) +
  geom_smooth(method = "auto", color = my_palette[3], se = TRUE) +
  scale_color_manual(values = my_palette2) +
  labs(
    title = "Gaussian Regression",
    x = "Miles per Gallon",
    y = "Weight"
  ) +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Stat summary
# Raw points plus a thin dashed line through the mean of hwy at each displ.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(color = "#7F78D2") +
  stat_summary(
    fun.y = "mean",
    geom = "line",
    linetype = "dashed",
    size = 0.5
  ) +
  theme_minimal()
# Directory holding the text files for the word cloud. It is defined once so
# the directory check, the download target, and the corpus source all use
# the same path.
dirPath <- "./WordCloud"
speech_file <- file.path(dirPath, "Churchill_speeches.txt")

if (!dir.exists(dirPath)) {
  dir.create(dirPath)
}

# Fetch the speeches only when there is no local copy yet, so re-running the
# script does not hit the network again.
if (!file.exists(speech_file)) {
  download.file(
    "https://ibm.box.com/shared/static/cmid70rpa7xe4ocitcga1bve7r0kqnia.txt",
    destfile = speech_file,
    quiet = TRUE
  )
}

# Build the corpus from every file found in the directory.
speech <- Corpus(DirSource(dirPath))
#inspect(speech)
## Clean the corpus; the transformations run in the order listed.
speech <- speech %>%
  ## Convert the letters to lower case
  tm_map(content_transformer(tolower)) %>%
  ## Remove the numbers from the text
  tm_map(removeNumbers) %>%
  ## Remove common stop words like 'the' or 'we'
  tm_map(removeWords, stopwords("english")) %>%
  ## If we want to remove our own words...
  tm_map(removeWords, c("floccinaucinihilipification", "squirreled")) %>%
  ## Remove punctuation
  tm_map(removePunctuation) %>%
  ## Remove unnecessary whitespace
  tm_map(stripWhitespace)
## Term-document matrix of the cleaned corpus
dtm <- TermDocumentMatrix(speech)

## Plain matrix: the row sums below give one total per term
m <- as.matrix(dtm)

# Total frequency per term, most frequent first
v <- sort(rowSums(m), decreasing = TRUE)

# Two-column table: the term and how often it occurs
d <- tibble(
  word = names(v),
  freq = v
)
head(d, n = 10)
## # A tibble: 10 x 2
## word freq
## <chr> <dbl>
## 1 shall 11
## 2 fight 7
## 3 may 6
## 4 will 6
## 5 europe 5
## 6 upon 5
## 7 victory 5
## 8 war 5
## 9 can 4
## 10 many 4
## Wordcloud visualization
# At most 100 words, every word with frequency >= 1 is eligible, and
# random.order = FALSE places words by decreasing frequency.
wordcloud(
  words = d$word,
  freq = d$freq,
  max.words = 100,
  min.freq = 1,
  random.order = FALSE,
  colors = brewer.pal(6, "Dark2")
)
waffle_palette <- c("#c7d4b6", "#a3aabd", "#a0d0de", "#97b5cf")

# Dollar amount represented by one square. The caption is built from this
# value so the divisor and the caption cannot disagree.
dollars_per_square <- 1235

# Yearly amount per category; the names double as the legend labels.
expenses <- c(
  `Health ($43,212)` = 43212,
  `Education ($113,412)` = 113412,
  `Transportation ($20,231)` = 20231,
  `Entertainment ($28,145)` = 28145
)
#IRkernel::set_plot_options(width = 950, height = 600, units = 'px')
waffle(
  expenses / dollars_per_square,
  rows = 5,
  size = 0.3,
  colors = waffle_palette,
  title = "Imaginary Household Expenses Each Year",
  xlab = paste0("1 square = $", format(dollars_per_square, big.mark = ","))
)
# Fixed seed so both simulated samples are reproducible.
set.seed(1234)
set_a <- rnorm(200, mean = 1, sd = 2)
set_b <- rnorm(200, mean = 0, sd = 1)

# Long format: one row per draw, labelled with the sample it came from.
df <- tibble(
  label = factor(rep(c("A", "B"), each = 200)),
  value = c(set_a, set_b)
)

ggplot(data = df, mapping = aes(x = label, y = value)) +
  geom_boxplot() +
  labs(title = "Boxplot") +
  theme_minimal()
## Times Square
# Default tiles with a single marker that shows a popup when clicked.
map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    lat = 40.7589,
    lng = -73.9851,
    popup = "Times square"
  )
map
## Map with different styles
# Same idea as a default-tile map, but drawn on a named provider tile set.
eiffel_tower <- leaflet() %>%
  addProviderTiles("Stamen.Watercolor") %>%
  addMarkers(
    lat = 48.8584,
    lng = 2.2945,
    popup = "Eiffel tower"
  )
eiffel_tower
### Maps with dataframes
# The data frame is handed to leaflet() once; the layers then refer to its
# columns with the formula interface (~long, ~lat) rather than reaching back
# into `quakes` with `$`.
map_quakes <- leaflet(quakes) %>%
  addTiles() %>%
  addCircleMarkers(lng = ~long, lat = ~lat)
map_quakes

### To improve the clarity of the map, cluster nearby markers
# Longitude and latitude are named explicitly, so leaflet does not have to
# guess which columns hold the coordinates.
clusterd_map_quakes <- leaflet(quakes) %>%
  addTiles() %>%
  addMarkers(
    lng = ~long,
    lat = ~lat,
    clusterOptions = markerClusterOptions()
  )
clusterd_map_quakes
ggvis
is a library that uses the Grammar of Graphics (similar to ggplot), but for interactive visualizations.
plotly
is an open-source library for developing interactive visualizations. It provides a number of “standard” interactions (pop-up labels, drag to pan, select to zoom, etc.) automatically. Moreover, it is possible to take a ggplot2
plot and wrap it in Plotly in order to make it interactive. Plotly has many examples to learn from, though its documentation is less effective.
htmlwidgets
provides a way to utilize a number of JavaScript interactive visualization libraries. JavaScript is the programming language used to create interactive websites (HTML files), and so is highly specialized for creating interactive experiences.