# ANGGA-DONGOSA-ggplot2-tutorials-Part3-in-R.R

# Mindanao State University
# General Santos City
# Submitted by: Angga, Princess Joy & Davy D. Dongosa
# Visualizations in R
# Math 108

# Task: create advanced visualizations in R

# Task 1: Correlation and Sub-sections
# The following plots help to examine how well correlated two variables are

# Scatterplot (most frequently used plot for data analysis to understand the nature of relationship between two variables)
# It can be drawn using geom_point()
# Additionally, geom_smooth which draws a smoothing line (based on loess) by default, can be tweaked to draw the line of best fit by setting method='lm'
# Steps to make a scatterplot

# install.packages("ggplot2")
# load package and data
options(scipen = 999)  # turn off scientific notation like 1e+48

library(ggplot2)
theme_set(theme_bw())  # pre-set the bw theme.
data("midwest", package = "ggplot2")
# midwest <- read.csv("http://goo.gl/G1K41K")  # bkup data source

# Scatterplot of area vs. total population from the midwest data.
# Points are coloured by state and sized by population density, and a loess
# smoothing line is drawn without its confidence band.
# NOTE: xlim()/ylim() discard rows outside the limits, which is why ggplot2
# reports "Removed ... rows" warnings when this plot is drawn.
gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "loess", se = FALSE) +  # FALSE, not the reassignable F
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "Area Vs Population",
    y = "Population",
    x = "Area",
    title = "Scatterplot",
    caption = "Source: midwest"
  )

plot(gg)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 15 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 15 rows containing missing values (`geom_point()`).

# Scatterplot With Encircling (used to encircle a special group of points or a region in the chart, so as to draw attention to those peculiar cases when presenting the results)
# This can be conveniently done using the geom_encircle() in ggalt package
# Within geom_encircle(), set the data to a new dataframe that contains only the points (rows) of interest
# Moreover, You can expand the curve so as to pass just outside the points
# The color and size (thickness) of the curve can be modified as well
# Steps to make a scatterplot with encircling

# install 'ggalt' pkg
# devtools::install_github("hrbrmstr/ggalt")
# Setup for the encircling example.
options(scipen = 999)  # turn off scientific notation like 1e+48
library(ggplot2)
# install.packages("ggalt")
library(ggalt)  # provides geom_encircle(), used in the plot below

## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2

# Rows of interest: counties with a population in (350000, 500000] and an
# area strictly between 0.01 and 0.1. These are the points to be encircled.
midwest_select <- midwest[midwest$poptotal > 350000 &
                            midwest$poptotal <= 500000 &
                            midwest$area > 0.01 &
                            midwest$area < 0.1, ]

# Plot: same scatterplot as before, plus a red curve drawn around the
# selected rows only (geom_encircle() gets its own `data` argument).
ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +   # draw points
  geom_smooth(method = "loess", se = FALSE) +         # draw smoothing line
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  geom_encircle(aes(x = area, y = poptotal),
                data = midwest_select,
                color = "red",
                size = 2,
                expand = 0.08) +   # encircle
  labs(
    subtitle = "Area Vs Population",
    y = "Population",
    x = "Area",
    title = "Scatterplot + Encircle",
    caption = "Source: midwest"
  )

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 15 rows containing non-finite values (`stat_smooth()`).
## Removed 15 rows containing missing values (`geom_point()`).

# Jitter Plot
# use the mpg dataset to plot city mileage (cty) vs highway mileage (hwy)
# Steps to make the plot

# load package and data
library(ggplot2)

data(mpg, package = "ggplot2")  # alternate source: "http://goo.gl/uEeRGu"
theme_set(theme_bw())  # pre-set the bw theme.

# Base plot: city mileage (cty) on x, highway mileage (hwy) on y.
g <- ggplot(mpg, aes(cty, hwy))

# Scatterplot with a straight line of best fit (method = "lm") and no
# confidence band. The caption names the dataset actually plotted (mpg).
g + geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    subtitle = "mpg: city vs highway mileage",
    y = "hwy",
    x = "cty",
    title = "Scatterplot with overlapping points",
    caption = "Source: mpg"
  )

## `geom_smooth()` using formula = 'y ~ x'

# What we have here is a scatterplot of city and highway mileage in mpg dataset
# Scatterplot with overlapping points
# But, this innocent looking plot is hiding something
# The original data has 234 data points but the chart seems to display fewer points
# What has happened? This is because there are many overlapping points appearing as a single dot
# The fact that both cty and hwy are integers in the source dataset made it all the more convenient to hide this detail
# So just be extra careful the next time you make scatterplot with integers
# So how to handle this? There are few options
# We can make a jitter plot with geom_jitter()
# As the name suggests, the overlapping points are randomly jittered around their original positions, within a threshold controlled by the width argument
# Steps to make jitter plot

# load package and data
library(ggplot2)
data(mpg, package = "ggplot2")
# mpg <- read.csv("http://goo.gl/uEeRGu")

# Jitter plot: each point is randomly nudged (width = 0.5) so that
# observations sharing the same integer cty/hwy values no longer overlap.
theme_set(theme_bw())  # pre-set the bw theme.
g <- ggplot(data = mpg, mapping = aes(x = cty, y = hwy))
g +
  geom_jitter(size = 1, width = 0.5) +
  ggtitle("Jittered Points", subtitle = "mpg: city vs highway mileage") +
  xlab("cty") +
  ylab("hwy")

# The second option to overcome the problem of data points overlap is to use what is called a counts chart
# Wherever more points overlap, the circle gets bigger
# Steps to make a counts chart

# load package and data
library(ggplot2)
data(mpg, package = "ggplot2")
# mpg <- read.csv("http://goo.gl/uEeRGu")

# Counts chart: geom_count() draws one point per distinct (cty, hwy) pair,
# sized by the number of observations at that location.
theme_set(theme_bw())  # pre-set the bw theme.
g <- ggplot(mpg, aes(cty, hwy))

g + geom_count(col = "tomato3", show.legend = FALSE) +
  labs(
    subtitle = "mpg: city vs highway mileage",
    y = "hwy",
    x = "cty",
    title = "Counts Plot"  # was "Jittered Points", copied from the jitter plot
  )

# ANGGA-DONGOSA-ggplot2-tutorials-Part3-in-R.R
#
# r1947346
#
# 2023-05-11