Import library

# ggplot

suppressMessages(library(ggplot2))
suppressMessages(library(ggthemes))
suppressMessages(library(tidyverse))
suppressMessages(library(ggrepel))

How to find file path

# a = file.choose()
# a

# Read the reduced crime dataset into the `crime` data frame.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
crime <- read.csv(
  file = "/Users/thien/Desktop/R-dir/R studying/dataset/Crime dataset reduced.csv"
)

1.1 Histogram

# Base-graphics histogram of the `police` column of `crime`.
hist(crime$police)

# Base-graphics histogram of the `population` column of `crime`.
hist(crime$population)

Step by step

for variable population

# Step 1: The three key components of a plot: data, mapping and a geom_ layer
ggplot(
  data = crime,
  mapping = aes(x = population)
) +
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 2: Fill the bars in blue
ggplot(crime, aes(x = population)) +
  geom_histogram(fill = "blue")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 3: Outline each bar in white so adjacent bins are easier to tell apart
ggplot(crime, aes(x = population)) +
  geom_histogram(colour = "white", fill = "blue")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 4: Put the histogram on the density scale and overlay a density curve.
# `after_stat(density)` replaces the dot-dot notation (`..density..`), which
# was deprecated in ggplot2 3.4.0.
ggplot(data = crime, mapping = aes(x = population)) +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 5: Add main title for this graph
# `after_stat(density)` is the supported replacement for the deprecated
# `..density..` notation; it puts the bars on the same scale as geom_density().
ggplot(data = crime, mapping = aes(x = population)) +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red") +
  ggtitle("Distribution of population")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 6: Center alignment for the main title
# `after_stat(density)` is the supported replacement for the deprecated
# `..density..` notation; hjust = 0.5 centres the title horizontally.
ggplot(data = crime, mapping = aes(x = population)) +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red") +
  ggtitle("Distribution of population") +
  theme(plot.title = element_text(hjust = 0.5))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 7: I would like to change to the economist theme
# `after_stat(density)` is the supported replacement for the deprecated
# `..density..` notation.
# theme_economist() is a complete theme: adding it after theme() would replace
# the title centring, so it is applied first and the tweak comes afterwards.
ggplot(data = crime, mapping = aes(x = population)) +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red") +
  ggtitle("Distribution of population") +
  theme_economist() + # this theme from library(ggthemes)
  theme(plot.title = element_text(hjust = 0.5))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

for variable police

# Default histogram of the `police` column.
ggplot(data = crime, mapping = aes(x = police)) +
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with the bars filled in blue.
ggplot(data = crime, mapping = aes(x = police)) +
  geom_histogram(fill = "blue")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Blue bars with a white outline to separate adjacent bins.
ggplot(data = crime, mapping = aes(x = police)) +
  geom_histogram(colour = "white", fill = "blue")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density-scaled histogram of `police` (bin width 0.05) with a density curve.
# `after_stat(density)` replaces the dot-dot notation (`..density..`), which
# was deprecated in ggplot2 3.4.0.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red")

# Density-scaled histogram of `police` with a density curve and a main title.
# `after_stat(density)` replaces the dot-dot notation (`..density..`), which
# was deprecated in ggplot2 3.4.0.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red") +
  ggtitle("Distribution of the number of police")

# Density-scaled histogram of `police` with a horizontally centred title.
# `after_stat(density)` replaces the dot-dot notation (`..density..`), which
# was deprecated in ggplot2 3.4.0.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red") +
  ggtitle("Distribution of the number of police") +
  theme(plot.title = element_text(hjust = 0.5))

# Density-scaled histogram of `police` using the black-and-white theme.
# `after_stat(density)` replaces the dot-dot notation (`..density..`), which
# was deprecated in ggplot2 3.4.0.
# theme_bw() is a complete theme: adding it after theme() would replace the
# title centring, so it is applied first and the tweak comes afterwards.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red") +
  ggtitle("Distribution of the number of police") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

# Density-scaled histogram of `police` using the economist theme.
# `after_stat(density)` replaces the dot-dot notation (`..density..`), which
# was deprecated in ggplot2 3.4.0.
# theme_economist() is a complete theme: adding it after theme() would replace
# the title centring, so it is applied first and the tweak comes afterwards.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red") +
  ggtitle("Distribution of the number of police") +
  theme_economist() + # I need the economist theme in the "ggthemes" library
  theme(plot.title = element_text(hjust = 0.5))

Compare police frequencies between Fresno and Los Angeles

# Keep the city, police and robbery columns for the rows whose city code is
# "fresno" or "losangel".
two_cities <- crime %>%
  select(city, police, robbery) %>%
  filter(city %in% c("fresno", "losangel"))

# One box plot of `police` per city, outlined in a per-city colour.
ggplot(data = two_cities, mapping = aes(y = police, colour = city)) +
  geom_boxplot() +
  theme_economist()

Compare robbery frequencies between Fresno and Los Angeles

# One box plot of `robbery` per city, outlined in a per-city colour.
ggplot(data = two_cities, mapping = aes(y = robbery, colour = city)) +
  geom_boxplot() +
  theme_economist()

Examine the relationship between robbery and police

For 5 cities

# Scatter plot of robbery against police, coloured by city.
ggplot(
  data = crime,
  mapping = aes(x = police, y = robbery, colour = city)
) +
  geom_point()

# Same scatter plot with a linear-model fit drawn for each city colour group.
ggplot(
  data = crime,
  mapping = aes(x = police, y = robbery, colour = city)
) +
  geom_point() +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula = 'y ~ x'

# Same scatter plot with a loess smoother drawn for each city colour group.
ggplot(
  data = crime,
  mapping = aes(x = police, y = robbery, colour = city)
) +
  geom_point() +
  geom_smooth(method = "loess")

## `geom_smooth()` using formula = 'y ~ x'

Only for Los Angeles city

# Rows of `crime` whose city code is "losangel".
los <- filter(crime, city == "losangel")

# Robbery against police for the `los` subset, with a linear-model fit.
ggplot(
  data = los,
  mapping = aes(x = police, y = robbery, colour = city)
) +
  geom_point() +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula = 'y ~ x'

# Robbery against police for the `los` subset, with a loess smoother.
ggplot(
  data = los,
  mapping = aes(x = police, y = robbery, colour = city)
) +
  geom_point() +
  geom_smooth(method = "loess")

## `geom_smooth()` using formula = 'y ~ x'

Only in 1992

# Rows of `crime` where `year` equals 92.
# NOTE(review): presumably a two-digit code for 1992; confirm against the data.
onlyin92 <- filter(crime, year == 92)

# Scatter plot of robbery against police for the `onlyin92` subset, with a
# linear fit (no confidence band) and non-overlapping city labels.
# `label` is mapped only inside geom_text_repel(): the other layers do not use
# it, and mapping it globally made geom_smooth() warn that the aesthetic was
# dropped during statistical transformation.
# `se = FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ggplot(onlyin92, aes(x = police, y = robbery)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_text_repel(aes(label = city, col = city)) +
  theme_economist()

## `geom_smooth()` using formula = 'y ~ x'

Basic 2 - ggplot2

Vu Thien

2023-04-06

Import library

How to find file path

1.1 Histogram

Step by step

for variable population

for variable police

Compare police frequencies between Fresno and Los Angeles

Compare robbery frequencies between Fresno and Los Angeles

Examine the relationship between robbery and police

For 5 cities

Only for Los Angeles city

Only in 1992