Import libraries

# ggplot

suppressMessages(library(ggplot2))
suppressMessages(library(ggthemes))
suppressMessages(library(tidyverse))
suppressMessages(library(ggrepel))

How to find the file path

# Interactive alternative: pick the file in a dialog, then print the chosen path.
# a = file.choose()
# a
# Load the crime data set into a data frame used by every plot below.
crime <- read.csv(
  file = "/Users/thien/Desktop/R-dir/R studying/dataset/Crime dataset reduced.csv"
)

1.1 Histogram

# Quick base-graphics histogram of the police column.
# hist() builds its default title and x-axis label from the argument
# expression, so these will read "crime$police".
hist(crime$police)

# Same base-graphics histogram for the population column.
hist(crime$population)

Step by step

For the variable population

# Step 1: the three essentials of a ggplot: data, an aesthetic mapping, a geom
ggplot(crime, aes(x = population)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 2: fill the bars with a fixed colour
ggplot(crime, aes(x = population)) +
  geom_histogram(fill = "blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 3: outline every bar in white so adjacent bins are easy to tell apart
ggplot(crime, aes(x = population)) +
  geom_histogram(colour = "white", fill = "blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 4: draw the histogram on the density scale and overlay a density curve
# after_stat(density) replaces the `..density..` dot-dot notation, which was
# deprecated in ggplot2 3.4.0.
ggplot(data = crime, mapping = aes(x = population)) +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 5: Add a main title to the graph
# after_stat(density) replaces the `..density..` dot-dot notation, which was
# deprecated in ggplot2 3.4.0.
ggplot(data = crime, mapping = aes(x = population)) +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red") +
  ggtitle("Distribution of population")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 6: Centre the main title
# after_stat(density) replaces the `..density..` dot-dot notation, which was
# deprecated in ggplot2 3.4.0.
ggplot(data = crime, mapping = aes(x = population)) +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red") +
  ggtitle("Distribution of population") +
  theme(plot.title = element_text(hjust = 0.5)) # hjust = 0.5 centres the title
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Step 7: Switch to the Economist theme (from library(ggthemes))
# A complete theme such as theme_economist() replaces all theme settings added
# before it, so it must come first; the theme() tweak that centres the title
# is applied afterwards so that it is not discarded.
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(data = crime, mapping = aes(x = population)) +
  geom_histogram(aes(y = after_stat(density)), fill = "blue", col = "white") +
  geom_density(col = "red") +
  ggtitle("Distribution of population") +
  theme_economist() +
  theme(plot.title = element_text(hjust = 0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

For the variable police

# Default histogram of the police column
ggplot(data = crime, mapping = aes(x = police)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with blue bars
ggplot(data = crime, mapping = aes(x = police)) +
  geom_histogram(fill = "blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Blue bars with a white outline
ggplot(data = crime, mapping = aes(x = police)) +
  geom_histogram(colour = "white", fill = "blue")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density-scaled histogram of police (bin width fixed at 0.05) with a density
# curve on top. after_stat(density) replaces the `..density..` notation that
# was deprecated in ggplot2 3.4.0.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red")

# Density-scaled histogram of police with a density curve and a main title.
# after_stat(density) replaces the `..density..` notation that was deprecated
# in ggplot2 3.4.0.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red") +
  ggtitle("Distribution of the number of police")

# Density-scaled histogram of police with a density curve and a centred title.
# after_stat(density) replaces the `..density..` notation that was deprecated
# in ggplot2 3.4.0.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red") +
  ggtitle("Distribution of the number of police") +
  theme(plot.title = element_text(hjust = 0.5)) # hjust = 0.5 centres the title

# Density-scaled histogram of police with theme_bw() and a centred title.
# theme_bw() is a complete theme and replaces all theme settings added before
# it, so it is applied first and the title centring is added afterwards.
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red") +
  ggtitle("Distribution of the number of police") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

# Density-scaled histogram of police with the Economist theme (needs the
# "ggthemes" library) and a centred title.
# theme_economist() is a complete theme and replaces all theme settings added
# before it, so it is applied first and the title centring is added afterwards.
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(data = crime, aes(x = police)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "blue", col = "white", binwidth = 0.05
  ) +
  geom_density(col = "red") +
  ggtitle("Distribution of the number of police") +
  theme_economist() +
  theme(plot.title = element_text(hjust = 0.5))

Compare police frequencies between Fresno and Los Angeles

# Rows whose city is "fresno" or "losangel", reduced to the three columns
# needed for the comparisons below.
two_cities <- crime %>%
  filter(city %in% c("fresno", "losangel")) %>%
  select(city, police, robbery)

# One police boxplot per city, coloured by city
ggplot(data = two_cities, mapping = aes(y = police, colour = city)) +
  geom_boxplot() +
  theme_economist()

Compare robbery frequencies between Fresno and Los Angeles

# One robbery boxplot per city, coloured by city
ggplot(data = two_cities, mapping = aes(y = robbery, colour = city)) +
  geom_boxplot() +
  theme_economist()

Examine the relationship between robbery and police

For 5 cities

# Scatter plot of robbery against police, one colour per city
ggplot(data = crime, mapping = aes(x = police, y = robbery, colour = city)) +
  geom_point()

# Same scatter plot with a linear-model fit drawn for each city
ggplot(data = crime, mapping = aes(x = police, y = robbery, colour = city)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# Same scatter plot with a loess smoother drawn for each city
ggplot(data = crime, mapping = aes(x = police, y = robbery, colour = city)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

Only for the city of Los Angeles

# Subset of crime containing only the rows where city is "losangel"
los <- filter(crime, city == "losangel")

# Robbery against police for the "losangel" subset, with a linear-model fit
ggplot(data = los, mapping = aes(x = police, y = robbery, colour = city)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# Robbery against police for the "losangel" subset, with a loess smoother
ggplot(data = los, mapping = aes(x = police, y = robbery, colour = city)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

Only in 1992

# Subset of crime containing only the rows where year equals 92
onlyin92 <- filter(crime, year == 92)

# Robbery against police for the year-92 subset: points, a single linear trend
# line without its confidence band, and a repelled text label on each point
# showing its city (coloured by city).
# `label` is mapped only inside geom_text_repel(): geom_smooth() cannot use it,
# and mapping it globally makes the smoother warn that the aesthetic was
# dropped during the statistical transformation.
ggplot(onlyin92, aes(x = police, y = robbery)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) + # FALSE, not F: F can be reassigned
  geom_text_repel(aes(label = city, col = city)) +
  theme_economist()
## `geom_smooth()` using formula = 'y ~ x'