Libraries

# Packages this document needs. Every package referenced below must be listed
# here so it gets installed on a fresh machine; "colorspace" is included
# because the histogram palette is built with colorspace::diverge_hcl().
required_packages <- c(
  "readxl",
  "dplyr",
  "ggplot2",
  "gridExtra",
  "colorspace"
)

#' Install a package only when it cannot already be loaded.
#'
#' @param p Name of the package, as a character string.
#' @return Called for its side effect; yields `NULL` invisibly when the
#'   package is already available, otherwise the result of
#'   `install.packages()`.
install_if_missing <- function(p) {
  already_available <- requireNamespace(p, quietly = TRUE)
  if (already_available) {
    return(invisible(NULL))
  }
  install.packages(p)
}

# Run the installer once per package; the results are discarded
invisible(lapply(required_packages, install_if_missing))

# Load the required packages
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(grid)

# Set knitr options: echo = TRUE makes the R code of every chunk appear in the
# rendered output by default
knitr::opts_chunk$set(echo = TRUE)

R Markdown

# Location of the workbook; edit this to wherever Sampledata2.xlsx is stored
file_path <- "~/Documents/LABSCloud/Sampledata2.xlsx"

# Abort early when the workbook is missing, otherwise read it into `raw`
if (!file.exists(file_path)) {
  stop("File not found. Please check the file path.")
}
raw <- read_excel(file_path)

# Uncomment to inspect the structure of the dataset
# str(raw)

Histogram Plot

# Keep the rows recorded after 2012 and label each one with the crime-rate
# band it falls into (below 250, 250 to 500 inclusive, above 500)
df <- raw %>%
  dplyr::filter(Year > 2012) %>%
  dplyr::mutate(
    RangeGroup = dplyr::case_when(
      CrimeRate < 250 ~ "CrimeRate < 250",
      CrimeRate >= 250 & CrimeRate <= 500 ~ "250 <= CrimeRate <= 500",
      CrimeRate > 500 ~ "CrimeRate > 500"
    )
  )
# summary(df)

# Histogram of `CrimeRate` with the bars filled by `Year`

# One diverging-palette colour per distinct year in the filtered data
no_yrs <- length(unique(df[["Year"]]))
col1 <- colorspace::diverge_hcl(n = no_yrs)

# Bars for the different years are drawn side by side in bins 100 units wide.
# The x-axis label reads "per 100,000 people" (previously "100, 1000").
ggplot(df, aes(x = CrimeRate, fill = as.factor(Year))) +
  geom_histogram(
    position = "dodge",
    binwidth = 100,
    color = "black",
    alpha = 0.7
  ) +
  scale_fill_manual(values = col1) +
  labs(
    title = "Crime Rate Histogram Plots",
    x = "Crime Rate per 100,000 people",
    y = "Count",
    fill = "Year"
  )

# Scatter Plot

# Summarise CrimeRate per Year: mean, maximum and minimum, ignoring missing
# values
df_new <- raw %>%
  group_by(Year) %>%
  summarize(
    mean_rate = mean(CrimeRate, na.rm = TRUE),
    max_rate = max(CrimeRate, na.rm = TRUE),
    min_rate = min(CrimeRate, na.rm = TRUE)
  )

# Display the summarized data
# df_new

# Plot the yearly mean as points joined by a line. The line thickness is set
# with `linewidth`; using `size` for lines is deprecated since ggplot2 3.4.0.
ggplot(df_new, aes(x = Year, y = mean_rate)) +
  geom_point(color = "blue", size = 1.0) +
  geom_line(color = "blue", linewidth = 0.5) +
  labs(
    title = "Mean Crime Rate by Year",
    x = "Year",
    y = "Mean Crime Rate per 100,000 people in USA"
  ) +
  theme_minimal()

Boxplot

  # One box per year showing the spread of CrimeRate; outliers are drawn in
  # red with point shape 8
  boxplot_plot <- ggplot(
    data = raw,
    mapping = aes(x = factor(Year), y = CrimeRate, fill = factor(Year))
  ) +
    geom_boxplot(
      outlier.colour = "red",
      outlier.shape = 8,
      outlier.size = 2
    ) +
    theme_minimal() +
    labs(
      title = "Boxplot of CrimeRate by Year",
      x = "Year",
      y = "CrimeRate",
      fill = "Year"
    )

  # Render the plot
  boxplot_plot

# Arrange the three plots in one page

  # Histogram of CrimeRate by year (uses `df` and the `col1` palette built in
  # the histogram section)
  p1 <- ggplot(df, aes(x = CrimeRate, fill = as.factor(Year))) +
    geom_histogram(
      position = "dodge",
      binwidth = 100,
      color = "black",
      alpha = 0.7
    ) +
    scale_fill_manual(values = col1) +
    labs(
      title = "Crime Rate Histogram Plots",
      x = "Crime Rate per 100,000 people",
      y = "Count",
      fill = "Year"
    )

  # Yearly mean crime rate as points joined by a line; `linewidth` replaces
  # the `size` argument deprecated for lines in ggplot2 3.4.0
  p2 <- ggplot(df_new, aes(x = Year, y = mean_rate)) +
    geom_point(color = "blue", size = 1.0) +
    geom_line(color = "blue", linewidth = 0.5) +
    labs(
      title = "Mean Crime Rate by Year",
      x = "Year",
      y = "Mean Crime Rate per 100,000 people in USA"
    ) +
    theme_minimal()

  # Reuse the boxplot built in the Boxplot section instead of rebuilding an
  # identical object and re-assigning `boxplot_plot`
  p3 <- boxplot_plot

  # Stack the three plots vertically on one page
  grid.arrange(p1, p2, p3, ncol = 1)

Note that adding the `echo = FALSE` parameter to a code chunk prevents printing of the R code that generated the plot. This document sets `echo = TRUE` globally, so the code for each plot is shown.