SETUP LIBRARIES

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(readr)

Read the CSV file into a data frame

# Import the crime data set from a CSV file in the working directory.
library(readr)
bangladesh_crime <- read_csv("bangladesh crime.csv")
## Rows: 180 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Unit Name
## dbl (15): Year, Robbery, Murder, SpeedyTrial, Riot, WomanChildRepression, Ki...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens an interactive data viewer, so it is only called in an
# interactive session and is skipped when the script is run or rendered
# non-interactively.
if (interactive()) {
  View(bangladesh_crime)
}

# Preview the first five rows.
head(bangladesh_crime, 5)
## # A tibble: 5 × 16
##   `Unit Name`  Year Robbery Murder SpeedyTrial  Riot WomanChildRepression
##   <chr>       <dbl>   <dbl>  <dbl>       <dbl> <dbl>                <dbl>
## 1 DMP          2010     220    245         363     3                 1370
## 2 CMP          2010     108     94          31     7                  455
## 3 KMP          2010       9     29          25     0                  153
## 4 RMP          2010      20     21           9    15                  157
## 5 BMP          2010      12     19          21     0                  112
## # ℹ 9 more variables: Kidnapping <dbl>, PoliceAssault <dbl>, Burglary <dbl>,
## #   Theft <dbl>, OtherCases <dbl>, ArmsAct <dbl>, Explosive <dbl>,
## #   Narcotics <dbl>, Smuggling <dbl>

Clean the data set

# Drop every row that contains a missing value.
cleaned_data <- bangladesh_crime %>% na.omit()

# Remove rows in which every crime count is 0. The identifier columns
# (`Unit Name`, Year) are excluded from the check: they are never 0, so
# comparing the number of zeros against the total column count could never
# match a row. A row is kept when at least one crime column is non-zero.
cleaned_data <- cleaned_data %>%
  filter(if_any(!c(`Unit Name`, Year), ~ .x != 0))

# Display the first few rows of the cleaned dataset
head(cleaned_data)
## # A tibble: 6 × 16
##   `Unit Name`  Year Robbery Murder SpeedyTrial  Riot WomanChildRepression
##   <chr>       <dbl>   <dbl>  <dbl>       <dbl> <dbl>                <dbl>
## 1 DMP          2010     220    245         363     3                 1370
## 2 CMP          2010     108     94          31     7                  455
## 3 KMP          2010       9     29          25     0                  153
## 4 RMP          2010      20     21           9    15                  157
## 5 BMP          2010      12     19          21     0                  112
## 6 SMP          2010      33     33          34     1                  104
## # ℹ 9 more variables: Kidnapping <dbl>, PoliceAssault <dbl>, Burglary <dbl>,
## #   Theft <dbl>, OtherCases <dbl>, ArmsAct <dbl>, Explosive <dbl>,
## #   Narcotics <dbl>, Smuggling <dbl>

`Unit Name` is not a clear column name. In Bangladesh, these values refer to districts. Renaming the column and converting all column names to lower case makes the data easier to interpret.

# Give the "Unit Name" column the clearer name "District", then convert
# every column name to lowercase in the same pipeline.
cleaned_data <- cleaned_data %>%
  rename(District = `Unit Name`) %>%
  rename_with(tolower)

# Preview the renamed data.
head(cleaned_data)
## # A tibble: 6 × 16
##   district  year robbery murder speedytrial  riot womanchildrepression
##   <chr>    <dbl>   <dbl>  <dbl>       <dbl> <dbl>                <dbl>
## 1 DMP       2010     220    245         363     3                 1370
## 2 CMP       2010     108     94          31     7                  455
## 3 KMP       2010       9     29          25     0                  153
## 4 RMP       2010      20     21           9    15                  157
## 5 BMP       2010      12     19          21     0                  112
## 6 SMP       2010      33     33          34     1                  104
## # ℹ 9 more variables: kidnapping <dbl>, policeassault <dbl>, burglary <dbl>,
## #   theft <dbl>, othercases <dbl>, armsact <dbl>, explosive <dbl>,
## #   narcotics <dbl>, smuggling <dbl>

Use `summarize()`, `group_by()`, and `sum()` to find the years and districts with the highest and lowest total crime.

# Total crime per year: add up all fourteen crime categories within each year.
year_summary <- cleaned_data %>%
  group_by(year) %>%
  summarise(
    total_crime = sum(
      robbery, murder, speedytrial, riot, womanchildrepression,
      kidnapping, policeassault, burglary, theft,
      othercases, armsact, explosive, narcotics, smuggling
    )
  )

# Year(s) whose total equals the maximum.
highest_crime_year <- year_summary %>%
  slice(which(total_crime == max(total_crime)))

# Year(s) whose total equals the minimum.
lowest_crime_year <- year_summary %>%
  slice(which(total_crime == min(total_crime)))

# Total crime per district, summed over the same fourteen categories.
district_summary <- cleaned_data %>%
  group_by(district) %>%
  summarise(
    total_crime = sum(
      robbery, murder, speedytrial, riot, womanchildrepression,
      kidnapping, policeassault, burglary, theft,
      othercases, armsact, explosive, narcotics, smuggling
    )
  )

# District(s) whose total equals the maximum.
highest_crime_district <- district_summary %>%
  slice(which(total_crime == max(total_crime)))

# District(s) whose total equals the minimum.
lowest_crime_district <- district_summary %>%
  slice(which(total_crime == min(total_crime)))

# Print the results
cat(
  "Year with Highest Crime:", highest_crime_year$year,
  "Total Crime:", highest_crime_year$total_crime, "\n"
)
## Year with Highest Crime: 2018 Total Crime: 221157
cat(
  "Year with Lowest Crime:", lowest_crime_year$year,
  "Total Crime:", lowest_crime_year$total_crime, "\n"
)
## Year with Lowest Crime: 2019 Total Crime: 17452
cat(
  "District with Highest Crime:", highest_crime_district$district,
  "Total Crime:", highest_crime_district$total_crime, "\n"
)
## District with Highest Crime: Dhaka Range Total Crime: 362815
cat(
  "District with Lowest Crime:", lowest_crime_district$district,
  "Total Crime:", lowest_crime_district$total_crime, "\n"
)
## District with Lowest Crime: ATU Total Crime: 0

Next, I grouped the data by district and year and plotted the total crimes with ggplot2 to visualize which areas became more dangerous over time.

# Total crime for every year/district combination. `.groups = "drop"` returns
# an ungrouped tibble, so no leftover grouping by year leaks into later steps
# (and summarise() no longer prints its regrouping message).
district_year_summary <- cleaned_data %>%
  group_by(year, district) %>%
  summarize(
    total_crime = sum(
      robbery, murder, speedytrial, riot, womanchildrepression,
      kidnapping, policeassault, burglary, theft,
      othercases, armsact, explosive, narcotics, smuggling
    ),
    .groups = "drop"
  )

# One fill colour per district. ColorBrewer's "Set3" palette offers at most
# 12 colours, which is fewer than the number of districts here, so the
# palette is generated with hcl.colors() for exactly as many districts as
# appear in the data.
district_colours <- grDevices::hcl.colors(
  n_distinct(district_year_summary$district),
  palette = "Set 3"
)

# Create the ggplot2 plot: dodged bars of total crime, one bar per district
# within each year. Year is mapped as a factor so that every year gets its
# own discrete axis label instead of a continuous numeric scale.
ggplot(
  district_year_summary,
  aes(x = factor(year), y = total_crime, fill = district)
) +
  geom_col(position = "dodge") +
  labs(title = "Total Crimes Per District by Year",
       x = "Year",
       y = "Total Crimes",
       fill = "District") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = district_colours)