library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(crimedata)

## Warning: package 'crimedata' was built under R version 4.4.3

# Request Seattle records through the crimedata package. The result is not
# assigned, so nothing from this call is reused below; the `Seattle` data frame
# analysed later is read from Seattle.xlsx instead.
get_crime_data(cities = "Seattle")

subset the dataframe to include all rows for specific columns

library(readr)
library(readxl)

# Load the Seattle crime records and the 2010 census-tract household
# statistics from their Excel workbooks.
Seattle <- read_excel(
  "C:/Users/ku26/OneDrive - Drexel University/Documents/310Wi26/Seattle/Seattle.xlsx"
)

Seattle_households <- read_excel(
  "C:/Users/ku26/OneDrive - Drexel University/Documents/310Wi26/Seattle/2010_Census_Tract_Seattle_-_Household_Statistics.xlsx"
)

# Keep every row, but only the tract ID and the population/household counts.
Seattle_2010_households <- Seattle_households %>%
  select(
    GEOID10,
    Total_Population,
    Total_Households,
    Family_households,
    Nonfamily_households
  )

rename the columns

# Give the household columns short snake_case names. The ID column is renamed
# to `census_block`, the name later used as the join key.
Seattle_2010_households <- rename(
  Seattle_2010_households,
  census_block = GEOID10,
  total_pop = Total_Population,
  total_households = Total_Households,
  total_family = Family_households,
  total_nonfam = Nonfamily_households
)

delete one complete row if needed

# not applicable here: Seattle_2010_households <- Seattle_2010_households[-1, ]

remove last four digits from the entries in one column

# Reduce the crime data's census ID to the 11-digit census-tract GEOID
# (2-digit state + 3-digit county + 6-digit tract) so it can be matched against
# the tract-level household table.
#
# Keep the FIRST 11 digits rather than blindly dropping the last four: an ID
# that is already tract-level stays intact, whereas dropping four digits from
# an 11-digit ID yields a 7-digit key that matches no household row and makes
# the join below produce only NA-filled rows.
#
# format(scientific = FALSE) guarantees plain digits even when the column was
# read from Excel as a number.
tract_id <- format(Seattle$census_block, scientific = FALSE, trim = TRUE)
tract_id <- str_sub(tract_id, start = 1, end = 11)

# format() turns missing values into the text "NA"; restore real NAs.
tract_id[is.na(Seattle$census_block)] <- NA_character_

# The household key appears to be numeric (the join on it succeeds with a
# numeric key), so store the tract ID as numeric as well.
Seattle$census_block <- as.numeric(tract_id)

merge on census_block (the census ID column shared by both tables)

# A full join keeps every row from both tables; rows with no partner on the
# other side get NA in the columns that come from that side.
Seattle_crime_households <- Seattle_2010_households %>%
  full_join(Seattle, by = "census_block")

# Inspect the column types of the crime data.
str(Seattle)

## tibble [690 × 12] (S3: tbl_df/tbl/data.frame)
##  $ uid            : num [1:690] 27344461 27344544 27344564 27345105 27345138 ...
##  $ city_name      : chr [1:690] "Seattle" "Seattle" "Seattle" "Seattle" ...
##  $ offense_code   : chr [1:690] "90D" "22U" "23F" "13C" ...
##  $ offense_type   : chr [1:690] "driving under the influence" "burglary/breaking & entering" "theft from motor vehicle (except theft of motor vehicle parts or accessories)" "intimidation" ...
##  $ offense_group  : chr [1:690] "driving under the influence" "burglary/breaking & entering" "larceny/theft offenses" "assault offenses" ...
##  $ offense_against: chr [1:690] "society" "property" "property" "persons" ...
##  $ date_single    : POSIXct[1:690], format: "2022-01-01 11:48:00" "2022-01-01 21:32:00" ...
##  $ date_start     : POSIXct[1:690], format: "2022-01-01 11:00:00" "2022-01-01 21:30:00" ...
##  $ date_end       : POSIXct[1:690], format: "2022-01-01 12:36:00" "2022-01-01 21:35:00" ...
##  $ longitude      : num [1:690] -122 -122 -122 -122 -122 ...
##  $ latitude       : num [1:690] 47.6 47.6 47.7 47.6 47.6 ...
##  $ census_block   : num [1:690] 5303300 5303300 5303300 5303300 5303300 ...

define breaks for total population

# Cut points for the population bands (0, 1000, ..., 5000, Inf) and one label
# per band.
breaks <- c(seq(0, 5000, by = 1000), Inf)
labels <- c(
  "Small", "Kindof Small", "Medium",
  "kindof Medium", "Large", "Larger"
)

# Coerce total_pop to numeric, then bin it into the labelled bands.
# include.lowest = TRUE puts a population of exactly 0 into the first band.
Seattle_crime_households <- Seattle_crime_households %>%
  mutate(
    total_pop = as.numeric(total_pop),
    population_category = cut(
      total_pop,
      breaks = breaks,
      labels = labels,
      include.lowest = TRUE
    )
  )

summary(Seattle_crime_households)

##   census_block         total_pop    total_households  total_family   
##  Min.   :5.303e+06   Min.   :   0   Min.   :   0     Min.   :   0.0  
##  1st Qu.:5.303e+06   1st Qu.:3500   1st Qu.:1481     1st Qu.: 617.5  
##  Median :5.303e+06   Median :4476   Median :2047     Median : 899.0  
##  Mean   :8.683e+09   Mean   :4509   Mean   :2100     Mean   : 901.4  
##  3rd Qu.:5.303e+06   3rd Qu.:5612   3rd Qu.:2629     3rd Qu.:1117.5  
##  Max.   :5.303e+10   Max.   :7789   Max.   :4878     Max.   :2136.0  
##                      NA's   :690    NA's   :690      NA's   :690     
##   total_nonfam       uid            city_name         offense_code      
##  Min.   :   0   Min.   :27344461   Length:825         Length:825        
##  1st Qu.: 617   1st Qu.:27360594   Class :character   Class :character  
##  Median :1019   Median :27378578   Mode  :character   Mode  :character  
##  Mean   :1199   Mean   :27378709                                        
##  3rd Qu.:1582   3rd Qu.:27396509                                        
##  Max.   :3992   Max.   :27413244                                        
##  NA's   :690    NA's   :135                                             
##  offense_type       offense_group      offense_against   
##  Length:825         Length:825         Length:825        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##   date_single                       date_start                    
##  Min.   :2022-01-01 11:48:00.00   Min.   :2021-09-13 16:00:00.00  
##  1st Qu.:2022-03-22 11:59:00.00   1st Qu.:2022-03-20 09:57:30.00  
##  Median :2022-06-26 13:00:00.00   Median :2022-06-24 14:45:00.00  
##  Mean   :2022-06-28 00:37:50.98   Mean   :2022-06-26 10:15:58.47  
##  3rd Qu.:2022-09-28 05:37:00.00   3rd Qu.:2022-09-27 18:33:45.00  
##  Max.   :2022-12-31 19:08:00.00   Max.   :2022-12-31 19:08:00.00  
##  NA's   :160                      NA's   :197                     
##     date_end                        longitude         latitude    
##  Min.   :2022-01-01 12:36:00.00   Min.   :-122.4   Min.   :47.50  
##  1st Qu.:2022-03-23 00:09:00.00   1st Qu.:-122.3   1st Qu.:47.59  
##  Median :2022-06-26 07:00:30.00   Median :-122.3   Median :47.62  
##  Mean   :2022-06-29 17:52:35.69   Mean   :-122.3   Mean   :47.62  
##  3rd Qu.:2022-09-30 19:22:30.00   3rd Qu.:-122.3   3rd Qu.:47.66  
##  Max.   :2023-07-09 16:08:00.00   Max.   :-122.3   Max.   :47.73  
##  NA's   :171                      NA's   :135      NA's   :135    
##     population_category
##  Small        :  3     
##  Kindof Small :  2     
##  Medium       : 18     
##  kindof Medium: 26     
##  Large        : 38     
##  Larger       : 48     
##  NA's         :690

create visualization for offense types by population

# Side-by-side bars: row counts per population band, split by who or what the
# offense was against. No NA filtering is applied to the data here.
ggplot(
  Seattle_crime_households,
  aes(x = population_category, fill = offense_against)
) +
  geom_bar(position = "dodge")

get rid of NA

# drop_na() with no column arguments removes every row that has an NA in any
# column.
clean_crime <- drop_na(Seattle_crime_households)

# Same chart as before, drawn from the fully NA-free rows.
ggplot(
  clean_crime,
  aes(x = population_category, fill = offense_against)
) +
  geom_bar(position = "dodge")

note what happens

try this

# Drop only the rows whose offense_against is missing; NAs in other columns
# are kept.
clean_crime_1 <- drop_na(Seattle_crime_households, offense_against)

ggplot(
  clean_crime_1,
  aes(x = population_category, fill = offense_against)
) +
  geom_bar(position = "dodge")

now this

# Additionally drop the rows that have no population band, so both plotted
# variables are non-missing.
clean_crime_2 <- drop_na(clean_crime_1, population_category)

ggplot(
  clean_crime_2,
  aes(x = population_category, fill = offense_against)
) +
  geom_bar(position = "dodge")

Merging

CJS 310

2026-02-24

subset the dataframe to include all rows for specific columns

rename the columns

delete one complete row if needed

remove last four digits from the entries in one column

merge on block_group

define breaks for total population

create visualization for offense types by population

get rid of NA

note what happens

try this

now this