library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(crimedata)
## Warning: package 'crimedata' was built under R version 4.4.3
# Fetch crime records for Seattle with crimedata::get_crime_data().
# The result is not assigned to a variable, so at top level it is only
# auto-printed and then discarded.
# NOTE(review): the Seattle.xlsx workbook read further down is presumably an
# export of this same data -- confirm.
get_crime_data(cities = "Seattle")

Read the data, then subset the household data frame to keep all rows but only specific columns.

library(readr)
library(readxl)
# Folder that holds both workbooks used in this exercise.
seattle_dir <- "C:/Users/ku26/OneDrive - Drexel University/Documents/310Wi26/Seattle"

# Crime records (one row per incident).
Seattle <- read_excel(file.path(seattle_dir, "Seattle.xlsx"))

# 2010 census-tract household statistics.
Seattle_households <- read_excel(
  file.path(seattle_dir, "2010_Census_Tract_Seattle_-_Household_Statistics.xlsx")
)

# Keep every row, but only the identifier and the population/household counts.
household_cols <- c(
  "GEOID10",
  "Total_Population",
  "Total_Households",
  "Family_households",
  "Nonfamily_households"
)
Seattle_2010_households <- Seattle_households %>%
  select(all_of(household_cols))

Rename the columns.

# Give the household columns short snake_case names; GEOID10 becomes
# census_block so it shares a name with the key column in the crime data.
Seattle_2010_households <- rename(
  Seattle_2010_households,
  census_block = GEOID10,
  total_pop = Total_Population,
  total_households = Total_Households,
  total_family = Family_households,
  total_nonfam = Nonfamily_households
)

Delete one complete row if needed.

# Not applicable here: Seattle_2010_households <- Seattle_2010_households[-1, ]

Remove the last four digits from the entries in one column (census_block), so that block-level IDs become tract-level IDs.

# Reduce the crime data's census_block to a census-tract key so it can be
# joined to the household table (whose key is the 11-digit tract GEOID:
# 2 state + 3 county + 6 tract digits).
#
# Keep the FIRST 11 digits instead of dropping the LAST four. Dropping a fixed
# number of trailing digits only works when the ID still has its full 15-digit
# block length; if the workbook already stores tract-level IDs (or this chunk
# is re-run on an already-trimmed column) it over-truncates to 7-digit values
# that match nothing in the household table, so the join silently produces no
# matches. Taking the leading 11 digits gives the same answer for 15-digit
# block IDs and leaves 11-digit tract IDs unchanged.
tract_geoid_digits <- 11L

census_block_txt <- Seattle$census_block
if (is.numeric(census_block_txt)) {
  # Large IDs must be written out in full; scientific notation would corrupt
  # the digit positions that str_sub() relies on.
  census_block_txt <- format(census_block_txt, scientific = FALSE, trim = TRUE)
}

# Convert back to numeric so the key type matches the household table.
Seattle$census_block <- as.numeric(
  str_sub(census_block_txt, end = tract_geoid_digits)
)

Merge the two tables on census_block.

# Full join keeps every household row and every crime row, matched on
# census_block; rows without a partner get NA in the other table's columns.
Seattle_crime_households <- Seattle_2010_households %>%
  full_join(Seattle, by = "census_block")

# Inspect the structure of the crime table (column types and sample values).
str(Seattle)
## tibble [690 × 12] (S3: tbl_df/tbl/data.frame)
##  $ uid            : num [1:690] 27344461 27344544 27344564 27345105 27345138 ...
##  $ city_name      : chr [1:690] "Seattle" "Seattle" "Seattle" "Seattle" ...
##  $ offense_code   : chr [1:690] "90D" "22U" "23F" "13C" ...
##  $ offense_type   : chr [1:690] "driving under the influence" "burglary/breaking & entering" "theft from motor vehicle (except theft of motor vehicle parts or accessories)" "intimidation" ...
##  $ offense_group  : chr [1:690] "driving under the influence" "burglary/breaking & entering" "larceny/theft offenses" "assault offenses" ...
##  $ offense_against: chr [1:690] "society" "property" "property" "persons" ...
##  $ date_single    : POSIXct[1:690], format: "2022-01-01 11:48:00" "2022-01-01 21:32:00" ...
##  $ date_start     : POSIXct[1:690], format: "2022-01-01 11:00:00" "2022-01-01 21:30:00" ...
##  $ date_end       : POSIXct[1:690], format: "2022-01-01 12:36:00" "2022-01-01 21:35:00" ...
##  $ longitude      : num [1:690] -122 -122 -122 -122 -122 ...
##  $ latitude       : num [1:690] 47.6 47.6 47.7 47.6 47.6 ...
##  $ census_block   : num [1:690] 5303300 5303300 5303300 5303300 5303300 ...

Define breaks for total population and bin it into categories.

# Bin edges: 0, 1000, ..., 5000, then an open-ended top bin.
breaks <- c(seq(0, 5000, by = 1000), Inf)

# One label per bin, lowest to highest.
labels <- c(
  "Small",
  "Kindof Small",
  "Medium",
  "kindof Medium",
  "Large",
  "Larger"
)

# Make sure total_pop is numeric, then cut it into the labelled bins.
# include.lowest = TRUE puts a population of exactly 0 into the first bin.
Seattle_crime_households <- Seattle_crime_households %>%
  mutate(
    total_pop = as.numeric(total_pop),
    population_category = cut(
      total_pop,
      breaks = breaks,
      labels = labels,
      include.lowest = TRUE
    )
  )

# Column-by-column overview, including NA counts.
summary(Seattle_crime_households)
##   census_block         total_pop    total_households  total_family   
##  Min.   :5.303e+06   Min.   :   0   Min.   :   0     Min.   :   0.0  
##  1st Qu.:5.303e+06   1st Qu.:3500   1st Qu.:1481     1st Qu.: 617.5  
##  Median :5.303e+06   Median :4476   Median :2047     Median : 899.0  
##  Mean   :8.683e+09   Mean   :4509   Mean   :2100     Mean   : 901.4  
##  3rd Qu.:5.303e+06   3rd Qu.:5612   3rd Qu.:2629     3rd Qu.:1117.5  
##  Max.   :5.303e+10   Max.   :7789   Max.   :4878     Max.   :2136.0  
##                      NA's   :690    NA's   :690      NA's   :690     
##   total_nonfam       uid            city_name         offense_code      
##  Min.   :   0   Min.   :27344461   Length:825         Length:825        
##  1st Qu.: 617   1st Qu.:27360594   Class :character   Class :character  
##  Median :1019   Median :27378578   Mode  :character   Mode  :character  
##  Mean   :1199   Mean   :27378709                                        
##  3rd Qu.:1582   3rd Qu.:27396509                                        
##  Max.   :3992   Max.   :27413244                                        
##  NA's   :690    NA's   :135                                             
##  offense_type       offense_group      offense_against   
##  Length:825         Length:825         Length:825        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##   date_single                       date_start                    
##  Min.   :2022-01-01 11:48:00.00   Min.   :2021-09-13 16:00:00.00  
##  1st Qu.:2022-03-22 11:59:00.00   1st Qu.:2022-03-20 09:57:30.00  
##  Median :2022-06-26 13:00:00.00   Median :2022-06-24 14:45:00.00  
##  Mean   :2022-06-28 00:37:50.98   Mean   :2022-06-26 10:15:58.47  
##  3rd Qu.:2022-09-28 05:37:00.00   3rd Qu.:2022-09-27 18:33:45.00  
##  Max.   :2022-12-31 19:08:00.00   Max.   :2022-12-31 19:08:00.00  
##  NA's   :160                      NA's   :197                     
##     date_end                        longitude         latitude    
##  Min.   :2022-01-01 12:36:00.00   Min.   :-122.4   Min.   :47.50  
##  1st Qu.:2022-03-23 00:09:00.00   1st Qu.:-122.3   1st Qu.:47.59  
##  Median :2022-06-26 07:00:30.00   Median :-122.3   Median :47.62  
##  Mean   :2022-06-29 17:52:35.69   Mean   :-122.3   Mean   :47.62  
##  3rd Qu.:2022-09-30 19:22:30.00   3rd Qu.:-122.3   3rd Qu.:47.66  
##  Max.   :2023-07-09 16:08:00.00   Max.   :-122.3   Max.   :47.73  
##  NA's   :171                      NA's   :135      NA's   :135    
##     population_category
##  Small        :  3     
##  Kindof Small :  2     
##  Medium       : 18     
##  kindof Medium: 26     
##  Large        : 38     
##  Larger       : 48     
##  NA's         :690

Create a visualization of offense targets (offense_against) by population category.

# Side-by-side bar counts of offense_against within each population category,
# using every row of the joined table (NA values included).
ggplot(
  data = Seattle_crime_households,
  mapping = aes(x = population_category, fill = offense_against)
) +
  geom_bar(position = "dodge")

Get rid of the NA values.

# drop_na() with no column arguments removes any row that has an NA in ANY
# column, then the same dodged bar chart is drawn from what is left.
clean_crime <- drop_na(Seattle_crime_households)

ggplot(
  data = clean_crime,
  mapping = aes(x = population_category, fill = offense_against)
) +
  geom_bar(position = "dodge")

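Note what happens: drop_na() with no arguments removes every row that has an NA in any column, not just in the plotted columns.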

Try this instead: drop only the rows where offense_against is NA.

# Remove only the rows whose offense_against is NA, then redraw the chart.
clean_crime_1 <- drop_na(Seattle_crime_households, offense_against)

ggplot(
  data = clean_crime_1,
  mapping = aes(x = population_category, fill = offense_against)
) +
  geom_bar(position = "dodge")

Now also drop the rows where population_category is NA.

# Starting from clean_crime_1, additionally remove rows whose
# population_category is NA, then redraw the chart.
clean_crime_2 <- drop_na(clean_crime_1, population_category)

ggplot(
  data = clean_crime_2,
  mapping = aes(x = population_category, fill = offense_against)
) +
  geom_bar(position = "dodge")