Hello! I’m a fourth-year operations management student at the Lindner College of Business. I was born and raised in Cincinnati, Ohio. I love being outside: playing in a pool, walking in the park, and kayaking on the Little Miami River.
Currently, I am a warehouse associate and bakery sorter at Manufacturing Inc. I’ve been there 3 years, rising through the ranks of the warehousing industry at Graeter’s Ice Cream. My day-to-day responsibilities include:
I have almost no experience with R. I’m taking a data mining class that is introducing me to R’s features and uses. Excel is the only analytics program I have used so far.
# Attach readr, then read the blood transfusion CSV into `df`.
library(readr)
df <- readr::read_csv(file = "data/blood_transfusion.csv")
## Rows: 748 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Class
## dbl (4): Recency, Frequency, Monetary, Time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# str() prints the structure as a side effect and returns NULL invisibly, so
# wrapping it in paste() only ever produced the bare label "Data types: ".
# Print the label first, then let str() write its own output.
cat("Data types:\n")
str(df)
## Data types:
## spc_tbl_ [748 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Recency : num [1:748] 2 0 1 2 1 4 2 1 2 5 ...
## $ Frequency: num [1:748] 50 13 16 20 24 4 7 12 9 46 ...
## $ Monetary : num [1:748] 12500 3250 4000 5000 6000 1000 1750 3000 2250 11500 ...
## $ Time : num [1:748] 98 28 35 45 77 4 14 35 22 98 ...
## $ Class : chr [1:748] "donated" "donated" "donated" "donated" ...
## - attr(*, "spec")=
## .. cols(
## .. Recency = col_double(),
## .. Frequency = col_double(),
## .. Monetary = col_double(),
## .. Time = col_double(),
## .. Class = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Total number of missing cells across the whole table.
sum(is.na(df))
## [1] 0
# Table shape as (rows, columns).
c(nrow(df), ncol(df))
## [1] 748 5
# Preview the first ten observations.
head(df, n = 10)
## # A tibble: 10 × 5
## Recency Frequency Monetary Time Class
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2 50 12500 98 donated
## 2 0 13 3250 28 donated
## 3 1 16 4000 35 donated
## 4 2 20 5000 45 donated
## 5 1 24 6000 77 not donated
## 6 4 4 1000 4 not donated
## 7 2 7 1750 14 donated
## 8 1 12 3000 35 not donated
## 9 2 9 2250 22 donated
## 10 5 46 11500 98 donated
# Preview the last ten observations.
tail(df, n = 10)
## # A tibble: 10 × 5
## Recency Frequency Monetary Time Class
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 23 1 250 23 not donated
## 2 23 4 1000 52 not donated
## 3 23 1 250 23 not donated
## 4 23 7 1750 88 not donated
## 5 16 3 750 86 not donated
## 6 23 2 500 38 not donated
## 7 21 2 500 52 not donated
## 8 23 3 750 62 not donated
## 9 39 1 250 39 not donated
## 10 72 1 250 72 not donated
# Monetary value of the 100th observation (returned as a 1 x 1 tibble).
df[100, "Monetary"]
## # A tibble: 1 × 1
## Monetary
## <dbl>
## 1 1750
# Average of the Monetary column, ignoring missing values.
mean(df$Monetary, na.rm = TRUE)
## [1] 1378.676
# Logical mask of rows whose Monetary value exceeds that average,
# followed by the Monetary values of the matching rows.
above_avg <- df$Monetary > mean(df$Monetary, na.rm = TRUE)
df[above_avg, "Monetary"]
## # A tibble: 267 × 1
## Monetary
## <dbl>
## 1 12500
## 2 3250
## 3 4000
## 4 5000
## 5 6000
## 6 1750
## 7 3000
## 8 2250
## 9 11500
## 10 5750
## # ℹ 257 more rows
# Overwrite `df` with the PDI (Police Data Initiative) crime incidents CSV.
df <- readr::read_csv(
  file = "data/PDI__Police_Data_Initiative__Crime_Incidents.csv"
)
## Rows: 15155 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (34): INSTANCEID, INCIDENT_NO, DATE_REPORTED, DATE_FROM, DATE_TO, CLSD, ...
## dbl (6): UCR, LONGITUDE_X, LATITUDE_X, TOTALNUMBERVICTIMS, TOTALSUSPECTS, ZIP
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Shape of the crime incidents table as (rows, columns).
c(nrow(df), ncol(df))
## [1] 15155 40
# Is at least one cell missing anywhere in the table?
any(is.na(df))
## [1] TRUE
# Missing-value count for each column.
colSums(is.na(df))
## INSTANCEID INCIDENT_NO
## 0 0
## DATE_REPORTED DATE_FROM
## 0 2
## DATE_TO CLSD
## 9 545
## UCR DST
## 10 0
## BEAT OFFENSE
## 28 10
## LOCATION THEFT_CODE
## 2 10167
## FLOOR SIDE
## 14127 14120
## OPENING HATE_BIAS
## 14508 0
## DAYOFWEEK RPT_AREA
## 423 239
## CPD_NEIGHBORHOOD WEAPONS
## 249 5
## DATE_OF_CLEARANCE HOUR_FROM
## 2613 2
## HOUR_TO ADDRESS_X
## 9 148
## LONGITUDE_X LATITUDE_X
## 1714 1714
## VICTIM_AGE VICTIM_RACE
## 0 2192
## VICTIM_ETHNICITY VICTIM_GENDER
## 2192 2192
## SUSPECT_AGE SUSPECT_RACE
## 0 7082
## SUSPECT_ETHNICITY SUSPECT_GENDER
## 7082 7082
## TOTALNUMBERVICTIMS TOTALSUSPECTS
## 33 7082
## UCR_GROUP ZIP
## 10 1
## COMMUNITY_COUNCIL_NEIGHBORHOOD SNA_NEIGHBORHOOD
## 0 0
# DATE_REPORTED is read as character ("mm/dd/YYYY hh:MM:SS AM/PM"), so range()
# on the raw column compares strings alphabetically rather than chronologically
# (e.g. "12:50:00 AM" sorts after "01:08:00 AM" although it is earlier in the
# day, and a later year would not sort last). Parse to date-times first.
# tz = "UTC" is used only so that no wall-clock time is lost to a DST gap; the
# values are not shifted. %p relies on an English/C locale for "AM"/"PM".
range(
  as.POSIXct(
    df[["DATE_REPORTED"]],
    format = "%m/%d/%Y %I:%M:%S %p",
    tz = "UTC"
  ),
  na.rm = TRUE
)
## NOTE(review): re-knit to refresh this output; the earlier string-based result was
## [1] "01/01/2022 01:08:00 AM" "06/26/2022 12:50:00 AM"
# Frequency of each suspect age bracket; NA values are left out of the counts.
table(df$SUSPECT_AGE, useNA = "no")
##
## 18-25 26-30 31-40 41-50 51-60 61-70 OVER 70 UNDER 18
## 1778 1126 1525 659 298 121 16 629
## UNKNOWN
## 9003
# Share of incidents per day of the week, ordered from least to most frequent.
sort(prop.table(table(df[["DAYOFWEEK"]])))
##
## THURSDAY FRIDAY WEDNESDAY TUESDAY MONDAY SUNDAY SATURDAY
## 0.1363019 0.1369807 0.1405105 0.1432935 0.1438365 0.1448547 0.1542221