Examine State of Oklahoma Payment Card Purchases by OSU

Payment cards (P-cards) are business credit cards that some employees are permitted to use to to make appropriate business purchases. Perform an audit of the purchasing cards for Oklahoma State University.

Three specific controls to check:


ETL P-cards files

# read in two P-card transaction files provided by course
pcard1 <- read_csv("pcard_data_part1_v01.csv")
## Rows: 63038 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (5): Agency.Description, Cardholder.Name, Item.Description, Merchant, M...
## dbl  (2): Agency.Number, Amount
## lgl  (1): Project
## date (2): Transaction.Date, Posting.Date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# str(pcard1)
pcard2 <- read_csv("pcard_data_part2_v01.csv")
## Rows: 63039 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (5): Agency.Description, Cardholder.Name, Item.Description, Merchant, MCC
## dbl  (2): Agency.Number, Amount
## lgl  (1): Project
## date (2): Transaction.Date, Posting.Date
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# str(pcard2)
pcards <- bind_rows(rename(pcard1,MCC=MCC.Description),pcard2) %>%
  filter(Agency.Description == "OKLAHOMA STATE UNIVERSITY") %>%
#  select (-Agency.Number, -Posting.Date, -Project) %>%
  select(Agency.Description
    ,Cardholder.Name
    ,Item.Description
    ,Amount
    ,Merchant
    ,Transaction.Date
    ,MCC) %>%
  mutate(
    # Cardholder.Name = toupper(trimws(Cardholder.Name))
    Cardholder.Name = str_replace(toupper(trimws(Cardholder.Name))," CH,", ",")
    ,Item.Description = toupper(trimws(Item.Description))
    ,Merchant = toupper(trimws(Merchant))
    ,MCC = toupper(trimws(MCC))
    ,tran_year = year(Transaction.Date)
    ,tran_month = month(Transaction.Date, label=TRUE)
    ,tran_day = day(Transaction.Date)
    ,tran_weekday = wday(Transaction.Date, label=TRUE)
    ,tran_yearmo = (year(Transaction.Date)*100) + month(Transaction.Date)
    )
  
str(pcards)  
## tibble [83,126 x 12] (S3: tbl_df/tbl/data.frame)
##  $ Agency.Description: chr [1:83126] "OKLAHOMA STATE UNIVERSITY" "OKLAHOMA STATE UNIVERSITY" "OKLAHOMA STATE UNIVERSITY" "OKLAHOMA STATE UNIVERSITY" ...
##  $ Cardholder.Name   : chr [1:83126] "ROUNTREE, W" "USCANGA, J" "PRATER, J" "BLACKSHIRE, P" ...
##  $ Item.Description  : chr [1:83126] "SHIPPING CHARGES" "GENERAL PURCHASE" "L-110 X 3/4 STD 3/16 X 3/3 EAC|L-110 X 1-7/16 STD" "GRIMSLEYS INC EACH" ...
##  $ Amount            : num [1:83126] 7.87 45.92 174.23 350.32 207.25 ...
##  $ Merchant          : chr [1:83126] "UPS 000000Y059Y3390" "WAL-MART #0137" "MOTION INDUSTRIES OK01" "GRIMSLEYS INC" ...
##  $ Transaction.Date  : Date[1:83126], format: "2020-09-28" "2020-02-12" ...
##  $ MCC               : chr [1:83126] "COURIER SERVICES-AIR OR GROUND,FREIGHT F" "GROCERY STORES SUPERMARKETS" "INDUSTRIAL SUPPLIES NOT ESLEWHERE CLASSI" "INDUSTRIAL SUPPLIES NOT ESLEWHERE CLASSI" ...
##  $ tran_year         : num [1:83126] 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
##  $ tran_month        : Ord.factor w/ 12 levels "Jan"<"Feb"<"Mar"<..: 9 2 9 9 2 2 3 7 1 4 ...
##  $ tran_day          : int [1:83126] 28 12 11 9 6 7 12 9 10 28 ...
##  $ tran_weekday      : Ord.factor w/ 7 levels "Sun"<"Mon"<"Tue"<..: 2 4 6 4 5 6 5 5 6 3 ...
##  $ tran_yearmo       : num [1:83126] 202009 202002 202009 202009 202002 ...
# write.csv(pcards,"pcards_test.csv", row.names=FALSE)
# OPTIONAL: Additional history downloaded from OK Open Data
pcardadd <- bind_rows(
  rename(read_csv("pcard_public_202101120000.csv"),LAST_NAME = CH_LAST_NAME, FIRST_INITIAL = FIRST_NAME, MCC_DESCRIPTION
 = MCC_DESCR)
  ,read_csv("pcard_public_202102.csv")
  ,read_csv("pcard_public_20210405170447.csv")
  ,read_csv("pcard_public_202104.csv")
  ,read_csv("pcard_public_20210601090653.csv")
  ,read_csv("pcard_public_20210701070717.csv")
  ) %>%
  rename(Agency.Description = AGENCYNAME
    ,Item.Description = ITEM_DESCR
    ,Amount = AMOUNT
    ,Merchant = MERCHANT
    ,Transaction.Date = TRANSACTION_DATE
    ,MCC = MCC_DESCRIPTION) %>%
  mutate(
    # Cardholder.Name = paste0(LAST_NAME,", ",FIRST_INITIAL)
    Cardholder.Name = str_replace(toupper(trimws(paste0(LAST_NAME,", ",FIRST_INITIAL)))," CH,", ",")
    ,Transaction.Date = as.Date(Transaction.Date, format = "%d-%b-%y")
    ) %>%
  filter(Agency.Description == "OKLAHOMA STATE UNIVERSITY") %>%
  select(Agency.Description
    ,Cardholder.Name
    ,Item.Description
    ,Amount
    ,Merchant
    ,Transaction.Date
    ,MCC) %>%
    mutate(
    Cardholder.Name = toupper(trimws(Cardholder.Name))
    ,Item.Description = toupper(trimws(Item.Description))
    ,Merchant = toupper(trimws(Merchant))
    ,MCC = toupper(trimws(MCC))
    ,tran_year = year(Transaction.Date)
    ,tran_month = month(Transaction.Date, label=TRUE)
    ,tran_day = day(Transaction.Date)
    ,tran_weekday = wday(Transaction.Date, label=TRUE)
    ,tran_yearmo = (year(Transaction.Date)*100) + month(Transaction.Date)
    )
## Rows: 25352 Columns: 13
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): CALENDAR_MONTH, AGENCYNBR, AGENCYNAME, CH_LAST_NAME, FIRST_NAME, I...
## dbl  (2): CALENDAR_YEAR, AMOUNT
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 24101 Columns: 13
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): CALENDAR_MONTH, AGENCYNBR, AGENCYNAME, LAST_NAME, FIRST_INITIAL, I...
## dbl  (2): CALENDAR_YEAR, AMOUNT
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 32622 Columns: 13
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): CALENDAR_MONTH, AGENCYNBR, AGENCYNAME, LAST_NAME, FIRST_INITIAL, I...
## dbl  (2): CALENDAR_YEAR, AMOUNT
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31250 Columns: 13
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): CALENDAR_MONTH, AGENCYNBR, AGENCYNAME, LAST_NAME, FIRST_INITIAL, I...
## dbl  (2): CALENDAR_YEAR, AMOUNT
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 30629 Columns: 13
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): CALENDAR_MONTH, AGENCYNBR, AGENCYNAME, LAST_NAME, FIRST_INITIAL, I...
## dbl  (2): CALENDAR_YEAR, AMOUNT
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 31066 Columns: 13
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): CALENDAR_MONTH, AGENCYNBR, AGENCYNAME, LAST_NAME, FIRST_INITIAL, I...
## dbl  (2): CALENDAR_YEAR, AMOUNT
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(pcardadd)  
## tibble [43,085 x 12] (S3: tbl_df/tbl/data.frame)
##  $ Agency.Description: chr [1:43085] "OKLAHOMA STATE UNIVERSITY" "OKLAHOMA STATE UNIVERSITY" "OKLAHOMA STATE UNIVERSITY" "OKLAHOMA STATE UNIVERSITY" ...
##  $ Cardholder.Name   : chr [1:43085] "SHARMA, S" "SHERRILL, C" "SMITH, E" "SPIVA, A" ...
##  $ Item.Description  : chr [1:43085] "MOBILE WHITEBOARD 48''X 36 PCE" "GENERAL PURCHASE" "MISCELLANEOUS EA" "GENERAL PURCHASE" ...
##  $ Amount            : num [1:43085] 159 49.8 476.2 45.1 90 ...
##  $ Merchant          : chr [1:43085] "AMZN MKTP US YQ5NE7J63" "THE RAILROAD YARD INC" "B & C BUSINESS PRODUCTS" "ATT BUS PHONE PMT" ...
##  $ Transaction.Date  : Date[1:43085], format: "2021-01-07" "2021-01-06" ...
##  $ MCC               : chr [1:43085] "BOOK STORES" "METAL SERVICE CENTERS AND OFFICES" "STATIONERY,OFFICE SUPPLIES,PRINTING AND" "TELECOMMUNICATION SERV.INCLUD. LOCAL/L.D" ...
##  $ tran_year         : num [1:43085] 2021 2021 2021 2021 2021 ...
##  $ tran_month        : Ord.factor w/ 12 levels "Jan"<"Feb"<"Mar"<..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ tran_day          : int [1:43085] 7 6 7 7 6 6 6 7 7 7 ...
##  $ tran_weekday      : Ord.factor w/ 7 levels "Sun"<"Mon"<"Tue"<..: 5 4 5 5 4 4 4 5 5 5 ...
##  $ tran_yearmo       : num [1:43085] 202101 202101 202101 202101 202101 ...
# write.csv(pcardadd,"pcardadd_test.csv", row.names=FALSE)
# uncomment below to add more data
# pcards <- bind_rows(pcards,pcardadd) 

EDA P-cards

# plots
summary(pcards$Amount)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -12395.38     27.37     87.66    258.47    267.32  23700.00
pcards %>% 
  ggplot(aes(y=Amount)) +
  geom_boxplot() +
  xlab(" ") +
  ylab("Amount Charged") +
  labs(title=paste0("OSU P-Card Expenditures, ",min(pcards$tran_yearmo
)," - ",max(pcards$tran_yearmo
))) +
  coord_cartesian(ylim = c(-1000, 1000))

# for the following plots, remove small transactions from 201911 and 2021912 that were posted late 
pcards %>%
  filter(tran_yearmo >= 202001) %>%
  group_by(tran_yearmo) %>%
  summarize(sum_amount = sum(Amount)) %>%
  ggplot(aes(y=sum_amount, x=as.factor(tran_yearmo), fill=as.factor(tran_yearmo))) + 
  geom_col(show.legend = FALSE) +
  xlab("Transaction Month") +
  ylab("Amount Charged") +
  labs(title=paste0("OSU P-Card Expenditures")) +
  theme(axis.text.x = element_text(angle = 75, vjust = 1.0, hjust=1)) +
  scale_y_continuous(breaks=seq(0,3000000,500000),labels=scales::dollar)

pcards %>%
  filter(tran_yearmo >= 202001) %>%
  group_by(tran_weekday) %>%
  summarize(sum_amount = sum(Amount)) %>%
  ggplot(aes(y=sum_amount, x=as.factor(tran_weekday), fill=as.factor(tran_weekday))) + 
  geom_col(show.legend = FALSE) +
  xlab("Day of Week") +
  ylab("Amount Charged") +
  labs(title=paste0("OSU P-Card Expenditures")) +
  theme(axis.text.x = element_text(angle = 75, vjust = 1.0, hjust=1)) +
  scale_y_continuous(breaks=seq(0,5000000,1000000),labels=scales::dollar)

pcards %>%
  filter(tran_yearmo >= 202001) %>%
  group_by(tran_day) %>%
  summarize(sum_amount = sum(Amount)) %>%
  ggplot(aes(y=sum_amount, x=as.factor(tran_day), fill=as.factor(tran_day))) + 
  geom_col(show.legend = FALSE) +
  xlab("Day of Month") +
  ylab("Amount Charged") +
  labs(title=paste0("OSU P-Card Expenditures")) +
  theme(axis.text.x = element_text(angle = 75, vjust = 1.0, hjust=1)) +
  scale_y_continuous(breaks=seq(0,1000000,200000),labels=scales::dollar)

Control Tests P-cards

# Create Summary-level Datasets and identify exceptions

# total purchases for the year are $50,000 or less
pcardyr <- pcards %>%
  select(Cardholder.Name, tran_year, Amount, Transaction.Date) %>%
  group_by(Cardholder.Name, tran_year) %>%
  summarize(Amount_yr = sum(Amount)
    ,num_rows_yr = n()
    ,tran_yr_min = min(Transaction.Date)
    ,tran_yr_max = max(Transaction.Date)
    ) %>%
  filter(Amount_yr > 50000)
## `summarise()` has grouped output by 'Cardholder.Name'. You can override using the `.groups` argument.
# write.csv(pcardyr,"pcardyr_test.csv", row.names=FALSE)
knitr::kable(pcardyr, caption = "Cardholder Total Annual Transactions > $50,000") %>% 
  kableExtra::kable_styling(latex_options = c("striped", "hold_position"), full_width = F) %>%
  kableExtra:: scroll_box(height = "200px")
Cardholder Total Annual Transactions > $50,000
Cardholder.Name tran_year Amount_yr num_rows_yr tran_yr_min tran_yr_max
AARON, D 2020 83894.75 121 2020-01-03 2020-12-09
ALAN, J 2020 83766.01 75 2020-01-08 2020-12-23
ANDERSON, K 2020 106176.67 347 2020-01-08 2020-12-21
ARMSTRONG, P 2020 78922.44 106 2020-01-03 2020-09-08
BALLARD, J 2020 54222.04 211 2020-01-06 2020-12-23
BEACH, D 2020 84116.51 217 2020-01-02 2020-12-21
BOETTCHER, C 2020 78853.19 310 2020-01-01 2020-12-29
BRINKER, B 2020 98241.66 384 2020-01-02 2020-12-23
BROWN, K 2020 70544.56 239 2020-01-02 2020-12-22
BULES, A 2020 59127.22 289 2020-01-02 2020-12-23
BURCH, K 2020 69448.55 114 2020-01-06 2020-12-30
CASEY, K 2020 78331.47 142 2020-01-02 2020-12-22
CHAPMAN, M 2020 118063.12 355 2020-01-02 2020-12-29
DALTON, A 2020 80120.34 181 2020-01-06 2020-12-17
DUCKWORTH, A 2020 700494.76 2662 2020-01-14 2020-12-30
DUNCAN, J 2020 56452.28 148 2020-01-06 2020-12-23
EDWARDS, M 2020 206108.18 498 2020-01-03 2020-12-23
EDWARDS, W 2020 75881.68 58 2020-01-02 2020-12-30
ESPOSITO, B 2020 70470.75 170 2020-01-02 2020-09-15
FIELD, M 2020 75069.48 235 2020-01-02 2020-11-20
FITZPATRICK, S 2020 313602.17 267 2020-01-03 2020-12-22
FUKSA, E 2020 105780.63 153 2020-01-02 2020-12-21
GERLT, T 2020 56489.06 168 2020-01-07 2020-12-24
GILROY, L 2020 70576.59 40 2020-01-04 2020-12-10
HALL, G 2020 62497.63 164 2020-01-07 2020-12-29
HENDERSON, K 2020 97763.14 168 2020-01-06 2020-12-23
HOLMES, K 2020 107442.03 27 2020-03-27 2020-08-04
HOYT, P 2020 53541.33 33 2020-01-04 2020-11-11
JACOB, J 2020 62084.11 188 2020-01-03 2020-12-24
JOHNSON, S 2020 66133.28 218 2020-01-07 2020-12-18
KINDRED, A 2020 80823.43 299 2020-01-03 2020-12-30
KINDSCHI, J 2020 59732.56 264 2020-02-18 2020-12-02
KING, S 2020 100570.59 201 2020-01-06 2020-12-23
KISSMAN, A 2020 87453.10 169 2020-01-06 2020-12-18
LEFLORE, K 2020 68224.68 209 2020-01-06 2020-12-23
LEISTER, J 2020 91929.67 336 2020-01-01 2020-12-29
LIANG, Y 2020 106265.67 197 2020-01-03 2020-12-18
MARSHALL, S 2020 53650.88 92 2020-01-03 2020-12-30
MATTHEWS, W 2020 498893.62 2522 2020-01-17 2020-11-30
MCAULIFF, D 2020 72044.65 192 2020-01-09 2020-12-29
MELTON, C 2020 56779.24 208 2020-02-04 2020-12-29
MOHIUDDIN, I 2020 52261.57 399 2020-01-03 2020-12-23
MONGOLD, B 2020 56104.56 186 2020-01-03 2020-12-22
MURLEY, C 2020 58634.84 146 2020-01-08 2020-12-22
PERRIN, T 2020 74170.39 106 2020-01-06 2020-12-16
PESTRAK, S 2020 82048.98 297 2020-01-02 2020-12-15
PETERSON, J 2020 101654.50 380 2020-01-24 2020-12-23
PRESLEY, A 2020 138183.51 156 2020-01-06 2020-12-28
REUTER, D 2020 155270.39 826 2020-01-02 2020-06-25
ROUSER, K 2020 55132.51 248 2020-01-02 2020-12-31
RUTHERFORD, T 2020 85323.14 434 2020-01-02 2020-07-17
SMITH, C 2020 71234.66 139 2020-01-07 2020-12-30
SPIVA, A 2020 52785.73 169 2020-01-01 2020-12-30
STANPHILL, S 2020 53126.81 195 2020-01-01 2020-12-23
STEANSON, A 2020 73783.17 240 2020-01-02 2020-12-21
TANNER, J 2020 110928.47 235 2020-01-06 2020-12-21
THACKER, G 2020 81762.79 208 2020-01-03 2020-12-18
TORNAKIAN, M 2020 156725.03 654 2020-01-02 2020-12-10
TROESTER, L 2020 117014.33 337 2020-01-01 2020-09-11
TURNER, D 2020 116445.13 470 2020-01-02 2020-12-28
VU, M 2020 64645.72 257 2020-01-06 2020-12-26
WATTS, A 2020 73651.71 268 2020-01-01 2020-12-18
WEST, B 2020 71747.08 139 2020-01-06 2020-12-18
WHITE, N 2020 56101.13 195 2020-01-08 2020-12-28
WHITMORE, D 2020 62343.69 158 2020-01-03 2020-12-18
WILBOURN, D 2020 59948.25 214 2020-01-02 2020-12-28
WILLIAMS, J 2020 65251.69 79 2020-01-06 2020-12-10
WOZNIAK, K 2020 60771.46 85 2020-01-10 2020-11-20
YARBROUGH-TESSMAN, V 2020 161540.47 223 2020-01-03 2020-12-28
# purchases not totaling more than $10,000 a month
pcardyrmo <- pcards %>%
  select(Cardholder.Name, tran_yearmo, Amount, Transaction.Date) %>%
  group_by(Cardholder.Name, tran_yearmo) %>%
  summarize(Amount_yrmo = sum(Amount)
    ,num_rows_yrmo = n()
    ,tran_yrmo_min = min(Transaction.Date)
    ,tran_yrmo_max = max(Transaction.Date)
    ) %>%
  filter(Amount_yrmo > 10000)
## `summarise()` has grouped output by 'Cardholder.Name'. You can override using the `.groups` argument.
# write.csv(pcardyrmo,"pcardyrmo_test.csv", row.names=FALSE)
knitr::kable(pcardyrmo, caption = "Cardholder Total Monthly Transactions > $10,000")  %>%
  kableExtra::kable_styling(latex_options = c("striped", "hold_position"), full_width = F) %>%
  kableExtra:: scroll_box(height = "200px")
Cardholder Total Monthly Transactions > $10,000
Cardholder.Name tran_yearmo Amount_yrmo num_rows_yrmo tran_yrmo_min tran_yrmo_max
AARON, D 202006 14027.77 7 2020-06-16 2020-06-30
AARON, D 202007 17188.96 21 2020-07-01 2020-07-31
AL-HARAKE, M 202002 10520.00 4 2020-02-07 2020-02-25
ALAN, J 202001 12751.38 6 2020-01-08 2020-01-31
ALAN, J 202003 12136.85 6 2020-03-02 2020-03-25
ANDERSON, K 202002 22107.83 50 2020-02-03 2020-02-29
ANDERSON, K 202003 13160.10 36 2020-03-01 2020-03-31
ANDERSON, K 202007 12637.42 43 2020-07-01 2020-07-31
ANDERSON, K 202008 12237.15 32 2020-08-01 2020-08-28
ARMSTRONG, P 202001 29004.89 31 2020-01-03 2020-01-31
ARMSTRONG, P 202002 13219.64 20 2020-02-01 2020-02-29
ARMSTRONG, P 202003 13515.79 21 2020-03-01 2020-03-31
BALLARD, J 202010 11380.73 20 2020-10-06 2020-10-30
BEACH, D 202008 10474.20 22 2020-08-03 2020-08-28
BEACH, D 202009 17420.20 19 2020-09-03 2020-09-24
BOETTCHER, C 202002 13073.28 36 2020-02-03 2020-02-29
BOETTCHER, C 202007 21503.87 58 2020-07-01 2020-07-27
BRATTON, A 202002 12887.28 70 2020-02-01 2020-02-29
BRINKER, B 202001 15556.09 41 2020-01-02 2020-01-30
BRINKER, B 202003 14574.91 37 2020-03-03 2020-03-31
BRINKER, B 202006 11683.91 49 2020-06-01 2020-06-30
BRINKER, B 202007 12333.61 45 2020-07-01 2020-07-31
BURCH, K 202004 10612.13 15 2020-04-01 2020-04-29
BUTLER, M 202011 11008.00 4 2020-11-10 2020-11-30
CALLAHAM, K 202006 14277.88 32 2020-06-03 2020-06-30
CASEY, K 202004 11826.60 9 2020-04-02 2020-04-29
CASEY, K 202006 11585.26 15 2020-06-03 2020-06-24
CHAPMAN, M 202001 24153.73 66 2020-01-02 2020-01-31
CHAPMAN, M 202002 10299.95 37 2020-02-03 2020-02-29
CHAPMAN, M 202008 15887.95 49 2020-08-01 2020-08-31
CHAPMAN, M 202009 10630.69 22 2020-09-02 2020-09-28
CHAPMAN, M 202011 14034.35 31 2020-11-02 2020-11-30
COOPER, K 202002 10198.52 11 2020-02-24 2020-02-25
CUSTAR, K 202004 10400.70 12 2020-04-01 2020-04-26
DALTON, A 202003 14785.31 22 2020-03-05 2020-03-26
DOWNING, S 202011 12893.12 23 2020-11-09 2020-11-20
DUCKWORTH, A 202001 24306.74 99 2020-01-14 2020-01-31
DUCKWORTH, A 202002 79820.55 249 2020-02-03 2020-02-28
DUCKWORTH, A 202003 47735.81 185 2020-03-02 2020-03-31
DUCKWORTH, A 202005 42162.76 180 2020-05-01 2020-05-30
DUCKWORTH, A 202006 79780.33 287 2020-06-01 2020-06-30
DUCKWORTH, A 202007 57540.00 262 2020-07-01 2020-07-31
DUCKWORTH, A 202008 103096.46 354 2020-08-02 2020-08-31
DUCKWORTH, A 202009 73039.61 300 2020-09-01 2020-09-30
DUCKWORTH, A 202010 71765.68 265 2020-10-01 2020-10-30
DUCKWORTH, A 202011 60959.38 195 2020-11-02 2020-11-30
DUCKWORTH, A 202012 54532.07 251 2020-12-01 2020-12-30
DUNCAN, J 202006 13031.65 23 2020-06-01 2020-06-30
EDWARDS, M 202001 11106.00 39 2020-01-03 2020-01-29
EDWARDS, M 202002 30155.12 69 2020-02-01 2020-02-29
EDWARDS, M 202003 15132.21 60 2020-03-02 2020-03-31
EDWARDS, M 202005 18256.28 37 2020-05-01 2020-05-31
EDWARDS, M 202006 13059.13 21 2020-06-02 2020-06-30
EDWARDS, M 202007 22918.32 62 2020-07-01 2020-07-31
EDWARDS, M 202008 19160.43 43 2020-08-01 2020-08-29
EDWARDS, M 202009 12931.26 40 2020-09-02 2020-09-30
EDWARDS, M 202010 23546.55 51 2020-10-01 2020-10-31
EDWARDS, M 202011 19030.76 31 2020-11-01 2020-11-25
EDWARDS, M 202012 13980.57 22 2020-12-01 2020-12-23
EDWARDS, W 202003 10128.45 5 2020-03-02 2020-03-26
EDWARDS, W 202009 11816.92 4 2020-09-03 2020-09-29
EDWARDS, W 202011 10809.02 4 2020-11-03 2020-11-25
ESPOSITO, B 202001 22529.17 56 2020-01-02 2020-01-31
ESPOSITO, B 202002 26642.33 32 2020-02-01 2020-02-27
FAHLENKAMP, H 202006 12897.18 13 2020-06-02 2020-06-16
FIELD, M 202001 12610.76 34 2020-01-02 2020-01-29
FIELD, M 202002 26231.92 41 2020-02-04 2020-02-28
FITZPATRICK, S 202001 36536.05 25 2020-01-03 2020-01-31
FITZPATRICK, S 202004 30230.84 28 2020-04-03 2020-04-30
FITZPATRICK, S 202005 56675.01 29 2020-05-03 2020-05-30
FITZPATRICK, S 202006 23397.38 20 2020-06-03 2020-06-24
FITZPATRICK, S 202007 23903.39 16 2020-07-01 2020-07-31
FITZPATRICK, S 202008 26138.85 26 2020-08-02 2020-08-28
FITZPATRICK, S 202009 34060.28 26 2020-09-02 2020-09-28
FITZPATRICK, S 202011 29988.80 45 2020-11-02 2020-11-25
FITZPATRICK, S 202012 31557.68 22 2020-12-01 2020-12-22
FUKSA, E 202005 13101.31 20 2020-05-01 2020-05-29
FUKSA, E 202006 15819.08 14 2020-06-01 2020-06-25
FUKSA, E 202007 22351.81 11 2020-07-01 2020-07-31
FUKSA, E 202008 10657.14 13 2020-08-05 2020-08-31
FUKSA, E 202009 20120.70 33 2020-09-01 2020-09-30
GERLT, T 202008 21749.15 67 2020-08-05 2020-08-29
GILROY, L 202009 13328.93 6 2020-09-01 2020-09-29
GILROY, L 202011 14621.82 4 2020-11-06 2020-11-19
GRAFFMAN, M 202006 11368.27 11 2020-06-08 2020-06-30
GRAHAM, O 202011 11167.31 20 2020-11-04 2020-11-30
GROCE, M 202001 11285.09 65 2020-01-02 2020-01-31
GROCE, M 202002 10142.60 19 2020-02-01 2020-02-28
HALL, G 202001 10596.93 14 2020-01-07 2020-01-31
HALL, G 202012 12842.38 20 2020-12-01 2020-12-29
HANCOCK, K 202010 11428.12 47 2020-10-02 2020-10-30
HENDERSON, K 202001 11628.43 20 2020-01-06 2020-01-29
HENDERSON, K 202002 17932.40 18 2020-02-03 2020-02-28
HENDERSON, K 202009 14228.45 18 2020-09-01 2020-09-26
HOLLIDAY, J 202002 14303.50 33 2020-02-01 2020-02-29
HOLLIDAY, J 202003 13885.46 18 2020-03-02 2020-03-28
HOLMES, K 202003 15017.01 3 2020-03-27 2020-03-27
HOLMES, K 202004 57757.42 14 2020-04-02 2020-04-30
HOLMES, K 202005 29911.80 2 2020-05-14 2020-05-26
HOYT, P 202001 11452.93 7 2020-01-04 2020-01-31
HOYT, P 202002 14367.05 5 2020-02-10 2020-02-29
HUGHES, S 202010 10712.57 59 2020-10-01 2020-10-30
JACOB, J 202001 20031.10 15 2020-01-03 2020-01-31
JONES, C 202012 11218.25 8 2020-12-03 2020-12-29
KINDRED, A 202001 12927.37 35 2020-01-03 2020-01-31
KINDRED, A 202002 14552.19 28 2020-02-02 2020-02-28
KINDSCHI, J 202002 13339.82 1 2020-02-18 2020-02-18
KINDSCHI, J 202006 12596.67 87 2020-06-03 2020-06-29
KINDSCHI, J 202007 10085.25 77 2020-07-01 2020-07-30
KING, S 202003 12546.43 12 2020-03-02 2020-03-27
KING, S 202006 11560.95 15 2020-06-08 2020-06-24
KING, S 202008 10052.86 10 2020-08-03 2020-08-31
KING, S 202009 10426.36 49 2020-09-02 2020-09-30
KING, S 202011 13861.02 16 2020-11-03 2020-11-24
KING, S 202012 12327.59 25 2020-12-01 2020-12-23
KISSMAN, A 202001 15111.12 27 2020-01-06 2020-01-31
KISSMAN, A 202002 13995.70 31 2020-02-01 2020-02-29
KISSMAN, A 202003 11538.42 21 2020-03-02 2020-03-25
KISSMAN, A 202009 17228.19 13 2020-09-09 2020-09-30
KISSMAN, A 202010 10854.93 18 2020-10-01 2020-10-30
LEFLORE, K 202007 11128.07 17 2020-07-01 2020-07-31
LEISTER, J 202001 12674.28 42 2020-01-01 2020-01-31
LEISTER, J 202003 12375.94 51 2020-03-02 2020-03-31
LEISTER, J 202007 10746.69 20 2020-07-08 2020-07-31
LIANG, Y 202001 17464.23 28 2020-01-03 2020-01-31
LIANG, Y 202008 15339.83 27 2020-08-01 2020-08-27
LIVELY, A 202006 14213.99 5 2020-06-09 2020-06-29
LOFFI, R 202003 15240.21 16 2020-03-03 2020-03-23
LYDA, S 202006 11252.89 14 2020-06-15 2020-06-30
MAFI, G 202001 11594.72 29 2020-01-02 2020-01-31
MARSHALL, S 202001 12681.03 20 2020-01-03 2020-01-31
MARSHALL, S 202007 15889.02 17 2020-07-02 2020-07-29
MARSHALL, S 202012 11376.01 17 2020-12-11 2020-12-30
MATTHEWS, W 202001 260548.71 890 2020-01-17 2020-01-31
MATTHEWS, W 202002 321026.29 1055 2020-02-03 2020-02-29
MATTHEWS, W 202003 41539.19 299 2020-03-02 2020-03-30
MCAULIFF, D 202003 10753.62 24 2020-03-04 2020-03-31
MCAULIFF, D 202011 10371.17 16 2020-11-02 2020-11-24
MILLER, R 202007 11566.66 26 2020-07-06 2020-07-29
MITCHELL, L 202002 12498.60 10 2020-02-07 2020-02-28
MITCHELL, T 202001 16143.17 68 2020-01-05 2020-01-31
O MEALEY, S 202003 11237.52 8 2020-03-02 2020-03-31
PAGE, A 202011 12977.95 41 2020-11-02 2020-11-30
PAGE, A 202012 23163.81 41 2020-12-01 2020-12-30
PERRIN, T 202006 14833.03 16 2020-06-03 2020-06-19
PERRIN, T 202011 15879.95 9 2020-11-01 2020-11-17
PERRY, S 202003 12395.38 1 2020-03-11 2020-03-11
PESTRAK, S 202002 22643.27 127 2020-02-01 2020-02-29
PESTRAK, S 202003 21851.24 38 2020-03-01 2020-03-30
PETERSON, J 202002 16141.97 61 2020-02-03 2020-02-29
PETERSON, J 202004 10161.84 28 2020-04-02 2020-04-28
PETERSON, J 202005 10537.01 17 2020-05-03 2020-05-29
PETERSON, J 202011 10904.47 35 2020-11-01 2020-11-30
PETERSON, J 202012 13197.57 28 2020-12-04 2020-12-23
PETRO, A 202004 10036.64 8 2020-04-02 2020-04-29
POLLARD, K 202002 18264.14 39 2020-02-02 2020-02-29
POLLARD, K 202008 11881.03 14 2020-08-02 2020-08-28
PRESLEY, A 202001 12433.55 21 2020-01-06 2020-01-31
PRESLEY, A 202005 10288.29 5 2020-05-21 2020-05-29
PRESLEY, A 202008 17623.25 33 2020-08-02 2020-08-27
PRESLEY, A 202009 25428.60 27 2020-09-02 2020-09-26
PRESLEY, A 202010 21946.20 17 2020-10-01 2020-10-31
PRESLEY, A 202011 14534.83 12 2020-11-02 2020-11-30
PRESLEY, A 202012 11103.35 17 2020-12-01 2020-12-28
REUTER, D 202001 216488.60 750 2020-01-02 2020-01-20
ROUSER, K 202004 10019.16 39 2020-04-01 2020-04-30
ROUSER, K 202011 11804.69 38 2020-11-03 2020-11-30
RUTHERFORD, T 202001 22037.46 115 2020-01-02 2020-01-31
RUTHERFORD, T 202002 24300.37 115 2020-02-03 2020-02-28
RUTHERFORD, T 202003 24979.99 103 2020-03-02 2020-03-20
SCHLER, L 202008 10137.58 14 2020-08-01 2020-08-31
SISNEY, D 202007 15855.10 11 2020-07-01 2020-07-29
SMITH, C 202009 15325.93 19 2020-09-03 2020-09-30
SMITH, C 202012 10626.68 16 2020-12-03 2020-12-30
STANPHILL, S 202008 10423.92 19 2020-08-03 2020-08-31
STANUSH, S 202001 11706.55 23 2020-01-10 2020-01-30
STANUSH, S 202002 13875.52 31 2020-02-03 2020-02-29
STEANSON, A 202001 17234.09 31 2020-01-02 2020-01-30
STEANSON, A 202002 10426.13 32 2020-02-04 2020-02-27
STEANSON, A 202004 11285.46 24 2020-04-07 2020-04-25
STOVER, C 202001 11520.00 7 2020-01-23 2020-01-25
STRAH, T 202002 10233.87 29 2020-02-03 2020-02-29
SUNKAR, R 202003 10030.85 14 2020-03-02 2020-03-31
TANNER, J 202006 15572.72 24 2020-06-01 2020-06-30
TANNER, J 202007 12430.10 36 2020-07-02 2020-07-31
TANNER, J 202008 13197.42 32 2020-08-01 2020-08-31
TANNER, J 202009 12393.33 21 2020-09-02 2020-09-30
TANNER, J 202011 13005.50 22 2020-11-02 2020-11-24
THACKER, G 202006 24284.59 22 2020-06-01 2020-06-30
THACKER, G 202007 15742.70 28 2020-07-01 2020-07-31
THACKER, G 202008 17028.39 21 2020-08-03 2020-08-31
TORNAKIAN, M 202001 42529.25 178 2020-01-02 2020-01-31
TORNAKIAN, M 202003 21117.33 67 2020-03-02 2020-03-31
TORNAKIAN, M 202004 13556.36 73 2020-04-01 2020-04-30
TORNAKIAN, M 202005 23835.62 66 2020-05-01 2020-05-29
TORNAKIAN, M 202007 19243.12 73 2020-07-01 2020-07-31
TORNAKIAN, M 202008 21065.33 90 2020-08-03 2020-08-31
TREANOR, M 202008 10445.00 2 2020-08-27 2020-08-28
TRIBBLE, T 202001 10967.82 9 2020-01-07 2020-01-30
TROESTER, L 202001 51346.80 140 2020-01-01 2020-01-31
TROESTER, L 202002 32520.91 89 2020-02-01 2020-02-29
TROESTER, L 202003 20460.46 41 2020-03-01 2020-03-16
TURNER, D 202001 13596.34 70 2020-01-02 2020-01-30
TURNER, D 202002 15181.81 40 2020-02-03 2020-02-27
TURNER, D 202008 10389.18 38 2020-08-02 2020-08-31
TURNER, D 202011 11284.15 34 2020-11-01 2020-11-30
TURNER, D 202012 12066.69 45 2020-12-01 2020-12-28
VICOSA BAUERMANN, F 202007 13893.32 34 2020-07-07 2020-07-31
WALLACE, L 202009 14192.47 28 2020-09-02 2020-09-29
WALTON, D 202006 20022.03 27 2020-06-05 2020-06-30
WELCH, S 202007 13160.38 22 2020-07-06 2020-07-31
WEST, B 202003 14353.65 14 2020-03-02 2020-03-31
WEST, B 202011 11162.99 13 2020-11-02 2020-11-30
WILLIAMS, J 202009 14918.25 12 2020-09-03 2020-09-30
WILLIAMS, J 202012 27100.00 4 2020-12-09 2020-12-10
WILLIAMSON, R 202007 12036.53 15 2020-07-07 2020-07-31
WOZNIAK, K 202001 10916.12 13 2020-01-10 2020-01-31
WOZNIAK, K 202006 11236.49 13 2020-06-03 2020-06-27
WRIGHT, J 202007 10445.65 10 2020-07-03 2020-07-30
YARBROUGH-TESSMAN, V 202001 19925.01 22 2020-01-03 2020-01-31
YARBROUGH-TESSMAN, V 202003 22343.91 31 2020-03-02 2020-03-31
YARBROUGH-TESSMAN, V 202004 10842.99 13 2020-04-01 2020-04-24
YARBROUGH-TESSMAN, V 202007 20125.81 25 2020-07-01 2020-07-29
YARBROUGH-TESSMAN, V 202008 14323.99 19 2020-08-03 2020-08-27
YARBROUGH-TESSMAN, V 202009 14385.12 20 2020-09-02 2020-09-30
YARBROUGH-TESSMAN, V 202011 14179.26 22 2020-11-02 2020-11-30
YARBROUGH-TESSMAN, V 202012 12795.94 19 2020-12-01 2020-12-28
YORK, A 202003 12410.15 18 2020-03-04 2020-03-30
# individual transactions are not allowed to exceed $5,000 each
# check by cardholder + transaction date for daily total charge
pcarddate <- pcards %>%
  select(Cardholder.Name, Transaction.Date, Amount) %>%
  group_by(Cardholder.Name, Transaction.Date) %>%
  summarize(Amount_date = sum(Amount)
    ,num_rows_date = n()
    ,tran_date_min = min(Transaction.Date)
    ,tran_date_max = max(Transaction.Date)
    ) %>%
  filter(Amount_date > 5000)
## `summarise()` has grouped output by 'Cardholder.Name'. You can override using the `.groups` argument.
# write.csv(pcarddate,"pcarddate_test.csv", row.names=FALSE)
knitr::kable(pcarddate, caption = "Cardholder Total Daily Transactions > $5,000")  %>%
  kableExtra::kable_styling(latex_options = c("striped", "hold_position"), full_width = F) %>%
  kableExtra:: scroll_box(height = "200px")
Cardholder Total Daily Transactions > $5,000
Cardholder.Name Transaction.Date Amount_date num_rows_date tran_date_min tran_date_max
AARON, D 2020-06-16 6735.00 2 2020-06-16 2020-06-16
AL-HARAKE, M 2020-02-21 5700.00 1 2020-02-21 2020-02-21
ALAN, J 2020-03-25 8824.76 3 2020-03-25 2020-03-25
ANDERSON, K 2020-02-21 7596.32 4 2020-02-21 2020-02-21
ARMSTRONG, P 2020-01-13 6303.52 5 2020-01-13 2020-01-13
ARMSTRONG, P 2020-01-16 10229.30 2 2020-01-16 2020-01-16
ARMSTRONG, P 2020-01-24 5964.08 2 2020-01-24 2020-01-24
ARMSTRONG, P 2020-02-20 6668.09 1 2020-02-20 2020-02-20
ARMSTRONG, P 2020-03-14 5981.46 2 2020-03-14 2020-03-14
ARMSTRONG, P 2020-04-08 7752.22 1 2020-04-08 2020-04-08
ARMSTRONG, P 2020-06-09 8076.71 2 2020-06-09 2020-06-09
BAILEY, J 2020-03-24 5569.62 2 2020-03-24 2020-03-24
BAUM, K 2020-06-25 6568.47 1 2020-06-25 2020-06-25
BAYLES, M 2020-01-09 5870.32 2 2020-01-09 2020-01-09
BEACH, D 2020-09-22 7314.90 2 2020-09-22 2020-09-22
BERG, M 2020-03-11 5321.23 6 2020-03-11 2020-03-11
BLACKSHIRE, P 2020-09-09 5498.83 10 2020-09-09 2020-09-09
BOETTCHER, C 2020-07-07 5026.69 3 2020-07-07 2020-07-07
BRATTON, A 2020-02-19 5968.51 18 2020-02-19 2020-02-19
BROWN, A 2020-08-12 5937.00 12 2020-08-12 2020-08-12
BUTLER, M 2020-02-12 5606.61 2 2020-02-12 2020-02-12
BUTLER, M 2020-11-10 10000.00 1 2020-11-10 2020-11-10
CALLAHAM, K 2020-05-11 6003.13 3 2020-05-11 2020-05-11
CANAMARES, J 2020-01-16 6024.64 3 2020-01-16 2020-01-16
CHAPMAN, M 2020-01-11 5083.41 10 2020-01-11 2020-01-11
COOPER, K 2020-02-25 9153.93 5 2020-02-25 2020-02-25
COOPER, K 2020-05-27 6393.74 4 2020-05-27 2020-05-27
COOPER, K 2020-09-16 5567.81 3 2020-09-16 2020-09-16
CROSTHWAIT, M 2020-10-15 6676.28 6 2020-10-15 2020-10-15
DALTON, A 2020-03-06 7908.95 4 2020-03-06 2020-03-06
DOWNING, S 2020-11-12 5645.79 8 2020-11-12 2020-11-12
DOWNING, S 2020-11-13 5448.45 6 2020-11-13 2020-11-13
DUCKWORTH, A 2020-01-22 7640.49 15 2020-01-22 2020-01-22
DUCKWORTH, A 2020-01-23 7388.80 20 2020-01-23 2020-01-23
DUCKWORTH, A 2020-02-04 8296.48 21 2020-02-04 2020-02-04
DUCKWORTH, A 2020-02-06 7169.69 17 2020-02-06 2020-02-06
DUCKWORTH, A 2020-02-10 10464.51 17 2020-02-10 2020-02-10
DUCKWORTH, A 2020-02-11 9347.99 20 2020-02-11 2020-02-11
DUCKWORTH, A 2020-02-14 5106.47 17 2020-02-14 2020-02-14
DUCKWORTH, A 2020-02-19 6888.62 9 2020-02-19 2020-02-19
DUCKWORTH, A 2020-02-20 5258.65 21 2020-02-20 2020-02-20
DUCKWORTH, A 2020-02-25 5204.78 14 2020-02-25 2020-02-25
DUCKWORTH, A 2020-03-10 6707.43 5 2020-03-10 2020-03-10
DUCKWORTH, A 2020-03-11 5353.37 15 2020-03-11 2020-03-11
DUCKWORTH, A 2020-03-19 5138.45 14 2020-03-19 2020-03-19
DUCKWORTH, A 2020-05-18 8607.16 9 2020-05-18 2020-05-18
DUCKWORTH, A 2020-05-26 5638.14 17 2020-05-26 2020-05-26
DUCKWORTH, A 2020-06-03 6099.18 15 2020-06-03 2020-06-03
DUCKWORTH, A 2020-06-11 9889.71 20 2020-06-11 2020-06-11
DUCKWORTH, A 2020-06-12 6713.23 29 2020-06-12 2020-06-12
DUCKWORTH, A 2020-06-15 5413.29 18 2020-06-15 2020-06-15
DUCKWORTH, A 2020-06-18 6447.62 21 2020-06-18 2020-06-18
DUCKWORTH, A 2020-06-19 6477.95 13 2020-06-19 2020-06-19
DUCKWORTH, A 2020-07-16 5749.22 14 2020-07-16 2020-07-16
DUCKWORTH, A 2020-07-21 7278.95 30 2020-07-21 2020-07-21
DUCKWORTH, A 2020-08-04 7201.29 9 2020-08-04 2020-08-04
DUCKWORTH, A 2020-08-06 12746.52 25 2020-08-06 2020-08-06
DUCKWORTH, A 2020-08-13 7393.56 32 2020-08-13 2020-08-13
DUCKWORTH, A 2020-08-18 7315.08 25 2020-08-18 2020-08-18
DUCKWORTH, A 2020-08-20 11535.84 29 2020-08-20 2020-08-20
DUCKWORTH, A 2020-08-21 6518.37 20 2020-08-21 2020-08-21
DUCKWORTH, A 2020-08-24 6348.34 25 2020-08-24 2020-08-24
DUCKWORTH, A 2020-08-25 12356.74 38 2020-08-25 2020-08-25
DUCKWORTH, A 2020-08-31 6543.71 15 2020-08-31 2020-08-31
DUCKWORTH, A 2020-09-09 5247.18 18 2020-09-09 2020-09-09
DUCKWORTH, A 2020-09-10 5903.54 17 2020-09-10 2020-09-10
DUCKWORTH, A 2020-09-15 7118.10 20 2020-09-15 2020-09-15
DUCKWORTH, A 2020-09-22 6417.21 13 2020-09-22 2020-09-22
DUCKWORTH, A 2020-09-23 5123.59 17 2020-09-23 2020-09-23
DUCKWORTH, A 2020-09-29 9902.92 42 2020-09-29 2020-09-29
DUCKWORTH, A 2020-10-01 6744.54 14 2020-10-01 2020-10-01
DUCKWORTH, A 2020-10-07 6925.74 21 2020-10-07 2020-10-07
DUCKWORTH, A 2020-10-13 8279.23 15 2020-10-13 2020-10-13
DUCKWORTH, A 2020-10-15 8419.12 24 2020-10-15 2020-10-15
DUCKWORTH, A 2020-10-22 8019.53 22 2020-10-22 2020-10-22
DUCKWORTH, A 2020-10-29 5606.23 19 2020-10-29 2020-10-29
DUCKWORTH, A 2020-11-09 5491.84 9 2020-11-09 2020-11-09
DUCKWORTH, A 2020-11-12 6560.07 15 2020-11-12 2020-11-12
DUCKWORTH, A 2020-11-24 6704.53 12 2020-11-24 2020-11-24
DUCKWORTH, A 2020-12-07 6239.60 12 2020-12-07 2020-12-07
DUCKWORTH, A 2020-12-10 8948.28 28 2020-12-10 2020-12-10
DUCKWORTH, A 2020-12-14 5093.62 13 2020-12-14 2020-12-14
DUNCAN, J 2020-06-18 5194.50 2 2020-06-18 2020-06-18
DUNCAN, J 2020-08-28 7399.00 2 2020-08-28 2020-08-28
DUNN, B 2020-06-26 5554.00 1 2020-06-26 2020-06-26
EDWARDS, M 2020-02-04 5778.64 2 2020-02-04 2020-02-04
EDWARDS, M 2020-05-28 5219.00 4 2020-05-28 2020-05-28
EDWARDS, M 2020-07-07 5351.32 9 2020-07-07 2020-07-07
EDWARDS, M 2020-08-06 5049.15 4 2020-08-06 2020-08-06
ESPOSITO, B 2020-01-06 9694.36 1 2020-01-06 2020-01-06
ESPOSITO, B 2020-02-06 7143.52 3 2020-02-06 2020-02-06
ESPOSITO, B 2020-02-10 6393.69 2 2020-02-10 2020-02-10
ESPOSITO, B 2020-03-16 6563.05 3 2020-03-16 2020-03-16
FAIRBANKS, W 2020-05-27 6452.50 3 2020-05-27 2020-05-27
FITZPATRICK, S 2020-01-03 5097.43 2 2020-01-03 2020-01-03
FITZPATRICK, S 2020-01-17 6365.07 3 2020-01-17 2020-01-17
FITZPATRICK, S 2020-01-23 6378.12 5 2020-01-23 2020-01-23
FITZPATRICK, S 2020-01-31 8270.69 4 2020-01-31 2020-01-31
FITZPATRICK, S 2020-02-20 7185.00 2 2020-02-20 2020-02-20
FITZPATRICK, S 2020-04-09 6133.20 3 2020-04-09 2020-04-09
FITZPATRICK, S 2020-04-22 5873.90 2 2020-04-22 2020-04-22
FITZPATRICK, S 2020-04-24 5860.50 3 2020-04-24 2020-04-24
FITZPATRICK, S 2020-05-13 12863.00 6 2020-05-13 2020-05-13
FITZPATRICK, S 2020-05-15 5104.73 2 2020-05-15 2020-05-15
FITZPATRICK, S 2020-05-18 6233.01 2 2020-05-18 2020-05-18
FITZPATRICK, S 2020-05-28 13479.38 4 2020-05-28 2020-05-28
FITZPATRICK, S 2020-05-29 8025.42 3 2020-05-29 2020-05-29
FITZPATRICK, S 2020-06-16 6012.92 2 2020-06-16 2020-06-16
FITZPATRICK, S 2020-07-30 11641.18 7 2020-07-30 2020-07-30
FITZPATRICK, S 2020-08-11 6630.00 5 2020-08-11 2020-08-11
FITZPATRICK, S 2020-08-24 5182.04 3 2020-08-24 2020-08-24
FITZPATRICK, S 2020-09-16 12300.53 10 2020-09-16 2020-09-16
FITZPATRICK, S 2020-09-23 8453.52 4 2020-09-23 2020-09-23
FITZPATRICK, S 2020-11-03 15488.23 13 2020-11-03 2020-11-03
FITZPATRICK, S 2020-11-05 5363.64 3 2020-11-05 2020-11-05
FITZPATRICK, S 2020-12-21 13303.83 7 2020-12-21 2020-12-21
FUKSA, E 2020-06-09 5658.78 1 2020-06-09 2020-06-09
FUKSA, E 2020-07-09 7480.00 1 2020-07-09 2020-07-09
FUKSA, E 2020-09-03 5782.11 6 2020-09-03 2020-09-03
GERLT, T 2020-02-04 6138.34 2 2020-02-04 2020-02-04
GERLT, T 2020-08-07 8533.61 24 2020-08-07 2020-08-07
GILROY, L 2020-05-21 5573.60 3 2020-05-21 2020-05-21
GRAHAM, O 2020-11-10 9439.92 16 2020-11-10 2020-11-10
GREER, B 2020-04-23 6171.43 2 2020-04-23 2020-04-23
HANCOCK, K 2020-09-18 6201.91 2 2020-09-18 2020-09-18
HANSON, C 2020-04-16 5063.00 3 2020-04-16 2020-04-16
HEINEMANN, R 2020-03-18 5352.70 2 2020-03-18 2020-03-18
HENDERSON, K 2020-02-04 5297.19 2 2020-02-04 2020-02-04
HENDERSON, K 2020-05-20 5133.88 2 2020-05-20 2020-05-20
HERGENREDER, R 2020-01-23 5075.83 2 2020-01-23 2020-01-23
HOLLIDAY, J 2020-03-02 5447.96 2 2020-03-02 2020-03-02
HOLMES, K 2020-03-27 15017.01 3 2020-03-27 2020-03-27
HOLMES, K 2020-04-07 10299.09 2 2020-04-07 2020-04-07
HOLMES, K 2020-04-15 9299.05 1 2020-04-15 2020-04-15
HOLMES, K 2020-04-30 23700.00 1 2020-04-30 2020-04-30
HOLMES, K 2020-05-14 8750.00 1 2020-05-14 2020-05-14
HOLMES, K 2020-05-26 21161.80 1 2020-05-26 2020-05-26
JACKSON, R 2020-02-21 6516.90 26 2020-02-21 2020-02-21
JACOB, J 2020-01-20 6549.52 2 2020-01-20 2020-01-20
JOHNSON, N 2020-11-10 5318.93 3 2020-11-10 2020-11-10
JOYCE, J 2020-08-11 6000.00 2 2020-08-11 2020-08-11
KINDSCHI, J 2020-02-18 13339.82 1 2020-02-18 2020-02-18
KING, S 2020-11-18 7113.77 3 2020-11-18 2020-11-18
KISSMAN, A 2020-09-24 13348.80 4 2020-09-24 2020-09-24
LEFLORE, K 2020-08-21 5017.80 2 2020-08-21 2020-08-21
LEISTER, J 2020-08-27 5013.52 5 2020-08-27 2020-08-27
LIANG, Y 2020-01-07 5187.22 2 2020-01-07 2020-01-07
LINSENMEYER, J 2020-02-06 5255.00 2 2020-02-06 2020-02-06
LOFFI, R 2020-03-09 5330.48 2 2020-03-09 2020-03-09
LOFFI, R 2020-03-23 5964.00 1 2020-03-23 2020-03-23
LOPER, K 2020-06-16 5270.00 2 2020-06-16 2020-06-16
LOWERY, A 2020-06-09 6654.50 2 2020-06-09 2020-06-09
LYDA, S 2020-06-15 5234.70 2 2020-06-15 2020-06-15
MAHONEY, A 2020-02-25 6190.51 21 2020-02-25 2020-02-25
MARSHALL, S 2020-01-10 6784.56 4 2020-01-10 2020-01-10
MARSHALL, S 2020-12-16 6126.71 8 2020-12-16 2020-12-16
MATERER, N 2020-03-19 7600.00 1 2020-03-19 2020-03-19
MATTHEWS, W 2020-01-17 15566.20 59 2020-01-17 2020-01-17
MATTHEWS, W 2020-01-21 25907.35 101 2020-01-21 2020-01-21
MATTHEWS, W 2020-01-22 40039.04 132 2020-01-22 2020-01-22
MATTHEWS, W 2020-01-23 36300.88 123 2020-01-23 2020-01-23
MATTHEWS, W 2020-01-24 12944.70 45 2020-01-24 2020-01-24
MATTHEWS, W 2020-01-27 15362.54 52 2020-01-27 2020-01-27
MATTHEWS, W 2020-01-28 36559.00 126 2020-01-28 2020-01-28
MATTHEWS, W 2020-01-29 23120.00 88 2020-01-29 2020-01-29
MATTHEWS, W 2020-01-30 29054.65 81 2020-01-30 2020-01-30
MATTHEWS, W 2020-01-31 15329.15 58 2020-01-31 2020-01-31
MATTHEWS, W 2020-02-03 15743.25 48 2020-02-03 2020-02-03
MATTHEWS, W 2020-02-04 14341.46 51 2020-02-04 2020-02-04
MATTHEWS, W 2020-02-05 5967.03 29 2020-02-05 2020-02-05
MATTHEWS, W 2020-02-06 22539.09 65 2020-02-06 2020-02-06
MATTHEWS, W 2020-02-07 16100.52 60 2020-02-07 2020-02-07
MATTHEWS, W 2020-02-10 22769.79 54 2020-02-10 2020-02-10
MATTHEWS, W 2020-02-11 20025.30 60 2020-02-11 2020-02-11
MATTHEWS, W 2020-02-12 24535.84 86 2020-02-12 2020-02-12
MATTHEWS, W 2020-02-13 20085.91 50 2020-02-13 2020-02-13
MATTHEWS, W 2020-02-14 25001.11 62 2020-02-14 2020-02-14
MATTHEWS, W 2020-02-17 12598.22 39 2020-02-17 2020-02-17
MATTHEWS, W 2020-02-18 18238.54 64 2020-02-18 2020-02-18
MATTHEWS, W 2020-02-19 13417.25 43 2020-02-19 2020-02-19
MATTHEWS, W 2020-02-20 20447.09 76 2020-02-20 2020-02-20
MATTHEWS, W 2020-02-21 13957.81 51 2020-02-21 2020-02-21
MATTHEWS, W 2020-02-24 5161.37 21 2020-02-24 2020-02-24
MATTHEWS, W 2020-02-25 14189.68 55 2020-02-25 2020-02-25
MATTHEWS, W 2020-02-26 10352.66 31 2020-02-26 2020-02-26
MATTHEWS, W 2020-02-27 11123.55 68 2020-02-27 2020-02-27
MATTHEWS, W 2020-02-28 8358.12 24 2020-02-28 2020-02-28
MATTHEWS, W 2020-03-02 11901.34 43 2020-03-02 2020-03-02
MATTHEWS, W 2020-03-03 7181.24 11 2020-03-03 2020-03-03
MATTHEWS, W 2020-03-04 12296.53 40 2020-03-04 2020-03-04
MATTHEWS, W 2020-03-09 6624.29 42 2020-03-09 2020-03-09
MATTHEWS, W 2020-03-10 6117.09 22 2020-03-10 2020-03-10
MILLER, T 2020-11-18 6318.73 3 2020-11-18 2020-11-18
MITCHELL, T 2020-01-31 5576.77 14 2020-01-31 2020-01-31
NIXON, J 2020-07-07 5064.01 4 2020-07-07 2020-07-07
NOTEWARE, S 2020-10-08 5290.00 18 2020-10-08 2020-10-08
O MEALEY, S 2020-03-31 6000.00 2 2020-03-31 2020-03-31
PAGE, A 2020-11-30 5681.62 3 2020-11-30 2020-11-30
PAGE, A 2020-12-21 13119.60 4 2020-12-21 2020-12-21
PERRIN, T 2020-03-03 5225.00 2 2020-03-03 2020-03-03
PERRIN, T 2020-06-04 5507.42 9 2020-06-04 2020-06-04
PERRIN, T 2020-11-04 8518.29 3 2020-11-04 2020-11-04
PERRY, S 2020-03-11 12395.38 1 2020-03-11 2020-03-11
PESTRAK, S 2020-03-18 10000.00 1 2020-03-18 2020-03-18
PESTRAK, S 2020-04-07 5531.15 3 2020-04-07 2020-04-07
PETERSON, J 2020-05-03 6016.50 2 2020-05-03 2020-05-03
PITCHER, M 2020-03-27 5625.00 3 2020-03-27 2020-03-27
POLLARD, K 2020-02-12 9351.54 5 2020-02-12 2020-02-12
PRATT, K 2020-06-17 5280.50 7 2020-06-17 2020-06-17
PRESLEY, A 2020-08-03 7023.13 6 2020-08-03 2020-08-03
PRESLEY, A 2020-09-21 9309.51 4 2020-09-21 2020-09-21
PRICE, C 2020-10-05 6874.50 2 2020-10-05 2020-10-05
REUTER, D 2020-01-02 29777.73 78 2020-01-02 2020-01-02
REUTER, D 2020-01-03 7319.37 49 2020-01-03 2020-01-03
REUTER, D 2020-01-06 15662.16 41 2020-01-06 2020-01-06
REUTER, D 2020-01-07 11731.55 48 2020-01-07 2020-01-07
REUTER, D 2020-01-08 37752.50 126 2020-01-08 2020-01-08
REUTER, D 2020-01-09 7260.26 32 2020-01-09 2020-01-09
REUTER, D 2020-01-10 7507.54 35 2020-01-10 2020-01-10
REUTER, D 2020-01-13 33103.78 102 2020-01-13 2020-01-13
REUTER, D 2020-01-14 24705.35 62 2020-01-14 2020-01-14
REUTER, D 2020-01-15 17207.12 74 2020-01-15 2020-01-15
REUTER, D 2020-01-16 15226.18 56 2020-01-16 2020-01-16
REUTER, D 2020-01-17 8202.75 41 2020-01-17 2020-01-17
ROBEDEAUX, M 2020-02-13 7615.40 4 2020-02-13 2020-02-13
ROUSER, K 2020-01-30 5008.22 2 2020-01-30 2020-01-30
RUTHERFORD, T 2020-03-19 5909.27 18 2020-03-19 2020-03-19
SCHLER, L 2020-01-29 5829.49 2 2020-01-29 2020-01-29
SMITH, C 2020-09-16 5481.88 3 2020-09-16 2020-09-16
SPIVA, A 2020-09-25 6691.09 3 2020-09-25 2020-09-25
STANUSH, S 2020-03-27 8020.80 1 2020-03-27 2020-03-27
STEANSON, A 2020-01-08 5119.06 4 2020-01-08 2020-01-08
STOVER, C 2020-01-23 7968.00 4 2020-01-23 2020-01-23
SULLIVAN, J 2020-12-16 5480.00 2 2020-12-16 2020-12-16
TANNER, J 2020-07-09 6106.10 4 2020-07-09 2020-07-09
TANNER, J 2020-11-03 5884.70 2 2020-11-03 2020-11-03
THACKER, G 2020-06-11 10942.32 2 2020-06-11 2020-06-11
THACKER, G 2020-06-15 6845.74 2 2020-06-15 2020-06-15
THACKER, G 2020-07-01 7693.78 4 2020-07-01 2020-07-01
THACKER, G 2020-08-04 5184.00 1 2020-08-04 2020-08-04
THACKER, G 2020-08-05 6932.26 2 2020-08-05 2020-08-05
TIKALSKY, P 2020-07-02 7261.50 1 2020-07-02 2020-07-02
TORNAKIAN, M 2020-01-09 6447.28 20 2020-01-09 2020-01-09
TORNAKIAN, M 2020-03-25 6855.18 19 2020-03-25 2020-03-25
TORNAKIAN, M 2020-03-30 7185.85 13 2020-03-30 2020-03-30
TORNAKIAN, M 2020-04-02 6164.40 14 2020-04-02 2020-04-02
TORNAKIAN, M 2020-05-12 5609.28 5 2020-05-12 2020-05-12
TORNAKIAN, M 2020-08-13 5501.19 7 2020-08-13 2020-08-13
TORNAKIAN, M 2020-08-24 6231.54 6 2020-08-24 2020-08-24
TREANOR, M 2020-08-27 9250.00 1 2020-08-27 2020-08-27
TRIBBLE, T 2020-01-07 6163.84 3 2020-01-07 2020-01-07
TROESTER, L 2020-01-10 6244.12 11 2020-01-10 2020-01-10
TROESTER, L 2020-01-20 5188.92 8 2020-01-20 2020-01-20
TROESTER, L 2020-03-07 5086.04 5 2020-03-07 2020-03-07
TROESTER, L 2020-03-09 6615.12 5 2020-03-09 2020-03-09
TURNER, D 2020-11-04 5153.51 2 2020-11-04 2020-11-04
TURNER, D 2020-12-01 5876.58 8 2020-12-01 2020-12-01
WALTON, D 2020-06-22 11550.00 3 2020-06-22 2020-06-22
WARREN, J 2020-07-21 5419.51 5 2020-07-21 2020-07-21
WHITMORE, D 2020-02-25 5175.00 2 2020-02-25 2020-02-25
WHITMORE, D 2020-03-06 5600.75 3 2020-03-06 2020-03-06
WHITMORE, D 2020-04-02 5867.00 5 2020-04-02 2020-04-02
WHITMORE, D 2020-06-26 7346.89 5 2020-06-26 2020-06-26
WHITMORE, D 2020-11-06 6039.00 8 2020-11-06 2020-11-06
WILLIAMS, J 2020-12-09 10000.00 1 2020-12-09 2020-12-09
WILLIAMS, J 2020-12-10 17100.00 3 2020-12-10 2020-12-10
WOOLDRIDGE, C 2020-03-09 7682.00 2 2020-03-09 2020-03-09
WOZNIAK, K 2020-06-27 5236.17 5 2020-06-27 2020-06-27
YARBROUGH-TESSMAN, V 2020-01-09 8353.00 5 2020-01-09 2020-01-09
YARBROUGH-TESSMAN, V 2020-03-05 9746.14 7 2020-03-05 2020-03-05
YARBROUGH-TESSMAN, V 2020-06-25 5068.56 3 2020-06-25 2020-06-25
YARBROUGH-TESSMAN, V 2020-07-29 6483.94 5 2020-07-29 2020-07-29
YARBROUGH-TESSMAN, V 2020-09-16 5787.61 5 2020-09-16 2020-09-16
YARBROUGH-TESSMAN, V 2020-11-12 7147.57 5 2020-11-12 2020-11-12
# individual transactions are not allowed to exceed $5,000 each
# check by cardholder + transaction date + merchant for daily total charge per merchant, to catch structuring payments
pcardmerc <- pcards %>%
  select(Cardholder.Name, Transaction.Date, Merchant, Amount) %>%
  group_by(Cardholder.Name, Transaction.Date, Merchant) %>%
  summarize(Amount_merc = sum(Amount)
    ,num_rows_merc = n()
    ,tran_merc_min = min(Transaction.Date)
    ,tran_merc_max = max(Transaction.Date)
    )%>%
  filter(Amount_merc > 5000)
## `summarise()` has grouped output by 'Cardholder.Name', 'Transaction.Date'. You can override using the `.groups` argument.
# write.csv(pcardmerc,"pcardmerc_test.csv", row.names=FALSE)
knitr::kable(pcardmerc, caption = "Cardholder + Merchant Total Daily Transactions > $5,000")  %>%
  kableExtra::kable_styling(latex_options = c("striped", "hold_position"), full_width = F) %>%
  kableExtra:: scroll_box(height = "200px")
Cardholder + Merchant Total Daily Transactions > $5,000
Cardholder.Name Transaction.Date Merchant Amount_merc num_rows_merc tran_merc_min tran_merc_max
AL-HARAKE, M 2020-02-21 HATFIELD & COMPANY INC 5700.00 1 2020-02-21 2020-02-21
ALAN, J 2020-03-25 MIDWEST VETERINARY SUPPL 5454.66 2 2020-03-25 2020-03-25
ARMSTRONG, P 2020-01-13 TEXAS A&M HOTEL & CONFER 5227.52 3 2020-01-13 2020-01-13
ARMSTRONG, P 2020-01-16 HILTON GARDEN INN 5290.59 1 2020-01-16 2020-01-16
ARMSTRONG, P 2020-01-24 MARRIOTT MORGANTOWN 5209.08 1 2020-01-24 2020-01-24
ARMSTRONG, P 2020-02-20 OMNI FORT WORTH HOTEL 6668.09 1 2020-02-20 2020-02-20
ARMSTRONG, P 2020-04-08 HILTON GARDEN INN 7752.22 1 2020-04-08 2020-04-08
ARMSTRONG, P 2020-06-09 WESTIN KANSAS CITY 8076.71 2 2020-06-09 2020-06-09
BAUM, K 2020-06-25 SP PC FACE SHIELD LL 6568.47 1 2020-06-25 2020-06-25
BAYLES, M 2020-01-09 AGRONOMIX SOFTWARE INC 5870.32 2 2020-01-09 2020-01-09
BLACKSHIRE, P 2020-09-09 GRIMSLEYS INC 5498.83 10 2020-09-09 2020-09-09
BUTLER, M 2020-11-10 HILTON GARDEN INN 10000.00 1 2020-11-10 2020-11-10
CANAMARES, J 2020-01-16 HARD ROCK SALES&CATERING 5810.94 2 2020-01-16 2020-01-16
CHAPMAN, M 2020-01-11 HYATT REGENCY NEW ORLNS 5083.41 10 2020-01-11 2020-01-11
COOPER, K 2020-02-25 PHONAK HEARING SYS 9153.93 5 2020-02-25 2020-02-25
COOPER, K 2020-05-27 PHONAK HEARING SYS 6393.74 4 2020-05-27 2020-05-27
COOPER, K 2020-09-16 PHONAK HEARING SYS 5567.81 3 2020-09-16 2020-09-16
DUCKWORTH, A 2020-02-10 GRAINGER 6915.06 7 2020-02-10 2020-02-10
DUCKWORTH, A 2020-02-11 IN STILLWATER STEEL & SU 5031.10 2 2020-02-11 2020-02-11
DUCKWORTH, A 2020-02-19 UNITED REFRIG BR #89 5025.25 2 2020-02-19 2020-02-19
DUCKWORTH, A 2020-08-04 UNITED REFRIG BR #89 6065.00 2 2020-08-04 2020-08-04
DUCKWORTH, A 2020-08-25 STILLWATER WINNELSON C 6120.11 16 2020-08-25 2020-08-25
DUNN, B 2020-06-26 IN GREEN MOUNTAIN TECHNO 5554.00 1 2020-06-26 2020-06-26
ESPOSITO, B 2020-01-06 THE READ HOUSE 9694.36 1 2020-01-06 2020-01-06
ESPOSITO, B 2020-02-06 ACE EXPRESS COACHES LLC 5587.75 1 2020-02-06 2020-02-06
FITZPATRICK, S 2020-01-23 JCE EHRLICH-RENTOKIL 5337.72 3 2020-01-23 2020-01-23
FITZPATRICK, S 2020-02-20 KONICA MINOLTA USA 7185.00 2 2020-02-20 2020-02-20
FITZPATRICK, S 2020-12-21 IN JESCO ELECTRIC, INC. 5316.06 2 2020-12-21 2020-12-21
FUKSA, E 2020-06-09 TFS FISHER SCI HUS 5658.78 1 2020-06-09 2020-06-09
FUKSA, E 2020-07-09 NCURA 7480.00 1 2020-07-09 2020-07-09
FUKSA, E 2020-09-03 COLUMBIA BUSINESS FORMS L 5480.00 2 2020-09-03 2020-09-03
GERLT, T 2020-08-07 PIVOT HEALTH 8533.61 24 2020-08-07 2020-08-07
GRAHAM, O 2020-11-10 IN DEARINGER PRINTING & 6797.67 12 2020-11-10 2020-11-10
HANCOCK, K 2020-09-18 HILTON GARDEN INN 5852.52 1 2020-09-18 2020-09-18
HOLMES, K 2020-03-27 GOSAFE 14761.25 2 2020-03-27 2020-03-27
HOLMES, K 2020-04-07 SQ ARGO MEDICAL 10390.00 1 2020-04-07 2020-04-07
HOLMES, K 2020-04-15 SQ ARGO MEDICAL 9299.05 1 2020-04-15 2020-04-15
HOLMES, K 2020-04-30 IN TRIOVA PHARMACEUTICAL 23700.00 1 2020-04-30 2020-04-30
HOLMES, K 2020-05-14 IN TRIOVA PHARMACEUTICAL 8750.00 1 2020-05-14 2020-05-14
HOLMES, K 2020-05-26 GOSAFE 21161.80 1 2020-05-26 2020-05-26
JOYCE, J 2020-08-11 WWW.OUP.COM 6000.00 2 2020-08-11 2020-08-11
KINDSCHI, J 2020-02-18 VETERANS AFFRS DMC 13339.82 1 2020-02-18 2020-02-18
KISSMAN, A 2020-09-24 VESPOLI USA INC. 8900.00 2 2020-09-24 2020-09-24
LOFFI, R 2020-03-23 HOME2 SUITES STILLWATER 5964.00 1 2020-03-23 2020-03-23
LOPER, K 2020-06-16 SQ THE COLLEGE AGE 5270.00 2 2020-06-16 2020-06-16
MAHONEY, A 2020-02-25 TRANDANG CO. LTD(SINH TOU 5651.01 3 2020-02-25 2020-02-25
MARSHALL, S 2020-01-10 CAPSIM MANAGEMENT SIMU 6154.86 2 2020-01-10 2020-01-10
MATERER, N 2020-03-19 WWW.BEYONDLABZ.COM 7600.00 1 2020-03-19 2020-03-19
MATTHEWS, W 2020-02-14 DELTA AIR 0067502110031 7165.45 1 2020-02-14 2020-02-14
NIXON, J 2020-07-07 B & C BUSINESS PRODUCTS 5064.01 4 2020-07-07 2020-07-07
O MEALEY, S 2020-03-31 EF EDUCATION FIRST 6000.00 2 2020-03-31 2020-03-31
PAGE, A 2020-11-30 VILLAGE TOURS LLC - CHART 5380.00 1 2020-11-30 2020-11-30
PAGE, A 2020-12-21 VILLAGE TOURS LLC - CHART 12239.60 2 2020-12-21 2020-12-21
PERRY, S 2020-03-11 DISNEY RESORTS-RESE 12395.38 1 2020-03-11 2020-03-11
PESTRAK, S 2020-03-18 HILTON BIRMINGHAM AT U 10000.00 1 2020-03-18 2020-03-18
PITCHER, M 2020-03-27 OK DEPT OF LABOR 5625.00 3 2020-03-27 2020-03-27
POLLARD, K 2020-02-12 LUX BUS AMERICA 5400.00 1 2020-02-12 2020-02-12
PRATT, K 2020-06-17 ESKIMO JOES PROMO PROD 5280.50 7 2020-06-17 2020-06-17
PRICE, C 2020-10-05 APPLE.COM/BILL 6874.50 2 2020-10-05 2020-10-05
REUTER, D 2020-01-14 AMERICAN AIR0017489897739 5476.05 1 2020-01-14 2020-01-14
SCHLER, L 2020-01-29 OPTIMUM ENERGY SOLUTIONS 5829.49 2 2020-01-29 2020-01-29
STANUSH, S 2020-03-27 MARRIOTTALBUQUERQUE 8020.80 1 2020-03-27 2020-03-27
STOVER, C 2020-01-23 RESIDENCE INN STILLWAT 7968.00 4 2020-01-23 2020-01-23
SULLIVAN, J 2020-12-16 COLUMBIA BUSINESS FORMS L 5480.00 2 2020-12-16 2020-12-16
THACKER, G 2020-06-11 EMIST DISINFECTION SOL 10886.49 1 2020-06-11 2020-06-11
THACKER, G 2020-08-04 SAVE RITE MEDICAL LLC 5184.00 1 2020-08-04 2020-08-04
THACKER, G 2020-08-05 SAVE RITE MEDICAL LLC 6912.00 1 2020-08-05 2020-08-05
TIKALSKY, P 2020-07-02 TULSA PLASTICS 7261.50 1 2020-07-02 2020-07-02
TORNAKIAN, M 2020-08-24 LOWES #00241 5687.31 4 2020-08-24 2020-08-24
TREANOR, M 2020-08-27 EBAY O 04-05635-97742 9250.00 1 2020-08-27 2020-08-27
TRIBBLE, T 2020-01-07 PROFESSIONAL TURF PRODUC 5498.82 2 2020-01-07 2020-01-07
TROESTER, L 2020-03-09 EDDIE V S 0178517 5815.13 1 2020-03-09 2020-03-09
WHITMORE, D 2020-02-25 NATIONAL ASSOCIATION FOR 5175.00 2 2020-02-25 2020-02-25
WILLIAMS, J 2020-12-09 AMAZON.COM FG5XA1C03 AMZN 10000.00 1 2020-12-09 2020-12-09
WILLIAMS, J 2020-12-10 AMAZON.COM PA94U6CN3 AMZN 7000.00 1 2020-12-10 2020-12-10
WILLIAMS, J 2020-12-10 AMAZON.COM VM5Z54QX3 10000.00 1 2020-12-10 2020-12-10
WOOLDRIDGE, C 2020-03-09 RAIL EUROPE NORTH AMER 7682.00 2 2020-03-09 2020-03-09
WOZNIAK, K 2020-06-27 TFS FISHERSCI ECOM HUS 5236.17 5 2020-06-27 2020-06-27
# (continued from above)
# create summary file from segmentation by cardholder + transaction date + merchant for daily total charge per merchant, to catch structuring payments
pcardmercsum <- pcardmerc %>%
  select(Cardholder.Name, Merchant, Amount_merc, num_rows_merc, tran_merc_min, tran_merc_max) %>%
  group_by(Cardholder.Name, Merchant) %>%
  summarize(Amount_merc_sum = sum(Amount_merc)
    ,num_rows_merc_cnt = n()
    ,num_rows_merc_sum = sum(num_rows_merc)
    ,tran_merc_min = min(tran_merc_min)
    ,tran_merc_max = max(tran_merc_max)
    )
## Adding missing grouping variables: `Transaction.Date`
## `summarise()` has grouped output by 'Cardholder.Name'. You can override using the `.groups` argument.
# write.csv(pcardmercsum,"pcardmercsum_test.csv", row.names=FALSE)
knitr::kable(pcardmercsum, caption = "Cardholder + Merchant Total Daily Transactions > $5,000 - Summary")  %>%
  kableExtra::kable_styling(latex_options = c("striped", "hold_position"), full_width = F) %>%
  kableExtra:: scroll_box(height = "200px")
Cardholder + Merchant Total Daily Transactions > $5,000 - Summary
Cardholder.Name Merchant Amount_merc_sum num_rows_merc_cnt num_rows_merc_sum tran_merc_min tran_merc_max
AL-HARAKE, M HATFIELD & COMPANY INC 5700.00 1 1 2020-02-21 2020-02-21
ALAN, J MIDWEST VETERINARY SUPPL 5454.66 1 2 2020-03-25 2020-03-25
ARMSTRONG, P HILTON GARDEN INN 13042.81 2 2 2020-01-16 2020-04-08
ARMSTRONG, P MARRIOTT MORGANTOWN 5209.08 1 1 2020-01-24 2020-01-24
ARMSTRONG, P OMNI FORT WORTH HOTEL 6668.09 1 1 2020-02-20 2020-02-20
ARMSTRONG, P TEXAS A&M HOTEL & CONFER 5227.52 1 3 2020-01-13 2020-01-13
ARMSTRONG, P WESTIN KANSAS CITY 8076.71 1 2 2020-06-09 2020-06-09
BAUM, K SP PC FACE SHIELD LL 6568.47 1 1 2020-06-25 2020-06-25
BAYLES, M AGRONOMIX SOFTWARE INC 5870.32 1 2 2020-01-09 2020-01-09
BLACKSHIRE, P GRIMSLEYS INC 5498.83 1 10 2020-09-09 2020-09-09
BUTLER, M HILTON GARDEN INN 10000.00 1 1 2020-11-10 2020-11-10
CANAMARES, J HARD ROCK SALES&CATERING 5810.94 1 2 2020-01-16 2020-01-16
CHAPMAN, M HYATT REGENCY NEW ORLNS 5083.41 1 10 2020-01-11 2020-01-11
COOPER, K PHONAK HEARING SYS 21115.48 3 12 2020-02-25 2020-09-16
DUCKWORTH, A GRAINGER 6915.06 1 7 2020-02-10 2020-02-10
DUCKWORTH, A IN STILLWATER STEEL & SU 5031.10 1 2 2020-02-11 2020-02-11
DUCKWORTH, A STILLWATER WINNELSON C 6120.11 1 16 2020-08-25 2020-08-25
DUCKWORTH, A UNITED REFRIG BR #89 11090.25 2 4 2020-02-19 2020-08-04
DUNN, B IN GREEN MOUNTAIN TECHNO 5554.00 1 1 2020-06-26 2020-06-26
ESPOSITO, B ACE EXPRESS COACHES LLC 5587.75 1 1 2020-02-06 2020-02-06
ESPOSITO, B THE READ HOUSE 9694.36 1 1 2020-01-06 2020-01-06
FITZPATRICK, S IN JESCO ELECTRIC, INC. 5316.06 1 2 2020-12-21 2020-12-21
FITZPATRICK, S JCE EHRLICH-RENTOKIL 5337.72 1 3 2020-01-23 2020-01-23
FITZPATRICK, S KONICA MINOLTA USA 7185.00 1 2 2020-02-20 2020-02-20
FUKSA, E COLUMBIA BUSINESS FORMS L 5480.00 1 2 2020-09-03 2020-09-03
FUKSA, E NCURA 7480.00 1 1 2020-07-09 2020-07-09
FUKSA, E TFS FISHER SCI HUS 5658.78 1 1 2020-06-09 2020-06-09
GERLT, T PIVOT HEALTH 8533.61 1 24 2020-08-07 2020-08-07
GRAHAM, O IN DEARINGER PRINTING & 6797.67 1 12 2020-11-10 2020-11-10
HANCOCK, K HILTON GARDEN INN 5852.52 1 1 2020-09-18 2020-09-18
HOLMES, K GOSAFE 35923.05 2 3 2020-03-27 2020-05-26
HOLMES, K IN TRIOVA PHARMACEUTICAL 32450.00 2 2 2020-04-30 2020-05-14
HOLMES, K SQ ARGO MEDICAL 19689.05 2 2 2020-04-07 2020-04-15
JOYCE, J WWW.OUP.COM 6000.00 1 2 2020-08-11 2020-08-11
KINDSCHI, J VETERANS AFFRS DMC 13339.82 1 1 2020-02-18 2020-02-18
KISSMAN, A VESPOLI USA INC. 8900.00 1 2 2020-09-24 2020-09-24
LOFFI, R HOME2 SUITES STILLWATER 5964.00 1 1 2020-03-23 2020-03-23
LOPER, K SQ THE COLLEGE AGE 5270.00 1 2 2020-06-16 2020-06-16
MAHONEY, A TRANDANG CO. LTD(SINH TOU 5651.01 1 3 2020-02-25 2020-02-25
MARSHALL, S CAPSIM MANAGEMENT SIMU 6154.86 1 2 2020-01-10 2020-01-10
MATERER, N WWW.BEYONDLABZ.COM 7600.00 1 1 2020-03-19 2020-03-19
MATTHEWS, W DELTA AIR 0067502110031 7165.45 1 1 2020-02-14 2020-02-14
NIXON, J B & C BUSINESS PRODUCTS 5064.01 1 4 2020-07-07 2020-07-07
O MEALEY, S EF EDUCATION FIRST 6000.00 1 2 2020-03-31 2020-03-31
PAGE, A VILLAGE TOURS LLC - CHART 17619.60 2 3 2020-11-30 2020-12-21
PERRY, S DISNEY RESORTS-RESE 12395.38 1 1 2020-03-11 2020-03-11
PESTRAK, S HILTON BIRMINGHAM AT U 10000.00 1 1 2020-03-18 2020-03-18
PITCHER, M OK DEPT OF LABOR 5625.00 1 3 2020-03-27 2020-03-27
POLLARD, K LUX BUS AMERICA 5400.00 1 1 2020-02-12 2020-02-12
PRATT, K ESKIMO JOES PROMO PROD 5280.50 1 7 2020-06-17 2020-06-17
PRICE, C APPLE.COM/BILL 6874.50 1 2 2020-10-05 2020-10-05
REUTER, D AMERICAN AIR0017489897739 5476.05 1 1 2020-01-14 2020-01-14
SCHLER, L OPTIMUM ENERGY SOLUTIONS 5829.49 1 2 2020-01-29 2020-01-29
STANUSH, S MARRIOTTALBUQUERQUE 8020.80 1 1 2020-03-27 2020-03-27
STOVER, C RESIDENCE INN STILLWAT 7968.00 1 4 2020-01-23 2020-01-23
SULLIVAN, J COLUMBIA BUSINESS FORMS L 5480.00 1 2 2020-12-16 2020-12-16
THACKER, G EMIST DISINFECTION SOL 10886.49 1 1 2020-06-11 2020-06-11
THACKER, G SAVE RITE MEDICAL LLC 12096.00 2 2 2020-08-04 2020-08-05
TIKALSKY, P TULSA PLASTICS 7261.50 1 1 2020-07-02 2020-07-02
TORNAKIAN, M LOWES #00241 5687.31 1 4 2020-08-24 2020-08-24
TREANOR, M EBAY O 04-05635-97742 9250.00 1 1 2020-08-27 2020-08-27
TRIBBLE, T PROFESSIONAL TURF PRODUC 5498.82 1 2 2020-01-07 2020-01-07
TROESTER, L EDDIE V S 0178517 5815.13 1 1 2020-03-09 2020-03-09
WHITMORE, D NATIONAL ASSOCIATION FOR 5175.00 1 2 2020-02-25 2020-02-25
WILLIAMS, J AMAZON.COM FG5XA1C03 AMZN 10000.00 1 1 2020-12-09 2020-12-09
WILLIAMS, J AMAZON.COM PA94U6CN3 AMZN 7000.00 1 1 2020-12-10 2020-12-10
WILLIAMS, J AMAZON.COM VM5Z54QX3 10000.00 1 1 2020-12-10 2020-12-10
WOOLDRIDGE, C RAIL EUROPE NORTH AMER 7682.00 1 2 2020-03-09 2020-03-09
WOZNIAK, K TFS FISHERSCI ECOM HUS 5236.17 1 5 2020-06-27 2020-06-27
# individual transactions are not allowed to exceed $5,000 each
# just pull out raw transaction that exceed threshold
pcard5k <- pcards %>%
  select(Cardholder.Name, Transaction.Date, Merchant, Amount) %>%
  filter(Amount > 5000) %>%
  arrange(Cardholder.Name, Transaction.Date, -Amount)
# write.csv(pcard5k,"pcard5k_test.csv", row.names=FALSE)
knitr::kable(pcard5k, caption = "Individual Transactions > $5,000")  %>%
  kableExtra::kable_styling(latex_options = c("striped", "hold_position"), full_width = F) %>%
  kableExtra:: scroll_box(height = "200px")
Individual Transactions > $5,000
Cardholder.Name Transaction.Date Merchant Amount
AL-HARAKE, M 2020-02-21 HATFIELD & COMPANY INC 5700.00
ARMSTRONG, P 2020-01-13 TEXAS A&M HOTEL & CONFER 5164.73
ARMSTRONG, P 2020-01-16 HILTON GARDEN INN 5290.59
ARMSTRONG, P 2020-01-24 MARRIOTT MORGANTOWN 5209.08
ARMSTRONG, P 2020-02-20 OMNI FORT WORTH HOTEL 6668.09
ARMSTRONG, P 2020-04-08 HILTON GARDEN INN 7752.22
ARMSTRONG, P 2020-06-09 WESTIN KANSAS CITY 5953.86
BAUM, K 2020-06-25 SP PC FACE SHIELD LL 6568.47
BUTLER, M 2020-11-10 HILTON GARDEN INN 10000.00
DUNN, B 2020-06-26 IN GREEN MOUNTAIN TECHNO 5554.00
ESPOSITO, B 2020-01-06 THE READ HOUSE 9694.36
ESPOSITO, B 2020-02-06 ACE EXPRESS COACHES LLC 5587.75
FUKSA, E 2020-06-09 TFS FISHER SCI HUS 5658.78
FUKSA, E 2020-07-09 NCURA 7480.00
HANCOCK, K 2020-09-18 HILTON GARDEN INN 5852.52
HOLMES, K 2020-03-27 GOSAFE 16018.47
HOLMES, K 2020-04-07 SQ ARGO MEDICAL 10390.00
HOLMES, K 2020-04-15 SQ ARGO MEDICAL 9299.05
HOLMES, K 2020-04-30 IN TRIOVA PHARMACEUTICAL 23700.00
HOLMES, K 2020-05-14 IN TRIOVA PHARMACEUTICAL 8750.00
HOLMES, K 2020-05-26 GOSAFE 21161.80
KINDSCHI, J 2020-02-18 VETERANS AFFRS DMC 13339.82
LOFFI, R 2020-03-23 HOME2 SUITES STILLWATER 5964.00
MATERER, N 2020-03-19 WWW.BEYONDLABZ.COM 7600.00
MATTHEWS, W 2020-02-14 DELTA AIR 0067502110031 7165.45
PAGE, A 2020-11-30 VILLAGE TOURS LLC - CHART 5380.00
PAGE, A 2020-12-21 VILLAGE TOURS LLC - CHART 7789.60
PERRY, S 2020-03-11 DISNEY RESORTS-RESE 12395.38
PESTRAK, S 2020-03-18 HILTON BIRMINGHAM AT U 10000.00
POLLARD, K 2020-02-12 LUX BUS AMERICA 5400.00
REUTER, D 2020-01-14 AMERICAN AIR0017489897739 5476.05
STANUSH, S 2020-03-27 MARRIOTTALBUQUERQUE 8020.80
STOVER, C 2020-01-23 RESIDENCE INN STILLWAT 6912.00
THACKER, G 2020-06-11 EMIST DISINFECTION SOL 10886.49
THACKER, G 2020-08-04 SAVE RITE MEDICAL LLC 5184.00
THACKER, G 2020-08-05 SAVE RITE MEDICAL LLC 6912.00
TIKALSKY, P 2020-07-02 TULSA PLASTICS 7261.50
TREANOR, M 2020-08-27 EBAY O 04-05635-97742 9250.00
TROESTER, L 2020-03-09 EDDIE V S 0178517 5815.13
WILLIAMS, J 2020-12-09 AMAZON.COM FG5XA1C03 AMZN 10000.00
WILLIAMS, J 2020-12-10 AMAZON.COM VM5Z54QX3 10000.00
WILLIAMS, J 2020-12-10 AMAZON.COM PA94U6CN3 AMZN 7000.00
# (continued from above)
# create summary file from segmentation by charge per transaction exceeding 5k
pcard5ksum <- pcard5k %>%
  select(Cardholder.Name, Amount, Transaction.Date) %>%
  group_by(Cardholder.Name) %>%
  summarize(Amount_5k = sum(Amount)
    ,num_rows_5k = n()
    ,tran_5k_min = min(Transaction.Date)
    ,tran_5k_max = max(Transaction.Date)
    ) %>%
  arrange(-num_rows_5k, -Amount_5k, Cardholder.Name)
# write.csv(pcard5ksum,"pcard5ksum_test.csv", row.names=FALSE)
knitr::kable(pcard5ksum, caption = "Transactions > $5,000 - Summary") %>%
  kableExtra::kable_styling(latex_options = c("striped", "hold_position"), full_width = F) %>%
  kableExtra:: scroll_box(height = "200px")
Transactions > $5,000 - Summary
Cardholder.Name Amount_5k num_rows_5k tran_5k_min tran_5k_max
HOLMES, K 89319.32 6 2020-03-27 2020-05-26
ARMSTRONG, P 36038.57 6 2020-01-13 2020-06-09
WILLIAMS, J 27000.00 3 2020-12-09 2020-12-10
THACKER, G 22982.49 3 2020-06-11 2020-08-05
ESPOSITO, B 15282.11 2 2020-01-06 2020-02-06
PAGE, A 13169.60 2 2020-11-30 2020-12-21
FUKSA, E 13138.78 2 2020-06-09 2020-07-09
KINDSCHI, J 13339.82 1 2020-02-18 2020-02-18
PERRY, S 12395.38 1 2020-03-11 2020-03-11
BUTLER, M 10000.00 1 2020-11-10 2020-11-10
PESTRAK, S 10000.00 1 2020-03-18 2020-03-18
TREANOR, M 9250.00 1 2020-08-27 2020-08-27
STANUSH, S 8020.80 1 2020-03-27 2020-03-27
MATERER, N 7600.00 1 2020-03-19 2020-03-19
TIKALSKY, P 7261.50 1 2020-07-02 2020-07-02
MATTHEWS, W 7165.45 1 2020-02-14 2020-02-14
STOVER, C 6912.00 1 2020-01-23 2020-01-23
BAUM, K 6568.47 1 2020-06-25 2020-06-25
LOFFI, R 5964.00 1 2020-03-23 2020-03-23
HANCOCK, K 5852.52 1 2020-09-18 2020-09-18
TROESTER, L 5815.13 1 2020-03-09 2020-03-09
AL-HARAKE, M 5700.00 1 2020-02-21 2020-02-21
DUNN, B 5554.00 1 2020-06-26 2020-06-26
REUTER, D 5476.05 1 2020-01-14 2020-01-14
POLLARD, K 5400.00 1 2020-02-12 2020-02-12


NLP with conference calls

Citations:

# Load one conference call as a text file
save <- read_file(r"(SAVEQ22020.txt)")

# Explore
summary(save)
##    Length     Class      Mode 
##         1 character character
# view
save %>% stringr::str_sub(1, 500)
## [1] "SAVE earnings call for the period ending June 30, 2020.\r\n\r\nSpirit Airlines (NYSE:SAVE)\r\n\r\nQ2 2020 Earnings Call\r\n\r\nJul 23, 2020, 10:00 a.m. ET\r\nContents:\r\nPrepared Remarks\r\nQuestions and Answers\r\nCall Participants\r\nPrepared Remarks:\r\n\r\nOperator\r\nWelcome to the 2Q 2020 conference call. My name is Erin, and I'll be your operator for today's call. [Operator instructions] I'll now turn the call over to Maria Cuartas, senior analyst, investor relations. Ma'am, you may begin.\r\nMaria Cuartas -- Senior "
# Count characters
save %>% stringr::str_count()
## [1] 60575
# Look for keywords
# detect keywords
save %>% stringr::str_detect(c('pandemic', 'Pandemic', 'covid', 'Covid', 'covid-19', 'Covid-19', 'covid 19', 'COVID'))
## [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
# based on above count instances of keywords
toupper(save) %>% stringr::str_count(c('COVID','PANDEMIC'))
## [1] 10  6
# count the number of matches of a substring
save %>% stringr::str_count('COVID')
## [1] 10
# Where is this keyword mentioned?
save %>% stringr::str_locate_all('COVID')
## [[1]]
##       start   end
##  [1,]  2541  2545
##  [2,]  3684  3688
##  [3,]  6072  6076
##  [4,] 11512 11516
##  [5,] 12160 12164
##  [6,] 12816 12820
##  [7,] 30382 30386
##  [8,] 41817 41821
##  [9,] 41865 41869
## [10,] 52268 52272
# View surrounding text (requires regex)
save %>% stringr::str_extract_all(".{50}(COVID).{50}")
## [[1]]
## [1] "pirit team has responded to the changes driven by COVID-19. Many of our procedures, processes and work en"
## [2] "n. Given the significant decline in demand due to COVID-19, we dramatically reduced our flight schedule f"
## [3] "ght attendant workforce prior to the onset of the COVID-19 pandemic. Enhancing and preserving liquidity r"
## [4] "ed negative in mid-June regarding the increase of COVID-19 cases, demand for July pulled off a bit. Havin"
## [5] "ervice to all the destinations we served prior to COVID-19. In the international markets where we have re"
## [6] "re been a difference in demand progression in the COVID hotspot states, if you will, pardon me, versus th"
## [7] "g between the time when you actually hit your pre-COVID capacity levels and when you hit your pre-COVID u"
## [8] "tion of your customers are older just in light of COVID? Did that shift as demand came back? And then hav"
# Tokenize the text
# Change to a tibble (tidy dataframe)
tokens <- tibble(save)

# Tokenize
tokens <- tokens %>% tidytext::unnest_tokens(output=word, input=save, token='words', to_lower=TRUE)

# add order of the words
tokens <- tokens %>% mutate(order = row_number())

# Count tokens
tokens %>% nrow()
## [1] 10440
# First few words
tokens[1:15, ]
## # A tibble: 15 x 2
##    word      order
##    <chr>     <int>
##  1 save          1
##  2 earnings      2
##  3 call          3
##  4 for           4
##  5 the           5
##  6 period        6
##  7 ending        7
##  8 june          8
##  9 30            9
## 10 2020         10
## 11 spirit       11
## 12 airlines     12
## 13 nyse:save    13
## 14 q2           14
## 15 2020         15
# count the number of matches of a substring
tokens %>% dplyr::filter(word == str_sub('covid')) %>% count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1    10
# or
tokens %>% filter(toupper(word) == 'COVID') %>% count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1    10
# Where is this keyword mentioned?
tokens %>% dplyr::filter(word == str_sub('covid'))
## # A tibble: 10 x 2
##    word  order
##    <chr> <int>
##  1 covid   413
##  2 covid   593
##  3 covid   984
##  4 covid  1886
##  5 covid  1999
##  6 covid  2115
##  7 covid  5163
##  8 covid  7171
##  9 covid  7180
## 10 covid  9013
# or
tokens %>% filter(toupper(word) == 'COVID')
## # A tibble: 10 x 2
##    word  order
##    <chr> <int>
##  1 covid   413
##  2 covid   593
##  3 covid   984
##  4 covid  1886
##  5 covid  1999
##  6 covid  2115
##  7 covid  5163
##  8 covid  7171
##  9 covid  7180
## 10 covid  9013
# Remove stop words

# Look at the most important frequent words
tokens %>% 
  group_by(word) %>% 
  summarize(count = n()) %>%
  arrange(desc(count)) %>% 
  filter(count > 50) %>% 
  mutate(token = reorder(word, count)) %>% 
  ggplot(aes(x=count, y=token)) +
  geom_col()

# Load custom stopwords
custom_stop_words <- read_csv(r"(stop_words_list.csv)", col_names = FALSE)
## Rows: 1087 Columns: 1
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): X1
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove stop words
tokens <- tokens %>% 
  anti_join(custom_stop_words, by = c('word'='X1'))

tokens %>% nrow()
## [1] 3923
tokens %>% 
  group_by(word) %>% 
  summarize(count = n()) %>%
  arrange(desc(count)) %>% 
  filter(count > 20) %>% 
  mutate(token = reorder(word, count)) %>% 
  ggplot(aes(x=count, y=token)) +
  geom_col()

# Stemming and Lemmatizing

# look at similar words
arrange(tokens, word)[316:325, ]
## # A tibble: 10 x 2
##    word      order
##    <chr>     <int>
##  1 airlines   9555
##  2 airlines   9684
##  3 airlines   9739
##  4 airplane   8567
##  5 airplanes  3904
##  6 airplanes  3917
##  7 airplanes  7473
##  8 airplanes  7493
##  9 airport    6390
## 10 airport    6655
#Stem the tokens
stemmed <- tokens %>% mutate(stem = SnowballC::wordStem(word))

# look at similar words now
arrange(stemmed, word)[316:325, ]
## # A tibble: 10 x 3
##    word      order stem   
##    <chr>     <int> <chr>  
##  1 airlines   9555 airlin 
##  2 airlines   9684 airlin 
##  3 airlines   9739 airlin 
##  4 airplane   8567 airplan
##  5 airplanes  3904 airplan
##  6 airplanes  3917 airplan
##  7 airplanes  7473 airplan
##  8 airplanes  7493 airplan
##  9 airport    6390 airport
## 10 airport    6655 airport
stemmed %>% 
  group_by(stem) %>% 
  summarize(count = n()) %>%
  arrange(desc(count)) %>% 
  filter(count > 20) %>% 
  mutate(token = reorder(stem, count)) %>% 
  ggplot(aes(x=count, y=token)) +
  geom_col()

# Key words

set.seed(77)

stemmed %>% 
  group_by(word) %>% 
  summarize(count = n()) %>% 
  with(wordcloud(words=word, freq=count, min.freq=1, max.words=100, random.order=F, rot.per=0.30, colors=brewer.pal(8, "Dark2")))

# Sentiment total

# load finance sentiment list and explore it
lm_dict <- tidytext::get_sentiments('loughran')

# view dictionary
lm_dict %>% group_by(sentiment) %>% summarize(count = n())
## # A tibble: 6 x 2
##   sentiment    count
##   <chr>        <int>
## 1 constraining   184
## 2 litigious      904
## 3 negative      2355
## 4 positive       354
## 5 superfluous     56
## 6 uncertainty    297
# Add sentiment
sentimented <- stemmed %>% 
  inner_join(lm_dict, by = 'word')

# Explore totals
sentimented %>% 
  group_by(sentiment) %>% 
  summarize(count = n(), percent = count/nrow(sentimented))
## # A tibble: 5 x 3
##   sentiment    count percent
##   <chr>        <int>   <dbl>
## 1 constraining    11   0.044
## 2 litigious       10   0.04 
## 3 negative        87   0.348
## 4 positive       105   0.42 
## 5 uncertainty     37   0.148
sentimented %>% 
  group_by(sentiment) %>% 
  summarize(count = n(), percent = count/nrow(sentimented)) %>% 
  ggplot(aes(x='', y=percent, fill=sentiment)) +
  geom_bar(width=1, stat='identity')

# Compare multiple companies

# bring in the data
## used `readChar()` because of encoding error in `read_file()`
file <- r"(AMZNQ42019.txt)"
amazon <- readChar(file, file.info(file)$size)

file <- r"(GOOGLQ42019.txt)"
google <- readChar(file, file.info(file)$size)

file <- r"(LOWQ42019.txt)"
lowes <- readChar(file, file.info(file)$size)

file <- r"(TRIPQ42019.txt)"
trip <- readChar(file, file.info(file)$size)

# Load custom stopwords (if did not do above)
custom_stop_words <- read_csv(r"(stop_words_list.csv)", col_names = FALSE)
## Rows: 1087 Columns: 1
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): X1
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# load finance sentiment list (if did not do above)
lm_dict <- tidytext::get_sentiments('loughran')

# Make one dataframe with all conference calls 
amaznQ42019 <- tibble(amazon) %>% #Create dataframe from the text string
  unnest_tokens(sentence, amazon, token='sentences') %>% #Break into sentences tokens
  mutate(sentence_num = row_number(), call = 'amaznQ42019') %>% #Create sentence number
  unnest_tokens(word, sentence, token='words') %>% #Break into word tokens
  mutate(word_num = row_number()) %>% #Number words
  anti_join(custom_stop_words, by=c('word' = 'X1')) %>% #Remove stop words
  inner_join(lm_dict, by='word') #Add sentiment (keep only sentiment words)

googlQ42019 <- tibble(google) %>% 
  unnest_tokens(sentence, google, token='sentences') %>% 
  mutate(sentence_num = row_number(), call = 'googlQ42019') %>% 
  unnest_tokens(word, sentence) %>% 
  mutate(word_num = row_number()) %>% 
  anti_join(custom_stop_words, by=c('word' = 'X1')) %>% 
  inner_join(lm_dict)
## Joining, by = "word"
lowQ42019 <- tibble(lowes) %>% 
  unnest_tokens(sentence, lowes, token='sentences') %>% 
  mutate(sentence_num = row_number(), call = 'lowQ42019') %>% 
  unnest_tokens(word, sentence) %>% 
  mutate(word_num = row_number()) %>% 
  anti_join(custom_stop_words, by=c('word' = 'X1')) %>% 
  inner_join(lm_dict)
## Joining, by = "word"
tripQ42019 <- tibble(trip) %>% 
  unnest_tokens(sentence, trip, token='sentences') %>% 
  mutate(sentence_num = row_number(), call = 'tripQ42019') %>% 
  unnest_tokens(word, sentence) %>% 
  mutate(word_num = row_number()) %>% 
  anti_join(custom_stop_words, by=c('word' = 'X1')) %>% 
  inner_join(lm_dict)
## Joining, by = "word"
all_firms <- bind_rows(amaznQ42019, googlQ42019, lowQ42019, tripQ42019)
print(all_firms)
## # A tibble: 911 x 5
##    sentence_num call        word          word_num sentiment  
##           <int> <chr>       <chr>            <int> <chr>      
##  1            2 amaznQ42019 questions           18 negative   
##  2            3 amaznQ42019 good                31 positive   
##  3            7 amaznQ42019 questions           92 negative   
##  4           11 amaznQ42019 questions          165 negative   
##  5           12 amaznQ42019 differ             185 uncertainty
##  6           16 amaznQ42019 assumptions        295 uncertainty
##  7           17 amaznQ42019 unpredictable      300 negative   
##  8           17 amaznQ42019 unpredictable      300 uncertainty
##  9           17 amaznQ42019 fluctuations       310 uncertainty
## 10           18 amaznQ42019 assumes            351 uncertainty
## # ... with 901 more rows
# Sentiment
amazon1 <- all_firms %>% 
  filter(call=='amaznQ42019') %>% #Just Amazon
  group_by(call, sentiment) %>% 
  summarize(count = n(), #Count
            percent = count/(all_firms %>% filter(call=='amaznQ42019') %>% nrow())) #Percent for just Amazon
## `summarise()` has grouped output by 'call'. You can override using the `.groups` argument.
google1 <- all_firms %>% 
  filter(call=='googlQ42019') %>%
  group_by(call, sentiment) %>% 
  summarize(count = n(), percent = count/(all_firms %>% filter(call=='googlQ42019') %>% nrow()))
## `summarise()` has grouped output by 'call'. You can override using the `.groups` argument.
low1 <- all_firms %>% 
  filter(call=='lowQ42019') %>%
  group_by(call, sentiment) %>% 
  summarize(count = n(), percent = count/(all_firms %>% filter(call=='lowQ42019') %>% nrow()))
## `summarise()` has grouped output by 'call'. You can override using the `.groups` argument.
trip1 <- all_firms %>% 
  filter(call=='tripQ42019') %>%
  group_by(call, sentiment) %>% 
  summarize(count = n(), percent = count/(all_firms %>% filter(call=='tripQ42019') %>% nrow()))
## `summarise()` has grouped output by 'call'. You can override using the `.groups` argument.
percentages <- bind_rows(amazon1, google1, low1, trip1)
print(percentages)
## # A tibble: 20 x 4
## # Groups:   call [4]
##    call        sentiment    count percent
##    <chr>       <chr>        <int>   <dbl>
##  1 amaznQ42019 constraining     2 0.0183 
##  2 amaznQ42019 litigious        3 0.0275 
##  3 amaznQ42019 negative        27 0.248  
##  4 amaznQ42019 positive        63 0.578  
##  5 amaznQ42019 uncertainty     14 0.128  
##  6 googlQ42019 constraining     3 0.0128 
##  7 googlQ42019 litigious        4 0.0170 
##  8 googlQ42019 negative        43 0.183  
##  9 googlQ42019 positive       161 0.685  
## 10 googlQ42019 uncertainty     24 0.102  
## 11 lowQ42019   constraining    11 0.0296 
## 12 lowQ42019   litigious        1 0.00269
## 13 lowQ42019   negative        55 0.148  
## 14 lowQ42019   positive       267 0.718  
## 15 lowQ42019   uncertainty     38 0.102  
## 16 tripQ42019  constraining     3 0.0154 
## 17 tripQ42019  negative        48 0.246  
## 18 tripQ42019  positive       130 0.667  
## 19 tripQ42019  superfluous      1 0.00513
## 20 tripQ42019  uncertainty     13 0.0667
percentages %>% 
  ggplot(aes(x='', y=percent, fill=sentiment)) +
  geom_bar(width=1, stat='identity') +
  facet_wrap(~call, ncol = 2, scales = "free_x")

# Sentiment over time

all_firms %>% 
  group_by(call, sentence_num, sentiment) %>% 
  summarize(n=n()) %>% 
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>% #Transpose the data for the plot
  mutate(tone = positive - negative) %>% #Create "tone"
  ggplot(aes(x=sentence_num, y=tone, fill=call)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~call, ncol = 2, scales = "free_x")
## `summarise()` has grouped output by 'call', 'sentence_num'. You can override using the `.groups` argument.

# Get individual wordclouds

set.seed(77) #Seed for random number
cloud <- tibble(amazon) %>% #Create dataframe
  unnest_tokens(word, amazon) %>% #Word tokens
  anti_join(custom_stop_words, by=c('word' = 'X1')) %>% #Remove stop words
  group_by(word) %>% 
  summarize(n = n()) %>% 
  with(wordcloud(words=word, freq=n, min.freq=10, max.words=500, random.order=F, rot.per=0.30, colors=brewer.pal(8, "Dark2")))

set.seed(77)
cloud <- tibble(google) %>% 
  unnest_tokens(word, google) %>% 
  anti_join(custom_stop_words, by=c('word' = 'X1')) %>% 
  group_by(word) %>% 
  summarize(n = n()) %>% 
  with(wordcloud(words=word, freq=n, min.freq=10, max.words=500, random.order=F, rot.per=0.30, colors=brewer.pal(8, "Dark2")))

set.seed(77)
cloud <- tibble(lowes) %>% 
  unnest_tokens(word, lowes) %>% 
  anti_join(custom_stop_words, by=c('word' = 'X1')) %>% 
  group_by(word) %>% 
  summarize(n = n()) %>% 
  with(wordcloud(words=word, freq=n, min.freq=15, max.words=500, random.order=F, rot.per=0.30, colors=brewer.pal(8, "Dark2")))

set.seed(77)
cloud <- tibble(trip) %>% 
  unnest_tokens(word, trip) %>% 
  anti_join(custom_stop_words, by=c('word' = 'X1')) %>% 
  group_by(word) %>% 
  summarize(n = n()) %>% 
  with(wordcloud(words=word, freq=n, min.freq=26, max.words=500, random.order=F, rot.per=0.30, colors=brewer.pal(8, "Dark2")))



Robotic Process Automation (RPA)

The rest of the course covers RPA with the Community Edition of UIPath Studio.

Software changes and updates have occurred since the lessons (and most tutorials) were prepared: