What words spam mail has in common
# Word cloud built from the text of the rows flagged as spam.
# Arguments are passed to wordcloud() unchanged: scale = c(2, 1),
# min.freq = 50 and a 25-colour rainbow palette.
spam %>%
  filter(spam) %>%
  pull(text) %>%
  wordcloud(scale = c(2, 1), min.freq = 50, colors = rainbow(25))

Phone numbers
# Collect every run of 6 to 14 digits from all messages, where each digit may
# be preceded by spaces and at most one of - + ( ) . as a separator.
# The result is piped into as.data.frame() so the single column is named `.`.
phone_numbers <- spam$text %>%
  str_extract_all("(?: *[-+().]? *\\d){6,14}") %>%
  unlist() %>%
  as.data.frame()
phone_numbers
Plot them (for fun)
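We only have one variable, so we use plot(), which draws each value against
its index. Sadly, most of the points sit near 0. The matches are text that
plot() coerces to numbers (hence the warning below), not a categorical
variable, so this is likely because short matches such as area codes are
tiny next to the longest numbers on a linear axis.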
# Plot the matched strings against their position in the vector. The matches
# are text, so plot() converts them to numbers on the fly; any match that does
# not parse as a number becomes NA, which is what the coercion warning below
# reports.
plot(phone_numbers$.)
Warning in xy.coords(x, y, xlabel, ylabel, log) :
NAs introduced by coercion

(Likely) Phone recordings as emails
# Messages from the whole dataset (spam or not) that contain "press" followed
# by a space and a digit, matched case-insensitively, shown as a data frame.
spam$text %>%
  grep(pattern = "press [[:digit:]]", ignore.case = TRUE, value = TRUE) %>%
  as.data.frame()
Tiny spam filter
Based on the EDA, this should work:
# get all emails that aren't spam (supposedly)
# Number of rows of `spam` whose `spam` column is TRUE, minus the number of
# elements in `not_spam`.
#
# spam:     data frame with a `spam` column used as the row filter and a
#           `text` column.
# not_spam: vector whose length is subtracted from the spam count.
get_slipped <- function(spam, not_spam) {
  # Inside filter(), `spam` resolves to the column of the data frame.
  spam_messages <- spam %>%
    filter(spam) %>%
    pull(text)
  length(spam_messages) - length(not_spam)
}
# Among the messages flagged as spam, keep the ones that do NOT contain "call"
# (case-insensitive; invert = TRUE returns the non-matching elements).
non_spam <- spam %>%
  filter(spam) %>%
  pull(text) %>%
  grep(pattern = "call", ignore.case = TRUE, value = TRUE, invert = TRUE)
# Data-frame copy for display; piping keeps the column name `.`.
non_spam_pretty <- non_spam %>%
  as.data.frame()
# Total number of messages flagged as spam.
print(paste("Amount of spam:", nrow(filter(spam, spam))))
[1] "Amount of spam: 747"
# `non_spam` is built with invert = TRUE, so it holds the spam messages WITHOUT
# "call": the spam a "contains 'call'" filter fails to catch.
# get_slipped() returns (total spam) - length(non_spam), which is the number of
# spam messages WITH "call": the spam the filter does catch.
n_missed <- length(non_spam)
n_caught <- get_slipped(spam, non_spam)
print(paste(
  "Amount of spam without 'call' (slipped through the cracks):",
  n_missed
))
[1] "Amount of spam without 'call' (slipped through the cracks): 400"
print(paste(
  "Amount of spam with 'call' (caught by the filter):",
  n_caught
))
[1] "Amount of spam with 'call' (caught by the filter): 347"
# The filter only counts as good when it catches more spam than it lets through.
print(paste("Good spam filter:", n_caught > n_missed))
[1] "Good spam filter: FALSE"
LS0tDQp0aXRsZTogIkVtYWlsIEVEQSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHdvcmRjbG91ZCkNCiMjIHRoZXNlIGFyZSByZXF1aXJlZCBidXQgbm90IHVzZWQNCmxpYnJhcnkodG0pDQpsaWJyYXJ5KHNsYW0pDQpzcGFtIDwtIHJlYWRfY3N2KCJTcGFtLmNzdiIpICU+JQ0KICBtdXRhdGUoc3BhbSA9IENhdGVnb3J5ID09ICJzcGFtIikgJT4lDQogIHNlbGVjdCgtQ2F0ZWdvcnkpICU+JQ0KICBtdXRhdGUoTWVzc2FnZXMgPSBnc3ViKCJcdUZGRkQiLCAiIiwgTWVzc2FnZXMsIGZpeGVkPVRSVUUpKSAlPiUNCiAgcmVuYW1lKHRleHQgPSBNZXNzYWdlcykNCmBgYA0KDQojIyBEYXRhc2V0DQoNCmBgYHtyfQ0Kc3BhbQ0KYGBgDQoNCiMjIFdoYXQgd29yZHMgc3BhbSBtYWlsIGhhdmUgaW4gY29tbW9uDQoNCmBgYHtyIHdhcm5pbmc9RkFMU0V9DQp3b3JkY2xvdWQoKHNwYW0gJT4lIGZpbHRlcihzcGFtKSkkdGV4dCwgc2NhbGU9YygyLDEpLCBtaW4uZnJlcT01MCwgY29sb3JzPXJhaW5ib3coMjUpKQ0KYGBgDQoNCiMjIFBob25lIG51bWJlcnMNCg0KYGBge3J9DQpwaG9uZV9udW1iZXJzIDwtIHN0cl9tYXRjaF9hbGwoc3BhbSR0ZXh0LCAiKD86ICpbLSsoKS5dPyAqXFxkKXs2LDE0fSIpICU+JQ0KICB1bmxpc3QoKSAlPiUNCiAgYXMuZGF0YS5mcmFtZSgpDQpwaG9uZV9udW1iZXJzDQpgYGANCg0KIyMjIFBsb3QgdGhlbSAoZm9yIGZ1bikNCg0KV2Ugb25seSBoYXZlIG9uZSBheGlzLCBzbyB3ZSBuZWVkIHRvIHVzZSBgcGxvdCgpYC4gU2FkbHksIG1vc3Qgb2YgaXQgaXMgbmVhciAwLCBsaWtlbHkgYmVjYXVzZSBvZiBhcmVhIGNvZGVzIG9yIHRoYXQgaXQgaXMgYmVpbmcgdHJlYXRlZCBhcyBhIGNhdGVnb3JpY2FsIHZhcmlhYmxlLg0KDQpgYGB7cn0NCnBsb3QocGhvbmVfbnVtYmVycyQuKQ0KYGBgDQoNCiMjIChMaWtlbHkpIFBob25lIHJlY29yZGluZ3MgYXMgZW1haWxzDQoNCmBgYHtyfQ0KZ3JlcCgicHJlc3MgW1s6ZGlnaXQ6XV0iLCBzcGFtJHRleHQsIGlnbm9yZS5jYXNlID0gVFJVRSwgdmFsdWUgPSBUUlVFKSAlPiUNCiAgYXMuZGF0YS5mcmFtZSgpDQpgYGANCg0KIyMgVGlueSBzcGFtIGZpbHRlcg0KDQpCYXNlZCBvbiB0aGUgRURBLCB0aGlzIHNob3VsZCB3b3JrOg0KDQpgYGB7cn0NCiMgZ2V0IGFsbCBlbWFpbHMgdGhhdCBhcmVuJ3Qgc3BhbSAoc3VwcG9zZWRseSkNCg0KZ2V0X3NsaXBwZWQgPC0gZnVuY3Rpb24oc3BhbSwgbm90X3NwYW0pIHsNCiAgbGVuZ3RoKHNwYW0gJT4lIGZpbHRlcihzcGFtKSAlPiUgcHVsbCh0ZXh0KSkgLSBsZW5ndGgobm90X3NwYW0pDQp9DQoNCm5vbl9zcGFtIDwtIGdyZXAoImNhbGwiLCBzcGFtICU+JSBmaWx0ZXIoc3BhbSkgJT4lIHB1bGwodGV4dCksIGlnbm9yZS5jYXNlID0gVFJVRSwgdmFsdWUgPSBUUlVFLCBpbnZlcnQgPSBUUlVFKQ0Kbm9uX3NwYW1fcHJldHR5IDwtIG5vbl9z
cGFtICU+JSBhcy5kYXRhLmZyYW1lKCkNCnByaW50KHBhc3RlKCJBbW91bnQgb2Ygc3BhbToiLCBsZW5ndGgoc3BhbSAlPiUgZmlsdGVyKHNwYW0pICU+JSBwdWxsKHRleHQpKSwgc2VwPSIgIikpDQpwcmludChwYXN0ZSgiQW1vdW50IG9mICdjYWxsJyB0ZXh0OiIsIGxlbmd0aChub25fc3BhbSksIHNlcD0iICIpKQ0KcHJpbnQocGFzdGUoIkFtb3VudCBvZiBtYWlsIHRoYXQgc2xpcHBlZCB0aHJvdWdoIHRoZSBjcmFja3M6IiwgZ2V0X3NsaXBwZWQoc3BhbSwgbm9uX3NwYW0pLCBzZXA9IiAiKSkNCnByaW50KHBhc3RlKCJHb29kIHNwYW0gZmlsdGVyOiIsIGdldF9zbGlwcGVkKHNwYW0sIG5vbl9zcGFtKSA8IGxlbmd0aChub25fc3BhbSksIHNlcD0iICIpKQ0KYGBg