# Read the raw data file, one character string per record line.
# The former `skip = 0` argument is dropped: readLines() has no `skip`
# parameter, and the call only worked because `skip` partially matched
# `skipNul` (0 being coerced to FALSE, which is the default anyway).
inndat <- readLines("base1.dat")
glimpse(inndat)
## chr [1:62539] "000010100150052307201921 100001000000000000000002300611052 2001403135" ...
# Wrap the lines in a one-column tibble (column `value`), which the following
# steps operate on. as_tibble() on a bare vector is discouraged and emits a
# warning recommending tibble::enframe(name = NULL), so that form is used.
inndat.tibble <- tibble::enframe(inndat, name = NULL)
# Keep only the non-empty record lines.
inndat.tibble <- inndat.tibble[
  !is.na(inndat.tibble[["value"]]) & inndat.tibble[["value"]] != "",
]

# Slice the fixed-position key fields out of each line:
# characters 1-5 go to `id`, characters 6-7 go to `cardno`.
inndat.tibble[["id"]] <- substr(inndat.tibble[["value"]], start = 1, stop = 5)
inndat.tibble[["cardno"]] <- substr(inndat.tibble[["value"]], start = 6, stop = 7)

# Preview the first 23 rows.
head(inndat.tibble, n = 23)
## # A tibble: 23 x 3
## value id cardno
## <chr> <chr> <chr>
## 1 000010100150052307201921 100001000000000000000002300611052~ 00001 01
## 2 0000102022002 0102030405 12111 1 00001 02
## 3 0000103 2 00001 03
## 4 000010401222 049039020017015 00001 04
## 5 000010519701980199920022004 00001 05
## 6 00001060301090605 16555 00001 06
## 7 000010708 00001 07
## 8 000010811111 22222 00001 08
## 9 0000109 777 00001 09
## 10 0000110 22222 44444 00001 10
## # ... with 13 more rows
# Count how many record lines carry each card number.
table(inndat.tibble[["cardno"]])
##
## 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16
## 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579
## 17 18 19 20 21 22 23
## 2579 2579 2579 5791 2579 2579 2579
# Pull out the card-20 records. `cardno` is a character column (built with
# substr()), so compare against the string "20" rather than the number 20:
# the numeric form only works through implicit coercion to "20" and would
# silently match nothing for zero-padded card numbers such as "05".
card20 <- inndat.tibble[which(inndat.tibble$cardno == "20"), ]

# Number the card-20 rows within each id, in order of the raw line text, so
# that `dup` > 1 marks ids that have more than one card-20 record.
# NOTE: the result is left grouped by id, exactly as before.
card20.dup <- card20 %>%
  arrange(value, id) %>%
  group_by(id) %>%
  mutate(dup = row_number())

# Show the 100 rows with the highest within-id sequence numbers.
head(card20.dup[order(card20.dup$dup, decreasing = TRUE), ], 100)
## # A tibble: 100 x 4
## # Groups: id [58]
## value id cardno dup
## <chr> <chr> <chr> <int>
## 1 002022001000361611 00202 20 12
## 2 019442001002261611 01944 20 12
## 3 002022001000359591 00202 20 11
## 4 019442001002258581 01944 20 11
## 5 002022001000351511 00202 20 10
## 6 019442001001772721 01944 20 10
## 7 022292001002366661 02229 20 10
## 8 002022001000347471 00202 20 9
## 9 003902001002170761 00390 20 9
## 10 006282001001770761 00628 20 9
## # ... with 90 more rows