# Read all lines of base1.dat into a character vector (one element per line).
# `readLines()` has no `skip` argument: the former `skip = 0` was silently
# partial-matched to `skipNul`, so it is dropped. The default
# `skipNul = FALSE` yields the same result.
inndat <- readLines("base1.dat")
# Quick look at the type, length and first element of what was read.
glimpse(inndat)
##  chr [1:62539] "000010100150052307201921      100001000000000000000002300611052  2001403135" ...
## Warning: Calling `as_tibble()` on a vector is discouraged, because the behavior is likely to change in the future. Use `tibble::enframe(name = NULL)` instead.
## This warning is displayed once per session.
# Wrap the raw lines in a one-column tibble whose column is named `value`.
# The script used `inndat.tibble` without ever creating it; `enframe()` with
# `name = NULL` is the construction the tibble warning recommends instead of
# calling `as_tibble()` on a bare vector.
inndat.tibble <- tibble::enframe(inndat, name = NULL)

# Drop empty lines. `readLines()` never returns NA, so `nzchar()` selects the
# same rows as comparing against "".
inndat.tibble <- inndat.tibble[nzchar(inndat.tibble$value), ]

# Slice two fields out of each line by character position:
# characters 1-5 become `id`, characters 6-7 become `cardno`.
inndat.tibble$id <- substr(inndat.tibble$value, 1, 5)
inndat.tibble$cardno <- substr(inndat.tibble$value, 6, 7)

# Show the first 23 rows.
head(inndat.tibble, n = 23)
## # A tibble: 23 x 3
##    value                                                            id    cardno
##    <chr>                                                            <chr> <chr> 
##  1 000010100150052307201921      100001000000000000000002300611052~ 00001 01    
##  2 0000102022002  0102030405                      12111           1 00001 02    
##  3 0000103 2                                                        00001 03    
##  4 000010401222           049039020017015                           00001 04    
##  5 000010519701980199920022004                                      00001 05    
##  6 00001060301090605                      16555                     00001 06    
##  7 000010708                                                        00001 07    
##  8 000010811111           22222                                     00001 08    
##  9 0000109                                  777                     00001 09    
## 10 0000110                22222           44444                     00001 10    
## # ... with 13 more rows
# Count how many rows carry each two-character card number.
table(inndat.tibble[["cardno"]])
## 
##   01   02   03   04   05   06   07   08   09   10   11   12   13   14   15   16 
## 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 2579 
##   17   18   19   20   21   22   23 
## 2579 2579 2579 5791 2579 2579 2579
# Keep only the rows whose card number is "20". `cardno` is a character
# column, so compare against the string "20" rather than the number 20: the
# numeric form only works through implicit coercion and would silently match
# nothing for zero-padded codes such as "01".
card20 <- inndat.tibble[which(inndat.tibble$cardno == "20"), ]

# Number the card-20 rows within each id, after sorting by the full line
# text (ties broken by id). `dup` is therefore 1 for the first row of an id,
# 2 for the second, and so on. The result stays grouped by `id`.
card20.dup <-
  card20 %>%
  arrange(value, id) %>%
  group_by(id) %>%
  mutate(dup = row_number())

# Show the 100 rows with the highest within-id sequence numbers, i.e. the
# ids that have the most card-20 rows come first.
head(card20.dup[order(card20.dup$dup, decreasing = TRUE), ], n = 100)
## # A tibble: 100 x 4
## # Groups:   id [58]
##    value              id    cardno   dup
##    <chr>              <chr> <chr>  <int>
##  1 002022001000361611 00202 20        12
##  2 019442001002261611 01944 20        12
##  3 002022001000359591 00202 20        11
##  4 019442001002258581 01944 20        11
##  5 002022001000351511 00202 20        10
##  6 019442001001772721 01944 20        10
##  7 022292001002366661 02229 20        10
##  8 002022001000347471 00202 20         9
##  9 003902001002170761 00390 20         9
## 10 006282001001770761 00628 20         9
## # ... with 90 more rows