library(RPostgres)
library(tidyverse)
## ── Attaching packages ────────────────── tidyverse 1.3.0.9000 ──
## ✓ ggplot2 3.2.1            ✓ purrr   0.3.3.9000  
## ✓ tibble  2.99.99.9014     ✓ dplyr   0.8.4       
## ✓ tidyr   1.0.2            ✓ stringr 1.4.0       
## ✓ readr   1.3.1.9000       ✓ forcats 0.4.0
## ── Conflicts ────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dm)
## 
## Attaching package: 'dm'
## The following object is masked from 'package:stats':
## 
##     filter
user <- keyring::key_get("aact-db-user")
password <- keyring::key_get("aact-db-password", user = user)

con <- dbConnect(
  Postgres(),
  dbname="aact",
  host="aact-db.ctti-clinicaltrials.org",
  port=5432, user = user, password = password
)

dm <- dm::dm_from_src(con, schema = "ctgov")

dm %>%
  dm_get_tables() %>%
  walk(~ print(enum_pk_candidates(.x)))
## # A tibble: 7 x 3
##   columns        candidate why                                                  
##   <keys>         <lgl>     <chr>                                                
## 1 id             TRUE      ""                                                   
## 2 count          FALSE     "has duplicate values: 0, 1, 2, 3, 4, … (>= 7 total)"
## 3 ctgov_group_c… FALSE     "has duplicate values: B1, B10, B11, B12, B13, … (>=…
## 4 nct_id         FALSE     "has duplicate values: NCT00000125, NCT00000134, NCT…
## 5 result_group_… FALSE     "has duplicate values: 3191786, 3191787, 3191788, 31…
## 6 scope          FALSE     "has duplicate values: Overall"                      
## 7 units          FALSE     "has duplicate values: Basal cell carcinoma lesions,…
## # A tibble: 18 x 3
##    columns           candidate why                                              
##    <keys>            <lgl>     <chr>                                            
##  1 id                TRUE      ""                                               
##  2 category          FALSE     "has duplicate values: , 0, 0-1, 0, 1, 0-<10 ng/…
##  3 classification    FALSE     "has duplicate values: , -/-, -/+, +/-, +/+, … (…
##  4 ctgov_group_code  FALSE     "has duplicate values: B1, B10, B11, B12, B13, ……
##  5 description       FALSE     "has duplicate values: , 0-100 scale 0 = maximum…
##  6 dispersion_lower… FALSE     "has duplicate values: -1780.0, -30.9, -30.0, -4…
##  7 dispersion_type   FALSE     "has duplicate values: , Full Range, Inter-Quart…
##  8 dispersion_upper… FALSE     "has duplicate values: -6.40, -1.30, -0.70, -0.6…
##  9 dispersion_value  FALSE     "has duplicate values: 0, 0.0, 0.00, 0.000, 0.00…
## 10 dispersion_value… FALSE     "has duplicate values: 0e+00, 1e-04, 2e-04, 3e-0…
## 11 explanation_of_na FALSE     "has duplicate values: , 1 participant analyzed,…
## 12 nct_id            FALSE     "has duplicate values: NCT00000125, NCT00000134,…
## 13 param_type        FALSE     "has duplicate values: , Count of Participants, …
## 14 param_value       FALSE     "has duplicate values: 0, 00, 0.0, -0.00, 0.00, …
## 15 param_value_num   FALSE     "has duplicate values: -23.5, -15.5, -13.4, -13.…
## 16 result_group_id   FALSE     "has duplicate values: 3185379, 3185380, 3185381…
## 17 title             FALSE     "has duplicate values: 0-180 min Post Prandial G…
## 18 units             FALSE     "has duplicate values: , °, %, ℃, 0-10 units on …
## # A tibble: 3 x 3
##   columns    candidate why                                                      
##   <keys>     <lgl>     <chr>                                                    
## 1 id         TRUE      ""                                                       
## 2 nct_id     TRUE      ""                                                       
## 3 descripti… FALSE     "has duplicate values: \n      1. Investigate degree of …
## # A tibble: 4 x 3
##   columns         candidate why                                                 
##   <keys>          <lgl>     <chr>                                               
## 1 id              TRUE      ""                                                  
## 2 downcase_mesh_… FALSE     "has duplicate values: 22q11 deletion syndrome, 46,…
## 3 mesh_term       FALSE     "has duplicate values: 22q11 Deletion Syndrome, 46,…
## 4 nct_id          FALSE     "has duplicate values: NCT00000102, NCT00000104, NC…
## # A tibble: 4 x 3
##   columns         candidate why                                                 
##   <keys>          <lgl>     <chr>                                               
## 1 id              TRUE      ""                                                  
## 2 downcase_mesh_… FALSE     "has duplicate values: 1018 oligonucleotide, 10-((4…
## 3 mesh_term       FALSE     "has duplicate values: 1018 oligonucleotide, 10-((4…
## 4 nct_id          FALSE     "has duplicate values: NCT00000114, NCT00000117, NC…
## # A tibble: 19 x 3
##    columns                   candidate why                                      
##    <keys>                    <lgl>     <chr>                                    
##  1 id                        TRUE      ""                                       
##  2 nct_id                    TRUE      ""                                       
##  3 actual_duration           FALSE     "has duplicate values: 0, 1, 2, 3, 4, … …
##  4 has_single_facility       FALSE     "has duplicate values: FALSE, TRUE"      
##  5 has_us_facility           FALSE     "has duplicate values: FALSE, TRUE, NA"  
##  6 maximum_age_num           FALSE     "has duplicate values: 1, 2, 3, 4, 5, … …
##  7 maximum_age_unit          FALSE     "has duplicate values: Day, Days, Hour, …
##  8 minimum_age_num           FALSE     "has duplicate values: 1, 2, 3, 4, 5, … …
##  9 minimum_age_unit          FALSE     "has duplicate values: Days, Month, Mont…
## 10 months_to_report_results  FALSE     "has duplicate values: -49, -46, -41, -3…
## 11 nlm_download_date         FALSE     "has duplicate values: 2020-01-31"       
## 12 number_of_facilities      FALSE     "has duplicate values: 1, 2, 3, 4, 5, … …
## 13 number_of_nsae_subjects   FALSE     "has duplicate values: 0, 2, 3, 4, 5, … …
## 14 number_of_other_outcomes… FALSE     "has duplicate values: 1, 2, 3, 4, 5, … …
## 15 number_of_primary_outcom… FALSE     "has duplicate values: 1, 2, 3, 4, 5, … …
## 16 number_of_sae_subjects    FALSE     "has duplicate values: 0, 1, 2, 3, 4, … …
## 17 number_of_secondary_outc… FALSE     "has duplicate values: 1, 2, 3, 4, 5, … …
## 18 registered_in_calendar_y… FALSE     "has duplicate values: 1999, 2000, 2001,…
## 19 were_results_reported     FALSE     "has duplicate values: FALSE, TRUE"      
## # A tibble: 6 x 3
##   columns     candidate why                                                     
##   <keys>      <lgl>     <chr>                                                   
## 1 id          TRUE      ""                                                      
## 2 contact_ty… FALSE     "has duplicate values: backup, primary"                 
## 3 email       FALSE     "has duplicate values: 007378@ntuh.gov.tw, 019466@ntuh.…
## 4 name        FALSE     "has duplicate values: 01 Studienregister MasterAdmins,…
## 5 nct_id      FALSE     "has duplicate values: NCT00001159, NCT00001160, NCT000…
## 6 phone       FALSE     "has duplicate values: 001 203-304-2499, +001 408 515-0…
## # A tibble: 4 x 3
##   columns      candidate why                                                    
##   <keys>       <lgl>     <chr>                                                  
## 1 id           TRUE      ""                                                     
## 2 downcase_na… FALSE     "has duplicate values: 13-valent pneumococcal vaccine,…
## 3 name         FALSE     "has duplicate values: 13-valent Pneumococcal Vaccine,…
## 4 nct_id       FALSE     "has duplicate values: NCT00000108, NCT00000112, NCT00…
## # A tibble: 4 x 3
##   columns candidate why                                                         
##   <keys>  <lgl>     <chr>                                                       
## 1 id      TRUE      ""                                                          
## 2 name    FALSE     "has duplicate values: Afghanistan, Albania, Algeria, Ameri…
## 3 nct_id  FALSE     "has duplicate values: NCT00000173, NCT00000414, NCT0000057…
## 4 removed FALSE     "has duplicate values: TRUE, NA"                            
## # A tibble: 4 x 3
##   columns       candidate why                                                   
##   <keys>        <lgl>     <chr>                                                 
## 1 id            TRUE      ""                                                    
## 2 design_group… FALSE     "has duplicate values: 1914249, 1914270, 1914277, 191…
## 3 intervention… FALSE     "has duplicate values: 1879963, 1879964, 1879973, 187…
## 4 nct_id        FALSE     "has duplicate values: NCT00000105, NCT00000113, NCT0…
## # A tibble: 5 x 3
##   columns    candidate why                                                      
##   <keys>     <lgl>     <chr>                                                    
## 1 id         TRUE      ""                                                       
## 2 descripti… FALSE     "has duplicate values: 0, 0.00 mcg/kg/s The day order wi…
## 3 group_type FALSE     "has duplicate values: Active Comparator, Case, Control,…
## 4 nct_id     FALSE     "has duplicate values: NCT00000105, NCT00000113, NCT0000…
## 5 title      FALSE     "has duplicate values: 0, 0°, 0.004% single-dose, 0.005%…
## # A tibble: 7 x 3
##   columns     candidate why                                                     
##   <keys>      <lgl>     <chr>                                                   
## 1 id          TRUE      ""                                                      
## 2 description FALSE     "has duplicate values: 0, 0.5, 1, 2, 4, 6, 8, 12, 24, 4…
## 3 measure     FALSE     "has duplicate values: 0.3% reduction of HbA1c in the p…
## 4 nct_id      FALSE     "has duplicate values: NCT00000105, NCT00000113, NCT000…
## 5 outcome_ty… FALSE     "has duplicate values: other, primary, secondary"       
## 6 population  FALSE     "has duplicate values: NA"                              
## 7 time_frame  FALSE     "has duplicate values: 0, 0:00, 0:30, 1:00, 1:30, 2:00,…
## # A tibble: 14 x 3
##    columns               candidate why                                          
##    <keys>                <lgl>     <chr>                                        
##  1 id                    TRUE      ""                                           
##  2 nct_id                TRUE      ""                                           
##  3 allocation            FALSE     "has duplicate values: Non-Randomized, Rando…
##  4 caregiver_masked      FALSE     "has duplicate values: TRUE, NA"             
##  5 intervention_model    FALSE     "has duplicate values: Crossover Assignment,…
##  6 intervention_model_d… FALSE     "has duplicate values: 1:1, 1:1 parallel gro…
##  7 investigator_masked   FALSE     "has duplicate values: TRUE, NA"             
##  8 masking               FALSE     "has duplicate values: Double, None (Open La…
##  9 masking_description   FALSE     "has duplicate values: 3 of the 4 conditions…
## 10 observational_model   FALSE     "has duplicate values: Case Control, Case-Co…
## 11 outcomes_assessor_ma… FALSE     "has duplicate values: TRUE, NA"             
## 12 primary_purpose       FALSE     "has duplicate values: Basic Science, Device…
## 13 subject_masked        FALSE     "has duplicate values: TRUE, NA"             
## 14 time_perspective      FALSE     "has duplicate values: Cross-Sectional, Cros…
## # A tibble: 3 x 3
##   columns    candidate why                                                      
##   <keys>     <lgl>     <chr>                                                    
## 1 id         TRUE      ""                                                       
## 2 nct_id     TRUE      ""                                                       
## 3 descripti… FALSE     "has duplicate values: \n      1. All female staff that …
## # A tibble: 6 x 3
##   columns      candidate why                                                    
##   <keys>       <lgl>     <chr>                                                  
## 1 id           TRUE      ""                                                     
## 2 comment      FALSE     "has duplicate values: Clinical Trials at LEO Pharma, …
## 3 document_id  FALSE     "has duplicate values: 0566-16-RMB, 100310, 100351, 10…
## 4 document_ty… FALSE     "has duplicate values: Abstract, Analytic Code, Annota…
## 5 nct_id       FALSE     "has duplicate values: NCT00000474, NCT00000476, NCT00…
## 6 url          FALSE     "has duplicate values: http://accessmedicine.mhmedical…
## # A tibble: 7 x 3
##   columns        candidate why                                                  
##   <keys>         <lgl>     <chr>                                                
## 1 id             TRUE      ""                                                   
## 2 count          FALSE     "has duplicate values: 0, 1, 2, 3, 4, … (>= 7 total)"
## 3 ctgov_group_c… FALSE     "has duplicate values: P1, P10, P11, P12, P13, … (>=…
## 4 nct_id         FALSE     "has duplicate values: NCT00000135, NCT00000378, NCT…
## 5 period         FALSE     "has duplicate values: 001 (Pioglitazone) -3 Months,…
## 6 reason         FALSE     "has duplicate values: [11C]MK-4232 synthesis failur…
## 7 result_group_… FALSE     "has duplicate values: 3185495, 3185496, 3185776, 31…
## # A tibble: 11 x 3
##    columns         candidate why                                                
##    <keys>          <lgl>     <chr>                                              
##  1 id              TRUE      ""                                                 
##  2 nct_id          TRUE      ""                                                 
##  3 criteria        FALSE     "has duplicate values: , \n        165 SLE patient…
##  4 gender          FALSE     "has duplicate values: , All, Female, Male"        
##  5 gender_based    FALSE     "has duplicate values: TRUE, NA"                   
##  6 gender_descrip… FALSE     "has duplicate values: , 10 men and 10 women, 18+,…
##  7 healthy_volunt… FALSE     "has duplicate values: , Accepts Healthy Volunteer…
##  8 maximum_age     FALSE     "has duplicate values: , 100 Days, 100 Years, 101 …
##  9 minimum_age     FALSE     "has duplicate values: , 108 Months, 10 Months, 10…
## 10 population      FALSE     "has duplicate values: , 100, 1000, 100 healthy vo…
## 11 sampling_method FALSE     "has duplicate values: , Non-Probability Sample, P…
## # A tibble: 8 x 3
##   columns candidate why                                                         
##   <keys>  <lgl>     <chr>                                                       
## 1 id      TRUE      ""                                                          
## 2 city    FALSE     "has duplicate values: , 11 Dmitriya Ulyanova str., Moscow,…
## 3 country FALSE     "has duplicate values: , Afghanistan, Albania, Algeria, Ame…
## 4 name    FALSE     "has duplicate values: 001, 001, Novartis Investigational S…
## 5 nct_id  FALSE     "has duplicate values: NCT00000113, NCT00000117, NCT0000012…
## 6 state   FALSE     "has duplicate values: , 3201, 7, Chung-Shan South Road, 82…
## 7 status  FALSE     "has duplicate values: , Active, not recruiting, Available,…
## 8 zip     FALSE     "has duplicate values: , - - - - -, 0, 00, 000, … (>= 7 tot…
## # A tibble: 7 x 3
##   columns     candidate why                                                     
##   <keys>      <lgl>     <chr>                                                   
## 1 id          TRUE      ""                                                      
## 2 contact_ty… FALSE     "has duplicate values: backup, primary"                 
## 3 email       FALSE     "has duplicate values: 007378@ntuh.gov.tw, 0124dain@gma…
## 4 facility_id FALSE     "has duplicate values: 9282571, 9282575, 9282586, 92826…
## 5 name        FALSE     "has duplicate values: 7 & 8 Biopharma, Aaliya Merali-D…
## 6 nct_id      FALSE     "has duplicate values: NCT00000169, NCT00001281, NCT000…
## 7 phone       FALSE     "has duplicate values: 0, 00033381218543, 0010109244050…
## # A tibble: 5 x 3
##   columns    candidate why                                                      
##   <keys>     <lgl>     <chr>                                                    
## 1 id         TRUE      ""                                                       
## 2 facility_… FALSE     "has duplicate values: 9282571, 9282576, 9282587, 928260…
## 3 name       FALSE     "has duplicate values: a a daw, md, Aadel Chaudhuri, M.D…
## 4 nct_id     FALSE     "has duplicate values: NCT00006205, NCT00009633, NCT0003…
## 5 role       FALSE     "has duplicate values: Principal Investigator, Study Cha…
## # A tibble: 4 x 3
##   columns  candidate why                                                        
##   <keys>   <lgl>     <chr>                                                      
## 1 id       TRUE      ""                                                         
## 2 id_type  FALSE     "has duplicate values: nct_alias, org_study_id, secondary_…
## 3 id_value FALSE     "has duplicate values: 0, 000, 0000, 00000, 000000, … (>= …
## 4 nct_id   FALSE     "has duplicate values: NCT00000102, NCT00000104, NCT000001…
## # A tibble: 4 x 3
##   columns       candidate why                                                   
##   <keys>        <lgl>     <chr>                                                 
## 1 id            TRUE      ""                                                    
## 2 intervention… FALSE     "has duplicate values: 1879962, 1880099, 1880103, 188…
## 3 name          FALSE     "has duplicate values: 0, 00884154, 0.1% Octenidine M…
## 4 nct_id        FALSE     "has duplicate values: NCT00000105, NCT00000134, NCT0…
## # A tibble: 5 x 3
##   columns        candidate why                                                  
##   <keys>         <lgl>     <chr>                                                
## 1 id             TRUE      ""                                                   
## 2 description    FALSE     "has duplicate values: 0, 0.0143 mg/kg psilocybin ca…
## 3 intervention_… FALSE     "has duplicate values: Behavioral, Biological, Combi…
## 4 name           FALSE     "has duplicate values: 000-0551 Lotion, 0.0015% tafl…
## 5 nct_id         FALSE     "has duplicate values: NCT00000105, NCT00000110, NCT…
## # A tibble: 3 x 3
##   columns candidate why                                                         
##   <keys>  <lgl>     <chr>                                                       
## 1 id      TRUE      ""                                                          
## 2 name    FALSE     "has duplicate values: Analytic Code, Clinical Study Report…
## 3 nct_id  FALSE     "has duplicate values: NCT00004285, NCT00005485, NCT0002181…
## # A tibble: 4 x 3
##   columns      candidate why                                                    
##   <keys>       <lgl>     <chr>                                                  
## 1 id           TRUE      ""                                                     
## 2 downcase_na… FALSE     "has duplicate values: 0145, 0382, 061, 100mm pain vas…
## 3 name         FALSE     "has duplicate values: 0145, 0382, 061, 100mm pain VAS…
## 4 nct_id       FALSE     "has duplicate values: NCT00000110, NCT00000111, NCT00…
## # A tibble: 4 x 3
##   columns    candidate why                                                      
##   <keys>     <lgl>     <chr>                                                    
## 1 id         TRUE      ""                                                       
## 2 descripti… FALSE     "has duplicate values: 12th international symposium on M…
## 3 nct_id     FALSE     "has duplicate values: NCT00000114, NCT00000118, NCT0000…
## 4 url        FALSE     "has duplicate values: http://abclocal.go.com/wabc/story…
## # A tibble: 4 x 3
##   columns    candidate why                                                      
##   <keys>     <lgl>     <chr>                                                    
## 1 id         TRUE      ""                                                       
## 2 heading    FALSE     "has duplicate values: , Amino Acids, Peptides, and Prot…
## 3 qualifier  FALSE     "has duplicate values: , A02, A03, A08, A10, … (>= 7 tot…
## 4 subcatego… FALSE     "has duplicate values: , 2017 MeSH Headings by Subcatego…
## # A tibble: 6 x 3
##   columns         candidate why                                                 
##   <keys>          <lgl>     <chr>                                               
## 1 id              TRUE      ""                                                  
## 2 tree_number     TRUE      ""                                                  
## 3 description     FALSE     "has duplicate values: NA"                          
## 4 downcase_mesh_… FALSE     "has duplicate values: 11-beta-hydroxysteroid dehyd…
## 5 mesh_term       FALSE     "has duplicate values: 11-beta-Hydroxysteroid Dehyd…
## 6 qualifier       FALSE     "has duplicate values: A01, A02, A03, A04, A05, … (…
## # A tibble: 8 x 3
##   columns        candidate why                                                  
##   <keys>         <lgl>     <chr>                                                
## 1 id             TRUE      ""                                                   
## 2 count          FALSE     "has duplicate values: 0, 1, 2, 3, 4, … (>= 7 total)"
## 3 ctgov_group_c… FALSE     "has duplicate values: P1, P10, P11, P12, P13, … (>=…
## 4 description    FALSE     "has duplicate values: , 02 October 2007, 04/15/2009…
## 5 nct_id         FALSE     "has duplicate values: NCT00000125, NCT00000134, NCT…
## 6 period         FALSE     "has duplicate values: 001 (Pioglitazone) -3 Months,…
## 7 result_group_… FALSE     "has duplicate values: 3185383, 3185384, 3185385, 31…
## 8 title          FALSE     "has duplicate values: 100 µL Dose, 100%mg, 100 mg B…
## # A tibble: 22 x 3
##    columns            candidate why                                             
##    <keys>             <lgl>     <chr>                                           
##  1 id                 TRUE      ""                                              
##  2 ci_lower_limit     FALSE     "has duplicate values: -1258.79, -1000.00, -927…
##  3 ci_n_sides         FALSE     "has duplicate values: , 1-Sided, 2-Sided"      
##  4 ci_percent         FALSE     "has duplicate values: 0.00, 0.05, 0.95, 2.00, …
##  5 ci_upper_limit     FALSE     "has duplicate values: -104.30, -90.98, -89.90,…
##  6 ci_upper_limit_na… FALSE     "has duplicate values: , infinity, Not Appropri…
##  7 dispersion_type    FALSE     "has duplicate values: , Standard Deviation, St…
##  8 dispersion_value   FALSE     "has duplicate values: 0.0000, 0.0010, 0.0020, …
##  9 estimate_descript… FALSE     "has duplicate values: , 0.018% relative to veh…
## 10 groups_description FALSE     "has duplicate values: , 0.125 mg t.i.d. IR is …
## # … with 12 more rows
## # A tibble: 5 x 3
##   columns         candidate why                                                 
##   <keys>          <lgl>     <chr>                                               
## 1 id              TRUE      ""                                                  
## 2 ctgov_group_co… FALSE     "has duplicate values: O1, O10, O11, O12, O13, … (>…
## 3 nct_id          FALSE     "has duplicate values: NCT00000378, NCT00000392, NC…
## 4 outcome_analys… FALSE     "has duplicate values: 536160, 536161, 536162, 5361…
## 5 result_group_id FALSE     "has duplicate values: 3185738, 3185746, 3185750, 3…
## # A tibble: 8 x 3
##   columns        candidate why                                                  
##   <keys>         <lgl>     <chr>                                                
## 1 id             TRUE      ""                                                   
## 2 count          FALSE     "has duplicate values: 0, 1, 2, 3, 4, … (>= 7 total)"
## 3 ctgov_group_c… FALSE     "has duplicate values: O1, O10, O11, O12, O13, … (>=…
## 4 nct_id         FALSE     "has duplicate values: NCT00000125, NCT00000134, NCT…
## 5 outcome_id     FALSE     "has duplicate values: 961339, 961340, 961341, 96134…
## 6 result_group_… FALSE     "has duplicate values: 3186226, 3186227, 3186239, 31…
## 7 scope          FALSE     "has duplicate values: Measure"                      
## 8 units          FALSE     "has duplicate values: 3-week periods, Abosorbtion c…
## # A tibble: 19 x 3
##    columns           candidate why                                              
##    <keys>            <lgl>     <chr>                                            
##  1 id                TRUE      ""                                               
##  2 category          FALSE     "has duplicate values: , 0, 0; <1 cell, 01. Miss…
##  3 classification    FALSE     "has duplicate values: , -, +, ++, +++, … (>= 7 …
##  4 ctgov_group_code  FALSE     "has duplicate values: O1, O10, O11, O12, O13, ……
##  5 description       FALSE     "has duplicate values: , 0-100%. 0= no disabilit…
##  6 dispersion_lower… FALSE     "has duplicate values: -29433.03, -1769.00, -174…
##  7 dispersion_type   FALSE     "has duplicate values: , 100% Confidence Interva…
##  8 dispersion_upper… FALSE     "has duplicate values: -680, -467, -249, -155, -…
##  9 dispersion_value  FALSE     "has duplicate values: 0, 00, .00, 0.0, 0.00, … …
## 10 dispersion_value… FALSE     "has duplicate values: -323.00, -125.00, -90.00,…
## 11 explanation_of_na FALSE     "has duplicate values: , 0 patient with hypergly…
## 12 nct_id            FALSE     "has duplicate values: NCT00000125, NCT00000134,…
## 13 outcome_id        FALSE     "has duplicate values: 961339, 961340, 961341, 9…
## 14 param_type        FALSE     "has duplicate values: , Count of Participants, …
## 15 param_value       FALSE     "has duplicate values: 0, -0, .0, 00, -0.0, … (>…
## 16 param_value_num   FALSE     "has duplicate values: -362000000, -229000000, -…
## 17 result_group_id   FALSE     "has duplicate values: 3185397, 3185398, 3185399…
## 18 title             FALSE     "has duplicate values: 0-10 Ordinal Pain Score, …
## 19 units             FALSE     "has duplicate values: , °, (%), $, %, … (>= 7 t…
## # A tibble: 13 x 3
##    columns               candidate why                                          
##    <keys>                <lgl>     <chr>                                        
##  1 id                    TRUE      ""                                           
##  2 anticipated_posting_… FALSE     "has duplicate values: 2008-11-30, 2009-08-3…
##  3 anticipated_posting_… FALSE     "has duplicate values: , 01/2016, 01/2019, 0…
##  4 description           FALSE     "has duplicate values: , 0-100 numerical rat…
##  5 dispersion_type       FALSE     "has duplicate values: , 60% Confidence Inte…
##  6 nct_id                FALSE     "has duplicate values: NCT00000392, NCT00000…
##  7 outcome_type          FALSE     "has duplicate values: Other Pre-specified, …
##  8 param_type            FALSE     "has duplicate values: , Count of Participan…
##  9 population            FALSE     "has duplicate values: , 01-9004 included pa…
## 10 time_frame            FALSE     "has duplicate values: , 0:00, 0:30, 1:00, 1…
## 11 title                 FALSE     "has duplicate values: 100-Day Mortality Aft…
## 12 units                 FALSE     "has duplicate values: , (%), $, %, ℃, … (>=…
## 13 units_analyzed        FALSE     "has duplicate values: , 3-week periods, Act…
## # A tibble: 5 x 3
##   columns    candidate why                                                      
##   <keys>     <lgl>     <chr>                                                    
## 1 id         TRUE      ""                                                       
## 2 affiliati… FALSE     "has duplicate values: 001 844 599 2273 (UCB), 00 331 49…
## 3 name       FALSE     "has duplicate values: . ., ., 01 Studienregister Master…
## 4 nct_id     FALSE     "has duplicate values: NCT00000113, NCT00000361, NCT0000…
## 5 role       FALSE     "has duplicate values: Principal Investigator, Study Cha…
## # A tibble: 4 x 3
##   columns           candidate why                                               
##   <keys>            <lgl>     <chr>                                             
## 1 id                TRUE      ""                                                
## 2 nct_id            TRUE      ""                                                
## 3 pre_assignment_d… FALSE     "has duplicate values: , 113 participants were sc…
## 4 recruitment_deta… FALSE     "has duplicate values: , 20 dry eye subjects, 20 …
## # A tibble: 5 x 3
##   columns           candidate why                                               
##   <keys>            <lgl>     <chr>                                             
## 1 id                TRUE      ""                                                
## 2 event             FALSE     "has duplicate values: returned, submission_cance…
## 3 event_date        FALSE     "has duplicate values: 2008-12-08, 2009-01-06, 20…
## 4 event_date_descr… FALSE     "has duplicate values: April 10, 2012, April 10, …
## 5 nct_id            FALSE     "has duplicate values: NCT00000375, NCT00000387, …
## # A tibble: 8 x 3
##   columns      candidate why                                                    
##   <keys>       <lgl>     <chr>                                                  
## 1 id           TRUE      ""                                                     
## 2 url          TRUE      ""                                                     
## 3 document_da… FALSE     "has duplicate values: 2004-05-23, 2009-04-28, 2009-10…
## 4 document_ty… FALSE     "has duplicate values: Informed Consent Form, Informed…
## 5 has_icf      FALSE     "has duplicate values: FALSE, TRUE"                    
## 6 has_protocol FALSE     "has duplicate values: FALSE, TRUE"                    
## 7 has_sap      FALSE     "has duplicate values: FALSE, TRUE"                    
## 8 nct_id       FALSE     "has duplicate values: NCT00001305, NCT00004635, NCT00…
## # A tibble: 17 x 3
##    columns         candidate why                                                
##    <keys>          <lgl>     <chr>                                              
##  1 id              TRUE      ""                                                 
##  2 adverse_event_… FALSE     "has duplicate values: 0, 0.5CM SKIN ULCER, 1, 101…
##  3 assessment      FALSE     "has duplicate values: , Non-systematic Assessment…
##  4 ctgov_group_co… FALSE     "has duplicate values: E1, E10, E11, E12, E13, … (…
##  5 default_assess… FALSE     "has duplicate values: , Non-systematic Assessment…
##  6 default_vocab   FALSE     "has duplicate values: , 0, 10.0, 10002218, 100053…
##  7 description     FALSE     "has duplicate values: , 0, 01-9004 included parti…
##  8 event_count     FALSE     "has duplicate values: 0, 1, 2, 3, 4, … (>= 7 tota…
##  9 event_type      FALSE     "has duplicate values: other, serious"             
## 10 frequency_thre… FALSE     "has duplicate values: 0, 1, 2, 3, 4, … (>= 7 tota…
## 11 nct_id          FALSE     "has duplicate values: NCT00000125, NCT00000134, N…
## 12 organ_system    FALSE     "has duplicate values: Blood and lymphatic system …
## 13 result_group_id FALSE     "has duplicate values: 3185389, 3185390, 3185391, …
## 14 subjects_affec… FALSE     "has duplicate values: 0, 1, 2, 3, 4, … (>= 7 tota…
## 15 subjects_at_ri… FALSE     "has duplicate values: 0, 1, 2, 3, 4, … (>= 7 tota…
## 16 time_frame      FALSE     "has duplicate values: , 0, 0-120 minutes after in…
## 17 vocab           FALSE     "has duplicate values: 1, 14.0, 14.1, 15.0, 16.0, …
## # A tibble: 7 x 3
##   columns           candidate why                                               
##   <keys>            <lgl>     <chr>                                             
## 1 id                TRUE      ""                                                
## 2 nct_id            TRUE      ""                                                
## 3 affiliation       FALSE     "has duplicate values: 303rd Hospital of the Peop…
## 4 name              FALSE     "has duplicate values: Aadia Rana, Aarne Feldheis…
## 5 organization      FALSE     "has duplicate values: 3rd Department of Surgery,…
## 6 responsible_part… FALSE     "has duplicate values: Principal Investigator, Sp…
## 7 title             FALSE     "has duplicate values: 973 project management dep…
## # A tibble: 4 x 3
##   columns    candidate why                                                      
##   <keys>     <lgl>     <chr>                                                    
## 1 id         TRUE      ""                                                       
## 2 nct_id     TRUE      ""                                                       
## 3 agreement  FALSE     "has duplicate values: 3. Other disclosure agreement tha…
## 4 pi_employ… FALSE     "has duplicate values: All Principal Investigators ARE e…
## # A tibble: 6 x 3
##   columns     candidate why                                                     
##   <keys>      <lgl>     <chr>                                                   
## 1 id          TRUE      ""                                                      
## 2 nct_id      TRUE      ""                                                      
## 3 email       FALSE     "has duplicate values: aabbate@mcvh-vcu.edu, aabbate@vc…
## 4 name        FALSE     "has duplicate values: 1-866-503-6351, Aaron Carroll, A…
## 5 organizati… FALSE     "has duplicate values: 3M, Aalborg University Hospital,…
## 6 phone       FALSE     "has duplicate values: 001-800-718-1021, 003225354219, …
## # A tibble: 6 x 3
##   columns        candidate why                                                  
##   <keys>         <lgl>     <chr>                                                
## 1 id             TRUE      ""                                                   
## 2 ctgov_group_c… FALSE     "has duplicate values: B1, B10, B11, B12, B13, … (>=…
## 3 description    FALSE     "has duplicate values: , 0.001% ASC-J9 cream applied…
## 4 nct_id         FALSE     "has duplicate values: NCT00000125, NCT00000134, NCT…
## 5 result_type    FALSE     "has duplicate values: Baseline, Outcome, Participan…
## 6 title          FALSE     "has duplicate values: , 0.0002% SJP-0035 Ophthalmic…
## # A tibble: 5 x 3
##   columns          candidate why                                                
##   <keys>           <lgl>     <chr>                                              
## 1 id               TRUE      ""                                                 
## 2 agency_class     FALSE     "has duplicate values: Industry, NIH, Other, U.S. …
## 3 lead_or_collabo… FALSE     "has duplicate values: collaborator, lead"         
## 4 name             FALSE     "has duplicate values: 113th Hospital of Chinese P…
## 5 nct_id           FALSE     "has duplicate values: NCT00000104, NCT00000113, N…
## # A tibble: 64 x 3
##    columns          candidate why                                               
##    <keys>           <lgl>     <chr>                                             
##  1 created_at       TRUE      ""                                                
##  2 nct_id           TRUE      ""                                                
##  3 updated_at       TRUE      ""                                                
##  4 acronym          FALSE     "has duplicate values: 2, 2GETHER, 2-HOBA, 3C, 3D…
##  5 baseline_popula… FALSE     "has duplicate values: , All eligible enrolled pa…
##  6 biospec_descrip… FALSE     "has duplicate values: , \n      10cc of peripher…
##  7 biospec_retenti… FALSE     "has duplicate values: None Retained, Samples Wit…
##  8 brief_title      FALSE     "has duplicate values: 12 Week Comparison of 5 Mc…
##  9 completion_date  FALSE     "has duplicate values: 1985-03-31, 1987-12-31, 19…
## 10 completion_date… FALSE     "has duplicate values: Actual, Anticipated, NA"   
## # … with 54 more rows
## # A tibble: 5 x 3
##   columns      candidate why                                                    
##   <keys>       <lgl>     <chr>                                                  
## 1 id           TRUE      ""                                                     
## 2 citation     FALSE     "has duplicate values: 1000 Genomes Project Consortium…
## 3 nct_id       FALSE     "has duplicate values: NCT00000113, NCT00000114, NCT00…
## 4 pmid         FALSE     "has duplicate values: 1, 100122, 1002163, 10022110, 1…
## 5 reference_t… FALSE     "has duplicate values: reference, results_reference"
dm %>%
  dm_get_tables() %>%
  map(colnames) %>%
  map(~ grep("^id$", .x, value = TRUE)) %>%
  enframe() %>%
  unnest(value) %>%
  mutate(text = paste0("  dm_add_pk(", name, ", ", value, ") %>%")) %>%
  pull() %>%
  cli::cat_line()
##   dm_add_pk(baseline_counts, id) %>%
##   dm_add_pk(baseline_measurements, id) %>%
##   dm_add_pk(brief_summaries, id) %>%
##   dm_add_pk(browse_conditions, id) %>%
##   dm_add_pk(browse_interventions, id) %>%
##   dm_add_pk(calculated_values, id) %>%
##   dm_add_pk(central_contacts, id) %>%
##   dm_add_pk(conditions, id) %>%
##   dm_add_pk(countries, id) %>%
##   dm_add_pk(design_group_interventions, id) %>%
##   dm_add_pk(design_groups, id) %>%
##   dm_add_pk(design_outcomes, id) %>%
##   dm_add_pk(designs, id) %>%
##   dm_add_pk(detailed_descriptions, id) %>%
##   dm_add_pk(documents, id) %>%
##   dm_add_pk(drop_withdrawals, id) %>%
##   dm_add_pk(eligibilities, id) %>%
##   dm_add_pk(facilities, id) %>%
##   dm_add_pk(facility_contacts, id) %>%
##   dm_add_pk(facility_investigators, id) %>%
##   dm_add_pk(id_information, id) %>%
##   dm_add_pk(intervention_other_names, id) %>%
##   dm_add_pk(interventions, id) %>%
##   dm_add_pk(ipd_information_types, id) %>%
##   dm_add_pk(keywords, id) %>%
##   dm_add_pk(links, id) %>%
##   dm_add_pk(mesh_headings, id) %>%
##   dm_add_pk(mesh_terms, id) %>%
##   dm_add_pk(milestones, id) %>%
##   dm_add_pk(outcome_analyses, id) %>%
##   dm_add_pk(outcome_analysis_groups, id) %>%
##   dm_add_pk(outcome_counts, id) %>%
##   dm_add_pk(outcome_measurements, id) %>%
##   dm_add_pk(outcomes, id) %>%
##   dm_add_pk(overall_officials, id) %>%
##   dm_add_pk(participant_flows, id) %>%
##   dm_add_pk(pending_results, id) %>%
##   dm_add_pk(provided_documents, id) %>%
##   dm_add_pk(reported_events, id) %>%
##   dm_add_pk(responsible_parties, id) %>%
##   dm_add_pk(result_agreements, id) %>%
##   dm_add_pk(result_contacts, id) %>%
##   dm_add_pk(result_groups, id) %>%
##   dm_add_pk(sponsors, id) %>%
##   dm_add_pk(study_references, id) %>%
dm %>%
  dm_get_tables() %>%
  map(colnames) %>%
  map(~ grep("_id$", .x, value = TRUE)) %>%
  enframe() %>%
  unnest(value) %>%
  mutate(foreign_name = gsub("nct_id", "studie_id", value)) %>%
  mutate(foreign_name = gsub("_id", "s", foreign_name)) %>%
  mutate(text = paste0(
    "  dm_add_fk(", name, ", ", value, ", ", foreign_name, ") %>%"
  )) %>%
  pull() %>%
  cli::cat_line()
##   dm_add_fk(baseline_counts, nct_id, studies) %>%
##   dm_add_fk(baseline_counts, result_group_id, result_groups) %>%
##   dm_add_fk(baseline_measurements, nct_id, studies) %>%
##   dm_add_fk(baseline_measurements, result_group_id, result_groups) %>%
##   dm_add_fk(brief_summaries, nct_id, studies) %>%
##   dm_add_fk(browse_conditions, nct_id, studies) %>%
##   dm_add_fk(browse_interventions, nct_id, studies) %>%
##   dm_add_fk(calculated_values, nct_id, studies) %>%
##   dm_add_fk(central_contacts, nct_id, studies) %>%
##   dm_add_fk(conditions, nct_id, studies) %>%
##   dm_add_fk(countries, nct_id, studies) %>%
##   dm_add_fk(design_group_interventions, nct_id, studies) %>%
##   dm_add_fk(design_group_interventions, design_group_id, design_groups) %>%
##   dm_add_fk(design_group_interventions, intervention_id, interventions) %>%
##   dm_add_fk(design_groups, nct_id, studies) %>%
##   dm_add_fk(design_outcomes, nct_id, studies) %>%
##   dm_add_fk(designs, nct_id, studies) %>%
##   dm_add_fk(detailed_descriptions, nct_id, studies) %>%
##   dm_add_fk(documents, nct_id, studies) %>%
##   dm_add_fk(documents, document_id, documents) %>%
##   dm_add_fk(drop_withdrawals, nct_id, studies) %>%
##   dm_add_fk(drop_withdrawals, result_group_id, result_groups) %>%
##   dm_add_fk(eligibilities, nct_id, studies) %>%
##   dm_add_fk(facilities, nct_id, studies) %>%
##   dm_add_fk(facility_contacts, nct_id, studies) %>%
##   dm_add_fk(facility_contacts, facility_id, facilitys) %>%
##   dm_add_fk(facility_investigators, nct_id, studies) %>%
##   dm_add_fk(facility_investigators, facility_id, facilitys) %>%
##   dm_add_fk(id_information, nct_id, studies) %>%
##   dm_add_fk(intervention_other_names, nct_id, studies) %>%
##   dm_add_fk(intervention_other_names, intervention_id, interventions) %>%
##   dm_add_fk(interventions, nct_id, studies) %>%
##   dm_add_fk(ipd_information_types, nct_id, studies) %>%
##   dm_add_fk(keywords, nct_id, studies) %>%
##   dm_add_fk(links, nct_id, studies) %>%
##   dm_add_fk(milestones, nct_id, studies) %>%
##   dm_add_fk(milestones, result_group_id, result_groups) %>%
##   dm_add_fk(outcome_analyses, nct_id, studies) %>%
##   dm_add_fk(outcome_analyses, outcome_id, outcomes) %>%
##   dm_add_fk(outcome_analysis_groups, nct_id, studies) %>%
##   dm_add_fk(outcome_analysis_groups, outcome_analysis_id, outcome_analysiss) %>%
##   dm_add_fk(outcome_analysis_groups, result_group_id, result_groups) %>%
##   dm_add_fk(outcome_counts, nct_id, studies) %>%
##   dm_add_fk(outcome_counts, outcome_id, outcomes) %>%
##   dm_add_fk(outcome_counts, result_group_id, result_groups) %>%
##   dm_add_fk(outcome_measurements, nct_id, studies) %>%
##   dm_add_fk(outcome_measurements, outcome_id, outcomes) %>%
##   dm_add_fk(outcome_measurements, result_group_id, result_groups) %>%
##   dm_add_fk(outcomes, nct_id, studies) %>%
##   dm_add_fk(overall_officials, nct_id, studies) %>%
##   dm_add_fk(participant_flows, nct_id, studies) %>%
##   dm_add_fk(pending_results, nct_id, studies) %>%
##   dm_add_fk(provided_documents, nct_id, studies) %>%
##   dm_add_fk(reported_events, nct_id, studies) %>%
##   dm_add_fk(reported_events, result_group_id, result_groups) %>%
##   dm_add_fk(responsible_parties, nct_id, studies) %>%
##   dm_add_fk(result_agreements, nct_id, studies) %>%
##   dm_add_fk(result_contacts, nct_id, studies) %>%
##   dm_add_fk(result_groups, nct_id, studies) %>%
##   dm_add_fk(sponsors, nct_id, studies) %>%
##   dm_add_fk(studies, nct_id, studies) %>%
##   dm_add_fk(study_references, nct_id, studies) %>%