# connection string
ch <- odbcConnect("DMU_CMK", uid = "TRACKINGDATAREADER", pwd = "p4_PISk=+MW+Z2~%u_el20zkt")
#################Pull BD Tier 4
BDdata <-sqlQuery(ch, paste("SELECT [State_CD], [County_cd], [Birth_def_CD], [Indicator_num], [Measure_Num],
[Measure_Type], [RPT_YR], [ROLLING_YEAR_COUNT],[INFANT_GNDR_CD],
[Min_rpt_yr], [Max_rpt_yr], [Cal_Level], [Age_band_cd], [Raceethnicity_cd], [PS_Flag],
[STABILITY], [PREVALENCE], [EVENTS_AMT], [Is_Smoothed],[no_data_id],
[TOTAL_LIVE_BIRTHS_AMT], [TOTAL_LIVE_BIRTHS_WITH_BD_AMT]
FROM tier4.T4_BirthDefects"))
# close connection
odbcClose(ch)
Check 1: Check Measure Number
#check1 <-
BDdata %>%
group_by(Measure_Num)%>%
count(Measure_Num)
## # A tibble: 26 x 2
## # Groups: Measure_Num [26]
## Measure_Num n
## <fct> <int>
## 1 M00237 158146
## 2 M00238 300167
## 3 M00239 158146
## 4 M00240 300167
## 5 M00241 151996
## 6 M00242 288242
## 7 M00243 158146
## 8 M00244 300167
## 9 M00245 153226
## 10 M00246 290627
## # ... with 16 more rows
#View(check1)
Check 2: Check Measure Type
#check2 <-
BDdata %>%
group_by(Measure_Type)%>%
count(Measure_Type)
## # A tibble: 2 x 2
## # Groups: Measure_Type [2]
## Measure_Type n
## <fct> <int>
## 1 BASIC 3748980
## 2 SMOOTH 1702840
#View(check2)
Check 3: Check Cal Level
#check3 <-
BDdata %>%
group_by(Cal_Level)%>%
count(Cal_Level)
## # A tibble: 12 x 2
## # Groups: Cal_Level [12]
## Cal_Level n
## <fct> <int>
## 1 S_C_Y 499422
## 2 S_C_Y_A 1687824
## 3 S_C_Y_RE 1997688
## 4 S_C_Y_S 923586
## 5 S_Y 5528
## 6 S_Y_A 18672
## 7 S_Y_A_RE 74688
## 8 S_Y_A_RE_S 136768
## 9 S_Y_A_S 34192
## 10 S_Y_RE 22112
## 11 S_Y_RE_S 41072
## 12 S_Y_S 10268
#View(check3)
Check 4: Check Measure num by Cal Level
#check4 <-
BDdata %>%
group_by(Measure_Num, Cal_Level)%>%
count(Cal_Level)
## # A tibble: 282 x 3
## # Groups: Measure_Num, Cal_Level [282]
## Measure_Num Cal_Level n
## <fct> <fct> <int>
## 1 M00237 S_C_Y 12911
## 2 M00237 S_C_Y_A 51644
## 3 M00237 S_C_Y_RE 51644
## 4 M00237 S_C_Y_S 25822
## 5 M00237 S_Y 215
## 6 M00237 S_Y_A 860
## 7 M00237 S_Y_A_RE 3440
## 8 M00237 S_Y_A_RE_S 6880
## 9 M00237 S_Y_A_S 1720
## 10 M00237 S_Y_RE 860
## # ... with 272 more rows
#View(check4)
Check 5: Check Cal Level by age band
#check5 <-
BDdata %>%
group_by(Cal_Level, Age_band_cd)%>%
count(Age_band_cd)
## # A tibble: 27 x 3
## # Groups: Cal_Level, Age_band_cd [27]
## Cal_Level Age_band_cd n
## <fct> <int> <int>
## 1 S_C_Y NA 499422
## 2 S_C_Y_A 1 421956
## 3 S_C_Y_A 2 421956
## 4 S_C_Y_A 3 421956
## 5 S_C_Y_A 4 421956
## 6 S_C_Y_RE NA 1997688
## 7 S_C_Y_S NA 923586
## 8 S_Y NA 5528
## 9 S_Y_A 1 4668
## 10 S_Y_A 2 4668
## # ... with 17 more rows
#View(check5)
Check 6: Check Cal Level by race ethnicity
check6 <-
BDdata %>%
group_by(Cal_Level, Raceethnicity_cd)%>%
count(Raceethnicity_cd)
## Warning: Factor `Raceethnicity_cd` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `Raceethnicity_cd` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `Raceethnicity_cd` contains implicit NA, consider using
## `forcats::fct_explicit_na`
#View(check6)
head(check6)
## Warning: Factor `Raceethnicity_cd` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `Raceethnicity_cd` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## # A tibble: 6 x 3
## # Groups: Cal_Level, Raceethnicity_cd [6]
## Cal_Level Raceethnicity_cd n
## <fct> <fct> <int>
## 1 S_C_Y <NA> 499422
## 2 S_C_Y_A <NA> 1687824
## 3 S_C_Y_RE B 499422
## 4 S_C_Y_RE H 499422
## 5 S_C_Y_RE O 499422
## 6 S_C_Y_RE W 499422
Check 7: Check Cal Level by ps flag
#check7 <-
BDdata %>%
group_by(Cal_Level, PS_Flag)%>%
count(PS_Flag)
## # A tibble: 16 x 3
## # Groups: Cal_Level, PS_Flag [16]
## Cal_Level PS_Flag n
## <fct> <int> <int>
## 1 S_C_Y 0 332662
## 2 S_C_Y 1 166760
## 3 S_C_Y_A 0 1283500
## 4 S_C_Y_A 1 404324
## 5 S_C_Y_RE 0 1598096
## 6 S_C_Y_RE 1 399592
## 7 S_C_Y_S 0 624494
## 8 S_C_Y_S 1 299092
## 9 S_Y 0 5528
## 10 S_Y_A 0 18672
## 11 S_Y_A_RE 0 74688
## 12 S_Y_A_RE_S 0 136768
## 13 S_Y_A_S 0 34192
## 14 S_Y_RE 0 22112
## 15 S_Y_RE_S 0 41072
## 16 S_Y_S 0 10268
#View(check7)
Check 8: Check county and state
length(which(is.na(BDdata$County_cd)))
## [1] 343300
#343,300
#check8 <-
BDdata %>%
filter(is.na(County_cd))%>%
group_by(State_CD, County_cd)%>%
count(County_cd)
## # A tibble: 22 x 3
## # Groups: State_CD, County_cd [22]
## State_CD County_cd n
## <int> <int> <int>
## 1 4 NA 15600
## 2 8 NA 20930
## 3 9 NA 1610
## 4 12 NA 24150
## 5 19 NA 17710
## 6 20 NA 11530
## 7 21 NA 14490
## 8 23 NA 17460
## 9 24 NA 3220
## 10 25 NA 20930
## # ... with 12 more rows
#View(check8)
Check 9: Check Date Range By State
#check9 <-
BDdata %>%
group_by(State_CD, Min_rpt_yr, Max_rpt_yr)%>%
count((Max_rpt_yr-Min_rpt_yr))
## # A tibble: 242 x 5
## # Groups: State_CD, Min_rpt_yr, Max_rpt_yr [242]
## State_CD Min_rpt_yr Max_rpt_yr `(Max_rpt_yr - Min_rpt_yr)` n
## <int> <int> <int> <int> <int>
## 1 4 2003 2007 4 7140
## 2 4 2004 2008 4 7140
## 3 4 2005 2009 4 7140
## 4 4 2006 2010 4 7140
## 5 4 2007 2011 4 7140
## 6 4 2008 2012 4 7140
## 7 4 2009 2013 4 7140
## 8 4 2010 2014 4 7140
## 9 4 2011 2015 4 7140
## 10 4 2012 2016 4 7140
## # ... with 232 more rows
#View(check9)
Check 10: Skip
Check 11: Skip
Check 12: Check for CA, LA and MN State-Level Measures
list_of_states <- c("6", "22", "27")
check12 <- filter(BDdata, State_CD %in% list_of_states)
unique(check12$State_CD)
## [1] 22 27 6
#check12_actual <-
check12 %>%
group_by(State_CD, Cal_Level)%>%
count(Cal_Level)
## # A tibble: 12 x 3
## # Groups: State_CD, Cal_Level [12]
## State_CD Cal_Level n
## <int> <fct> <int>
## 1 6 S_C_Y 29406
## 2 6 S_C_Y_A 99528
## 3 6 S_C_Y_RE 117624
## 4 6 S_C_Y_S 54288
## 5 22 S_C_Y 8151
## 6 22 S_C_Y_A 27588
## 7 22 S_C_Y_RE 32604
## 8 22 S_C_Y_S 15048
## 9 27 S_C_Y 23751
## 10 27 S_C_Y_A 80388
## 11 27 S_C_Y_RE 95004
## 12 27 S_C_Y_S 43848
#View(check12_actual)
Check 13: Check rolling year count
length(which(BDdata$ROLLING_YEAR_COUNT != 5))
## [1] 0
Check 14: Check no data id
#Conditions for -1: birth defect count > 0 AND live birth count > 0
check14_1 <- subset(BDdata, TOTAL_LIVE_BIRTHS_WITH_BD_AMT >0 & TOTAL_LIVE_BIRTHS_AMT>0)
length(which(check14_1$no_data_id != -1))
## [1] 301106
#Incorrect: n=301106
#Conditions for 0: Birth defect count = 0 AND live birth count > 0
check14_0 <- subset(BDdata, TOTAL_LIVE_BIRTHS_WITH_BD_AMT ==0 & TOTAL_LIVE_BIRTHS_AMT>0)
length(which(check14_0$no_data_id != 0))
## [1] 0
#Correct
#Conditions for 2: NS race code where ethnicity = hispanic
check14_2 <- subset(BDdata, Raceethnicity_cd=="H")
NS <- subset(check14_2, no_data_id ==2)
length(which(check14_2$no_data_id == 2))
## [1] 45045
#Impossible to check in tier 4. No race variable to check NS with Raceethnicitiy code.
#Conditions for 3: birth defect count > 0 and live birth count = 0
check14_3 <- subset(BDdata, TOTAL_LIVE_BIRTHS_WITH_BD_AMT >0 & TOTAL_LIVE_BIRTHS_AMT == 0)
length(which(check14_3$no_data_id != 3))
## [1] 0
#Correct
Check 15: Skip
Check 16: Suppression check
suppression <- subset(BDdata, EVENTS_AMT <6)
length(which(suppression$PS_Flag != 1))
## [1] 1475327
Check 17: Check VT nuance
VTnuance <- subset(BDdata, State_CD == 50 & Min_rpt_yr <2010)
length(which(VTnuance$Indicator_num== "I00010"))
## [1] 0
length(which(VTnuance$Indicator_num== "I00011"))
## [1] 0
Check 18: Check ME nuance
MEnuance <- subset(BDdata, State_CD == 23 & Min_rpt_yr <2008)
length(which(MEnuance$Indicator_num== "I00010"))
## [1] 0
length(which(MEnuance$Indicator_num== "I00011"))
## [1] 0
Check 19: Irrelevant
Check 20: Check KS nuance no records for Birth_def_cd=26 in 2012 and Birth_def_cd=31 in 2013
KSnuance <- subset(BDdata, State_CD == 20 & Min_rpt_yr == 2012)
length(which(KSnuance$Birth_def_CD== "26"))
## [1] 0
unique(KSnuance$Birth_def_CD)
## [1] 29 21 22 28 24 23 25 30 32 27
#31 is also missing
KSnuance2 <- subset(BDdata, State_CD == 20 & Min_rpt_yr == 2013)
length(which(KSnuance2$Birth_def_CD== "31"))
## [1] 0
#No data
Check 21: Check LA nuance
#Expanded variable not in T4.
names(BDdata)
## [1] "State_CD" "County_cd"
## [3] "Birth_def_CD" "Indicator_num"
## [5] "Measure_Num" "Measure_Type"
## [7] "RPT_YR" "ROLLING_YEAR_COUNT"
## [9] "INFANT_GNDR_CD" "Min_rpt_yr"
## [11] "Max_rpt_yr" "Cal_Level"
## [13] "Age_band_cd" "Raceethnicity_cd"
## [15] "PS_Flag" "STABILITY"
## [17] "PREVALENCE" "EVENTS_AMT"
## [19] "Is_Smoothed" "no_data_id"
## [21] "TOTAL_LIVE_BIRTHS_AMT" "TOTAL_LIVE_BIRTHS_WITH_BD_AMT"