pacman::p_load(gt, haven, srvyr, survey, tidyverse, viridis, rstatix, patchwork )
# Folder holding the input files. The trailing "/" matters because the file
# name is appended by plain string concatenation below.
MyPath <- "C:/Users/ostchegay2/OneDrive - National Institutes of Health/R programs/"
# Import the DCIS summary table (one row per clinical measure).
df <- read_csv(file = paste0(MyPath, "DCIS Tables 04.v2.csv"))
## Rows: 11 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Clinical_Measure
## dbl (4): With_DCIS, Percentage_with_DCIS, Without_DCIS, Percentage_without_DCIS
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect column names, types and leading values of the imported table.
# glimpse() has no `show_col_types` argument (that option belongs to
# read_csv()); it was silently ignored here, so it is dropped.
glimpse(df)
## Rows: 11
## Columns: 5
## $ Clinical_Measure        <chr> "Height", "Weight", "BMI", "BRCA Mutation", "S…
## $ With_DCIS               <dbl> 2113, 1541, 2189, 174, 1469, 1486, 1172, 136, …
## $ Percentage_with_DCIS    <dbl> 96, 70, 99, 8, 67, 67, 53, 6, 12, 54, 23
## $ Without_DCIS            <dbl> 122616, 93088, 192484, 1218, 61034, 25521, 726…
## $ Percentage_without_DCIS <dbl> 53, 40, 83, 1, 26, 11, 3, 0, 1, 1, 1
# List the column names of the imported table.
names(df)
## [1] "Clinical_Measure"        "With_DCIS"              
## [3] "Percentage_with_DCIS"    "Without_DCIS"           
## [5] "Percentage_without_DCIS"
# Convert Clinical_Measure to a factor with an explicit level order so that
# plots keep this order instead of sorting the labels alphabetically.
# Any value not listed here would become NA.
df[["Clinical_Measure"]] <- factor(
  df[["Clinical_Measure"]],
  levels = c(
    "Height",
    "Weight",
    "BMI",
    "BRCA Mutation",
    "Screening Mammography",
    "Diagnostic Mammography",
    "Biopsy",
    "Estrogen Receptor Status",
    "Radiation Therapy",
    "Surgery",
    "Tamoxifen"
  )
)


# Re-inspect the table to confirm Clinical_Measure is now a factor.
# glimpse() has no `show_col_types` argument (that option belongs to
# read_csv()); it was silently ignored here, so it is dropped.
glimpse(df)
## Rows: 11
## Columns: 5
## $ Clinical_Measure        <fct> Height, Weight, BMI, BRCA Mutation, Screening …
## $ With_DCIS               <dbl> 2113, 1541, 2189, 174, 1469, 1486, 1172, 136, …
## $ Percentage_with_DCIS    <dbl> 96, 70, 99, 8, 67, 67, 53, 6, 12, 54, 23
## $ Without_DCIS            <dbl> 122616, 93088, 192484, 1218, 61034, 25521, 726…
## $ Percentage_without_DCIS <dbl> 53, 40, 83, 1, 26, 11, 3, 0, 1, 1, 1
# Build one bar chart of a percentage column across the clinical measures.
#   data    : data frame with `Clinical_Measure` and the percentage column
#   pct_col : name (string) of the column mapped to the y axis
#   title   : plot title, used verbatim
# Returns a ggplot object. Both panels are built by this helper so their
# y scale (0-100) and theme cannot drift apart.
plot_dcis_percentage <- function(data, pct_col, title) {
  ggplot(data = data, aes(x = Clinical_Measure, y = .data[[pct_col]])) +
    # geom_col() is the idiomatic form of geom_bar(stat = "identity");
    # `color` sets the bar outline only.
    geom_col(color = "red") +
    scale_y_continuous(limits = c(0, 100)) +
    labs(title = title, y = "Percentage") +
    theme(
      # Rotate the long measure names so they do not overlap.
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      plot.title = element_text(hjust = 0.5)
    )
}

p1 <- plot_dcis_percentage(
  df,
  pct_col = "Percentage_with_DCIS",
  title = "Females diagnosed with DCIS"
)

# The leading spaces in this title are kept exactly as in the original.
p2 <- plot_dcis_percentage(
  df,
  pct_col = "Percentage_without_DCIS",
  title = "    Females not diagnosed with DCIS"
)

# Show the two panels side by side (patchwork's `+` operator).
p1 + p2

# Print the rows whose Clinical_Measure is not NA; a full set of rows here
# shows that every label matched one of the factor levels.
dplyr::filter(df, !is.na(Clinical_Measure))
## # A tibble: 11 × 5
##    Clinical_Measure         With_DCIS Percentage_with_DCIS Without_DCIS Percen…¹
##    <fct>                        <dbl>                <dbl>        <dbl>    <dbl>
##  1 Height                        2113                   96       122616       53
##  2 Weight                        1541                   70        93088       40
##  3 BMI                           2189                   99       192484       83
##  4 BRCA Mutation                  174                    8         1218        1
##  5 Screening Mammography         1469                   67        61034       26
##  6 Diagnostic Mammography        1486                   67        25521       11
##  7 Biopsy                        1172                   53         7262        3
##  8 Estrogen Receptor Status       136                    6          162        0
##  9 Radiation Therapy              271                   12         2096        1
## 10 Surgery                       1196                   54         2911        1
## 11 Tamoxifen                      514                   23         1580        1
## # … with abbreviated variable name ¹​Percentage_without_DCIS

```