pacman::p_load(gt, haven, srvyr, survey, tidyverse, viridis, rstatix, patchwork )
# Folder that holds the analysis inputs. The trailing slash matters: the file
# name below is appended by plain string concatenation.
MyPath <- "C:/Users/ostchegay2/OneDrive - National Institutes of Health/R programs/"

# Build the full path to the DCIS summary table, then read it into `df`.
dcis_csv <- str_c(MyPath, "DCIS Tables 04.v2.csv")
df <- read_csv(file = dcis_csv)
## Rows: 11 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Clinical_Measure
## dbl (4): With_DCIS, Percentage_with_DCIS, Without_DCIS, Percentage_without_DCIS
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact overview of the imported table: one line per column with its type
# and first values.
# `show_col_types` is an argument of read_csv(), not of glimpse(), so it is
# not passed here.
glimpse(df)
## Rows: 11
## Columns: 5
## $ Clinical_Measure <chr> "Height", "Weight", "BMI", "BRCA Mutation", "S…
## $ With_DCIS <dbl> 2113, 1541, 2189, 174, 1469, 1486, 1172, 136, …
## $ Percentage_with_DCIS <dbl> 96, 70, 99, 8, 67, 67, 53, 6, 12, 54, 23
## $ Without_DCIS <dbl> 122616, 93088, 192484, 1218, 61034, 25521, 726…
## $ Percentage_without_DCIS <dbl> 53, 40, 83, 1, 26, 11, 3, 0, 1, 1, 1
# List the column names of the imported table.
names(df)
## [1] "Clinical_Measure" "With_DCIS"
## [3] "Percentage_with_DCIS" "Without_DCIS"
## [5] "Percentage_without_DCIS"
# Convert Clinical_Measure to a factor with an explicit level order, so the
# measures keep this order wherever the factor is used (e.g. on a plot axis).
# Any value not listed here becomes NA.
measure_levels <- c(
  "Height",
  "Weight",
  "BMI",
  "BRCA Mutation",
  "Screening Mammography",
  "Diagnostic Mammography",
  "Biopsy",
  "Estrogen Receptor Status",
  "Radiation Therapy",
  "Surgery",
  "Tamoxifen"
)
df[["Clinical_Measure"]] <- factor(
  df[["Clinical_Measure"]],
  levels = measure_levels
)
# Inspect the table again to confirm the column types.
# `show_col_types` is an argument of read_csv(), not of glimpse(), so it is
# not passed here.
glimpse(df)
## Rows: 11
## Columns: 5
## $ Clinical_Measure <fct> Height, Weight, BMI, BRCA Mutation, Screening …
## $ With_DCIS <dbl> 2113, 1541, 2189, 174, 1469, 1486, 1172, 136, …
## $ Percentage_with_DCIS <dbl> 96, 70, 99, 8, 67, 67, 53, 6, 12, 54, 23
## $ Without_DCIS <dbl> 122616, 93088, 192484, 1218, 61034, 25521, 726…
## $ Percentage_without_DCIS <dbl> 53, 40, 83, 1, 26, 11, 3, 0, 1, 1, 1
# Bar chart of Percentage_with_DCIS for each clinical measure.
# Bar heights are taken directly from the column (no counting); `colour`
# sets the bar outline. The y axis is fixed to 0-100, the x labels are
# rotated to vertical, and the title is centred.
p1 <- ggplot(df, aes(Clinical_Measure, Percentage_with_DCIS)) +
  geom_col(colour = "red") +
  scale_y_continuous(limits = c(0, 100)) +
  labs(
    title = "Females diagnosed with DCIS",
    y = "Percentage"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
# Bar chart of Percentage_without_DCIS for each clinical measure.
# Bar heights are taken directly from the column (no counting); `colour`
# sets the bar outline. The y axis is fixed to 0-100, the x labels are
# rotated to vertical, and the title is centred.
# The title has no leading space, so it centres the same way as the
# with-DCIS chart.
p2 <- ggplot(df, aes(Clinical_Measure, Percentage_without_DCIS)) +
  geom_col(colour = "red") +
  scale_y_continuous(limits = c(0, 100)) +
  labs(
    title = "Females not diagnosed with DCIS",
    y = "Percentage"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
# Combine the two bar charts into a single figure. With patchwork loaded,
# `+` between two ggplot objects composes them; the result is displayed
# because it is not assigned.
p1 + p2
# Print the rows that have a non-missing Clinical_Measure. The result is only
# displayed, not stored, so `df` itself is unchanged.
df[!is.na(df$Clinical_Measure), ]
## # A tibble: 11 × 5
## Clinical_Measure With_DCIS Percentage_with_DCIS Without_DCIS Percen…¹
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Height 2113 96 122616 53
## 2 Weight 1541 70 93088 40
## 3 BMI 2189 99 192484 83
## 4 BRCA Mutation 174 8 1218 1
## 5 Screening Mammography 1469 67 61034 26
## 6 Diagnostic Mammography 1486 67 25521 11
## 7 Biopsy 1172 53 7262 3
## 8 Estrogen Receptor Status 136 6 162 0
## 9 Radiation Therapy 271 12 2096 1
## 10 Surgery 1196 54 2911 1
## 11 Tamoxifen 514 23 1580 1
## # … with abbreviated variable name ¹Percentage_without_DCIS
```