Number of Cancer Surgeries (Volume) Performed in California Hospitals

#data source : https://data.chhs.ca.gov/dataset/number-of-cancer-surgeries-volume-performed-in-california-hospitals

library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
# Download the hospital-level cancer surgery volumes from the CHHS open data
# portal and preview the first rows.
raw <- fread(
  input = 'https://data.chhs.ca.gov/dataset/dace89fc-69ec-4fe8-a8f4-ddfbdb053020/resource/f3d8578f-a9bf-4f6a-a224-7e5b522cbd76/download/ca-oshpd-cancer-surgeries-gachospitals-20132017.csv'
)
head(raw, n = 6L)
##    Year County  Hospital OSHPDID  Surgery # of Cases (ICD 9)
## 1: 2013        Statewide      NA    COLON               7128
## 2: 2013        Statewide      NA  BLADDER                892
## 3: 2013        Statewide      NA   RECTUM               2128
## 4: 2013        Statewide      NA   BREAST              25829
## 5: 2013        Statewide      NA    BRAIN               2719
## 6: 2013        Statewide      NA PANCREAS                819
##    # of Cases (ICD 10) LONGITUDE LATITUDE
## 1:                   .         .        .
## 2:                   .         .        .
## 3:                   .         .        .
## 4:                   .         .        .
## 5:                   .         .        .
## 6:                   .         .        .

The dataset contains the number (volume) for 11 types of cancer (bladder, breast, brain, colon, esophagus, liver, lung, pancreas, prostate, rectum, and stomach) surgeries performed in California hospitals in 2013-2015. Data are reported for January – September 2015 due to coding changes from ICD-9-CM to ICD-10-CM/PCS for procedures, which began 10/1/2015. Comparisons across years should be made with caution since previous years’ results are based on 12 months of data, while this analysis is based on 9 months of data. The 2015 data may differ from previous years due to the coding change. For all types of cancer surgeries, except breast cancer, the dataset contains surgeries performed in the inpatient hospital setting. For breast cancer surgeries, this dataset includes procedures performed in inpatient and outpatient settings.

Field Title

#—-Important reclassification

Surgeries recorded under the two coding systems, ICD-9 and ICD-10, are combined into a single “total” column.

library(dplyr)

# Give the two case-count columns short, syntactic names, then inspect the
# resulting column types.
surgery <- rename(
  raw,
  ICD9 = "# of Cases (ICD 9)",
  ICD10 = "# of Cases (ICD 10)"
)
str(surgery)
## Classes 'data.table' and 'data.frame':   9841 obs. of  9 variables:
##  $ Year     : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ County   : chr  "" "" "" "" ...
##  $ Hospital : chr  "Statewide" "Statewide" "Statewide" "Statewide" ...
##  $ OSHPDID  : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Surgery  : chr  "COLON" "BLADDER" "RECTUM" "BREAST" ...
##  $ ICD9     : chr  "7128" "892" "2128" "25829" ...
##  $ ICD10    : chr  "." "." "." "." ...
##  $ LONGITUDE: chr  "." "." "." "." ...
##  $ LATITUDE : chr  "." "." "." "." ...
##  - attr(*, ".internal.selfref")=<externalptr>
# The count columns are read as text and use "." where no count is given;
# treat those entries as zero so the two coding systems can be added.
surgery$ICD9 <- replace(surgery$ICD9, surgery$ICD9 == ".", 0)
surgery$ICD10 <- replace(surgery$ICD10, surgery$ICD10 == ".", 0)

# Combined case count across both coding systems.
surgery$total <- with(surgery, as.numeric(ICD9) + as.numeric(ICD10))
# Normalise the surgery labels to upper case.
surgery$Surgery <- toupper(surgery$Surgery)

General statewide information #—————————-

# Keep only the statewide summary rows and the three columns used for the
# statewide plots.
state <- surgery %>%
  filter(Hospital == "Statewide") %>%
  select(Year, Surgery, total)

Including Plots

Looking into year 2016 only

# Statewide totals for 2016, drawn as horizontal bars ordered by volume.
state_2016 <- state %>% filter(Year == 2016)

ggplot(
  state_2016,
  aes(x = reorder(Surgery, -total), y = total, fill = Surgery)
) +
  geom_col() +
  coord_flip()

Grouping by surgery classification

# Stacked bars: one bar per year, split by surgery type.
by_surgery <- group_by(state, Year)

ggplot(by_surgery, aes(x = Year, y = total, fill = Surgery)) +
  geom_col()

We now look at the number of surgeries for particular cancer classifications.

# Yearly statewide volume for four selected cancer types.
# No par(mfrow = ...) layout call is used here: par() only affects base
# graphics, while ggplot2 draws with grid, so it cannot arrange these plots.

#' Bar chart of yearly surgery counts for one cancer type.
#'
#' @param data Data frame with `Year` and `total` columns.
#' @param title Plot title.
#' @return A ggplot object.
plot_yearly_volume <- function(data, title) {
  ggplot(data, aes(x = Year, y = total, fill = as.factor(Year))) +
    geom_bar(stat = "identity") +
    ggtitle(title)
}

breast <- filter(state, Surgery == "BREAST")
plot_yearly_volume(breast, "Breast Cancer Surgery in California")

colon <- filter(state, Surgery == "COLON")
plot_yearly_volume(colon, "Colon Cancer Surgery in California")

prostate <- filter(state, Surgery == "PROSTATE")
plot_yearly_volume(prostate, "Prostate Cancer Surgery in California")

brain <- filter(state, Surgery == "BRAIN")
plot_yearly_volume(brain, "Brain Cancer Surgery in California")

#———————- # Geographic distribution of breast cancer surgery

# Restore the single-panel base-graphics layout.
par(mfrow=c(1,1))

# Hospital-level rows only (drop the statewide summary rows).
hospital <- filter(surgery, Hospital != "Statewide")

# Top 20 hospitals (ties included) by breast cancer surgery volume in 2017.
# The ranking column is named explicitly: without it top_n() falls back to
# the last column of the table, which silently changes meaning if a column
# is ever added after `total`.
hospital_breast_2017 <- filter(hospital, Surgery == "BREAST" & Year == 2017)
top <- hospital_breast_2017 %>%
  top_n(20, total) %>%
  arrange(desc(total))
kable(top) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Year County Hospital OSHPDID Surgery ICD9 ICD10 LONGITUDE LATITUDE total
2017 Los Angeles Cedars Sinai Medical Center 106190555 BREAST 0 755 -118.38061 34.07681 755
2017 Orange Hoag Memorial Hospital Presbyterian 106301205 BREAST 0 625 -117.92949 33.62526 625
2017 Santa Clara Stanford Health Care 106430905 BREAST 0 561 -122.17827 37.43342 561
2017 Orange Kaiser Foundation Hospital – Orange County – Anaheim 106304409 BREAST 0 535 -117.8439815 33.8544317 535
2017 San Bernardino Kaiser Foundation Hospital – Fontana 106361223 BREAST 0 506 -117.43549 34.0725 506
2017 San Diego Sharp Memorial Hospital 106370694 BREAST 0 506 -117.15531 32.80016 506
2017 San Diego Kaiser Foundation Hospital – San Diego 106370730 BREAST 0 501 -117.09447 32.79287 501
2017 Los Angeles City of Hope Helford Clinical Research Hospital 106190176 BREAST 0 473 -117.97152 34.13216 473
2017 San Francisco UC San Francisco Medical Center 106381154 BREAST 0 459 -122.4576564 37.7635148 459
2017 Placer Kaiser Foundation Hospital – Roseville 106314024 BREAST 0 456 -121.25107 38.74636 456
2017 San Francisco California Pacific Medical Center – Pacific Campus 106380929 BREAST 0 429 -122.43102 37.79149 429
2017 San Diego UC San Diego Health System – Hillcrest Medical Center 106370782 BREAST 0 381 -117.16498 32.75387 381
2017 Santa Clara Kaiser Foundation Hospital – Santa Clara 106434153 BREAST 0 359 -121.995774 37.334597 359
2017 Los Angeles Torrance Memorial Medical Center 106190422 BREAST 0 347 -118.34388 33.81258 347
2017 Los Angeles Santa Monica – UCLA Medical Center and Orthopedic Hospital 106190687 BREAST 0 338 -118.48623 34.02755 338
2017 Fresno Clovis Community Medical Center 106100005 BREAST 0 336 -119.66072 36.83745 336
2017 Los Angeles Kaiser Foundation Hospital – Downey 106196403 BREAST 0 335 -118.1290216 33.9171234 335
2017 Los Angeles Ronald Reagan UCLA Medical Center 106190796 BREAST 0 332 -118.445 34.068889 332
2017 Los Angeles Kaiser Foundation Hospital – Baldwin Park 106196035 BREAST 0 319 -117.986 34.06348 319
2017 Contra Costa Kaiser Foundation Hospital – Walnut Creek 106070990 BREAST 0 317 -122.05828 37.8923 317
# Horizontal bar chart of the hospitals in `top`, ordered by volume.
# `top` holds 2017 rows only, so the subtitle states that year rather than
# the 2013-2017 range of the full dataset.
ggplot(top, aes(x = reorder(Hospital, total), y = total)) +
  geom_bar(stat = "identity", fill = "maroon") +
  labs(
    title = "Top Hospitals for Breast Cancer Surgery",
    subtitle = "California, 2017",
    caption = "Illustration by Joe Long",
    x = "Hospital Name",
    y = "Surgery Count"
  ) +
  theme(title = element_text(size = 9, face = "bold")) +
  coord_flip()

## Mapping

library(leaflet)

# Coordinates are stored as text; convert them before mapping.
top$LONGITUDE <- as.numeric(top$LONGITUDE)
top$LATITUDE <- as.numeric(top$LATITUDE)
# Map at most the first 20 rows. Unlike top[1:20, ], head() does not pad the
# result with all-NA rows when `top` has fewer than 20 rows.
leaflet(data = head(top, 20)) %>%
  addTiles() %>%
  addMarkers(~LONGITUDE, ~LATITUDE,
    popup = ~as.character(Hospital),
    label = ~as.character(Hospital)
  )

Top hospitals for colon cancer surgery

# Top 20 hospitals (ties included) by colon cancer surgery volume in 2017.
# The ranking column is passed explicitly instead of relying on top_n()'s
# "last column" fallback.
hospital_colon_2017 <- filter(hospital, Surgery == "COLON" & Year == 2017)
top <- hospital_colon_2017 %>%
  top_n(20, total) %>%
  arrange(desc(total))
kable(top) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Year County Hospital OSHPDID Surgery ICD9 ICD10 LONGITUDE LATITUDE total
2017 Los Angeles Cedars Sinai Medical Center 106190555 COLON 0 129 -118.38061 34.07681 129
2017 Orange Hoag Memorial Hospital Presbyterian 106301205 COLON 0 111 -117.92949 33.62526 111
2017 San Diego UC San Diego Health System – Hillcrest Medical Center 106370782 COLON 0 108 -117.16498 32.75387 108
2017 San Francisco UC San Francisco Medical Center 106381154 COLON 0 108 -122.4576564 37.7635148 108
2017 San Diego Sharp Memorial Hospital 106370694 COLON 0 99 -117.15531 32.80016 99
2017 Santa Clara Stanford Health Care 106430905 COLON 0 99 -122.17827 37.43342 99
2017 Los Angeles Torrance Memorial Medical Center 106190422 COLON 0 96 -118.34388 33.81258 96
2017 San Bernardino Kaiser Foundation Hospital – Fontana 106361223 COLON 0 94 -117.43549 34.0725 94
2017 Orange Kaiser Foundation Hospital – Orange County – Anaheim 106304409 COLON 0 90 -117.8439815 33.8544317 90
2017 San Diego Kaiser Foundation Hospital – San Diego 106370730 COLON 0 88 -117.09447 32.79287 88
2017 San Diego Scripps Mercy Hospital 106370744 COLON 0 87 -117.1609 32.75128 87
2017 San Francisco California Pacific Medical Center – Pacific Campus 106380929 COLON 0 85 -122.43102 37.79149 85
2017 Riverside Eisenhower Medical Center 106331168 COLON 0 84 -116.40808 33.76418 84
2017 Los Angeles Huntington Memorial Hospital 106190400 COLON 0 81 -118.1518 34.13565 81
2017 Los Angeles City of Hope Helford Clinical Research Hospital 106190176 COLON 0 71 -117.97152 34.13216 71
2017 Los Angeles Ronald Reagan UCLA Medical Center 106190796 COLON 0 71 -118.445 34.068889 71
2017 Alameda Alta Bates Summit Medical Center 106010937 COLON 0 69 -122.26257 37.82106 69
2017 Contra Costa Kaiser Foundation Hospital – Walnut Creek 106070990 COLON 0 69 -122.05828 37.8923 69
2017 Placer Kaiser Foundation Hospital – Roseville 106314024 COLON 0 69 -121.25107 38.74636 69
2017 Riverside Riverside Community Hospital 106331312 COLON 0 69 -117.37997 33.97641 69
# Horizontal bar chart of the hospitals in `top`, ordered by volume.
# `top` holds 2017 rows only, so the subtitle states that year rather than
# the 2013-2017 range of the full dataset.
ggplot(top, aes(x = reorder(Hospital, total), y = total)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Top Hospitals for Colon Cancer Surgery",
    subtitle = "California, 2017",
    caption = "Illustration by Joe Long",
    x = "Hospital Name",
    y = "Surgery Count"
  ) +
  theme(title = element_text(size = 9, face = "bold")) +
  coord_flip()

# Top 20 hospitals (ties included) by brain cancer surgery volume in 2017.
# The ranking column is passed explicitly instead of relying on top_n()'s
# "last column" fallback.
hospital_brain_2017 <- filter(hospital, Surgery == "BRAIN" & Year == 2017)
top <- hospital_brain_2017 %>%
  top_n(20, total) %>%
  arrange(desc(total))

# The data are 2017 only, so the subtitle names that year rather than the
# 2013-2017 range of the full dataset.
ggplot(top, aes(x = reorder(Hospital, total), y = total)) +
  geom_bar(stat = "identity", fill = "darkgreen") +
  labs(
    title = "Top Hospitals for Brain Cancer Surgery",
    subtitle = "California, 2017",
    caption = "Illustration by Joe Long",
    x = "Hospital Name",
    y = "Surgery Count"
  ) +
  theme(title = element_text(size = 9, face = "bold")) +
  coord_flip()

# Mapping of top hospitals for brain cancer surgery

library(leaflet)

# Coordinates are stored as text; convert them before mapping.
top$LONGITUDE <- as.numeric(top$LONGITUDE)
top$LATITUDE <- as.numeric(top$LATITUDE)
# Map at most the first 20 rows of `top`. Unlike top[1:20, ], head() does not
# pad the result with all-NA rows when `top` has fewer than 20 rows.
leaflet(data = head(top, 20)) %>%
  addTiles() %>%
  addMarkers(~LONGITUDE, ~LATITUDE,
    popup = ~as.character(Hospital),
    label = ~as.character(Hospital)
  )
# Top 20 hospitals by rectum cancer surgery volume in 2017. top_n() keeps
# ties, so more than 20 rows can be returned. The ranking column is passed
# explicitly instead of relying on top_n()'s "last column" fallback.
hospital_rectum_2017 <- filter(hospital, Surgery == "RECTUM" & Year == 2017)
top <- hospital_rectum_2017 %>%
  top_n(20, total) %>%
  arrange(desc(total))
kable(top) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Year County Hospital OSHPDID Surgery ICD9 ICD10 LONGITUDE LATITUDE total
2017 Santa Clara Stanford Health Care 106430905 RECTUM 0 103 -122.17827 37.43342 103
2017 San Francisco UC San Francisco Medical Center 106381154 RECTUM 0 100 -122.4576564 37.7635148 100
2017 Los Angeles Keck Hospital of University of Southern California 106194219 RECTUM 0 75 -118.20287 34.06284 75
2017 Los Angeles Cedars Sinai Medical Center 106190555 RECTUM 0 74 -118.38061 34.07681 74
2017 Orange UC Irvine Medical Center 106301279 RECTUM 0 58 -117.89064 33.7889 58
2017 Los Angeles City of Hope Helford Clinical Research Hospital 106190176 RECTUM 0 52 -117.97152 34.13216 52
2017 San Bernardino Kaiser Foundation Hospital – Fontana 106361223 RECTUM 0 50 -117.43549 34.0725 50
2017 San Diego UC San Diego Health System – Hillcrest Medical Center 106370782 RECTUM 0 47 -117.16498 32.75387 47
2017 San Francisco Kaiser Foundation Hospital – San Francisco 106380857 RECTUM 0 47 -122.44289 37.78274 47
2017 Orange Hoag Memorial Hospital Presbyterian 106301205 RECTUM 0 46 -117.92949 33.62526 46
2017 Los Angeles Long Beach Memorial Medical Center 106190525 RECTUM 0 44 -118.1852 33.80801 44
2017 Los Angeles Kaiser Foundation Hospital – Sunset 106190429 RECTUM 0 43 -118.29545 34.09823 43
2017 Los Angeles Ronald Reagan UCLA Medical Center 106190796 RECTUM 0 43 -118.445 34.068889 43
2017 San Bernardino Loma Linda University Medical Center 106361246 RECTUM 0 42 -117.26146 34.05164 42
2017 Los Angeles Torrance Memorial Medical Center 106190422 RECTUM 0 38 -118.34388 33.81258 38
2017 Orange Kaiser Foundation Hospital – Orange County – Anaheim 106304409 RECTUM 0 37 -117.8439815 33.8544317 37
2017 Los Angeles Huntington Memorial Hospital 106190400 RECTUM 0 35 -118.1518 34.13565 35
2017 San Diego Scripps Mercy Hospital 106370744 RECTUM 0 35 -117.1609 32.75128 35
2017 San Francisco California Pacific Medical Center – Pacific Campus 106380929 RECTUM 0 34 -122.43102 37.79149 34
2017 Contra Costa John Muir Medical Center – Concord Campus 106071018 RECTUM 0 33 -122.03874 37.98615 33
2017 Sacramento Mercy San Juan Hospital 106340950 RECTUM 0 33 -121.31261 38.66818 33
2017 San Diego Kaiser Foundation Hospital – San Diego 106370730 RECTUM 0 33 -117.09447 32.79287 33
# Horizontal bars for the rectum cancer ranking held in `top`, ordered by
# volume.
ggplot(top, aes(x = reorder(Hospital, total), y = total)) +
  geom_col(fill = "orange") +
  ggtitle("Top Hospitals for Rectum Cancer Surgery") +
  coord_flip()

# Inspect the structure of the hospital-level table (column names and types).
str(hospital)
## 'data.frame':    9786 obs. of  10 variables:
##  $ Year     : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ County   : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
##  $ Hospital : chr  "Alameda Hospital" "Alameda Hospital" "Alameda Hospital" "Alta Bates Summit Medical Center – Alta Bates Campus" ...
##  $ OSHPDID  : int  106010735 106010735 106010735 106010739 106010739 106010739 106010739 106010739 106010739 106010739 ...
##  $ Surgery  : chr  "STOMACH" "COLON" "BREAST" "BRAIN" ...
##  $ ICD9     : chr  "1" "3" "2" "8" ...
##  $ ICD10    : chr  "0" "0" "0" "0" ...
##  $ LONGITUDE: chr  "-122.25362" "-122.25362" "-122.25362" "-122.25784" ...
##  $ LATITUDE : chr  "37.762953" "37.762953" "37.762953" "37.85633" ...
##  $ total    : num  1 3 2 8 12 4 1 95 2 6 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Convert the coordinate columns from text to numeric. "." is used in this
# file as a placeholder for a missing value, so it is mapped to NA
# explicitly; any coercion warning that remains then points at a genuinely
# malformed value.
# NOTE(review): assumes "." is the only non-numeric marker in these columns.
parse_coordinate <- function(x) {
  x[x %in% "."] <- NA
  as.numeric(x)
}
hospital$LONGITUDE <- parse_coordinate(hospital$LONGITUDE)
hospital$LATITUDE <- parse_coordinate(hospital$LATITUDE)

# Total surgeries per hospital over all years and surgery types. summarise()
# yields exactly one row per hospital (keeping its first known coordinates),
# so a hospital with missing or varying coordinates cannot appear twice.
# NOTE(review): hospitals are keyed by name, so a renamed facility is counted
# separately (the rendered table lists both "Stanford Hospital" and
# "Stanford Health Care" at the same coordinates); consider grouping by
# OSHPDID once it is confirmed to be stable across renames.
top_hospital <- hospital %>%
  group_by(Hospital) %>%
  summarise(
    LONGITUDE = LONGITUDE[!is.na(LONGITUDE)][1],
    LATITUDE = LATITUDE[!is.na(LATITUDE)][1],
    Total = sum(total)
  ) %>%
  ungroup() %>%
  arrange(desc(Total)) %>%
  head(20)


# Horizontal bars of overall surgery volume for the hospitals in
# `top_hospital`, ordered by volume.
ggplot(top_hospital, aes(x = reorder(Hospital, Total), y = Total)) +
  geom_col(fill = "darkred") +
  ggtitle("Top Hospitals for Cancer Surgery") +
  coord_flip()

# Same ranking as a styled HTML table.
kable_styling(
  kable(top_hospital),
  bootstrap_options = c("striped", "hover", "condensed")
)
Hospital LONGITUDE LATITUDE Total
UC San Francisco Medical Center -122.4577 37.76351 7121
Cedars Sinai Medical Center -118.3806 34.07681 7075
City of Hope Helford Clinical Research Hospital -117.9715 34.13216 5406
Hoag Memorial Hospital Presbyterian -117.9295 33.62526 5372
Keck Hospital of University of Southern California -118.2029 34.06284 4762
Ronald Reagan UCLA Medical Center -118.4450 34.06889 4000
Kaiser Foundation Hospital – Fontana -117.4355 34.07250 4000
UC San Diego Health System – Hillcrest Medical Center -117.1650 32.75387 3924
Stanford Hospital -122.1783 37.43342 3738
Kaiser Foundation Hospital – San Diego -117.0945 32.79287 3496
Sharp Memorial Hospital -117.1553 32.80016 3425
Stanford Health Care -122.1783 37.43342 3255
Kaiser Foundation Hospital – Orange County – Anaheim -117.8440 33.85443 3211
Kaiser Foundation Hospital – Sunset -118.2955 34.09823 3108
Kaiser Foundation Hospital – Santa Clara -121.9958 37.33460 3027
Kaiser Foundation Hospital – Downey -118.1290 33.91712 2992
California Pacific Medical Center – Pacific Campus -122.4310 37.79149 2875
Kaiser Foundation Hospital – West Los Angeles -118.3757 34.03793 2841
Kaiser Foundation Hospital – South Sacramento -121.4241 38.47144 2816
UC Davis Medical Center -121.4574 38.55438 2501

Mapping

library(leaflet)

# Make sure both coordinate columns are numeric before mapping.
top_hospital[["LONGITUDE"]] <- as.numeric(top_hospital[["LONGITUDE"]])
top_hospital[["LATITUDE"]] <- as.numeric(top_hospital[["LATITUDE"]])
# Place one marker per hospital for the first 20 rows of `top_hospital`.
leaflet(data = top_hospital[seq_len(20), ]) %>%
  addTiles() %>%
  addMarkers(
    ~LONGITUDE,
    ~LATITUDE,
    popup = ~as.character(Hospital),
    label = ~as.character(Hospital)
  )

#Thank you #———————– Joe Long, Cabrillo Research (951) 435-1888