#data source : https://data.chhs.ca.gov/dataset/number-of-cancer-surgeries-volume-performed-in-california-hospitals
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# Download the cancer-surgery volume CSV from the CHHS open-data portal
# and preview its first six rows.
raw <- fread(
  input = "https://data.chhs.ca.gov/dataset/dace89fc-69ec-4fe8-a8f4-ddfbdb053020/resource/f3d8578f-a9bf-4f6a-a224-7e5b522cbd76/download/ca-oshpd-cancer-surgeries-gachospitals-20132017.csv"
)
head(raw, n = 6L)
## Year County Hospital OSHPDID Surgery # of Cases (ICD 9)
## 1: 2013 Statewide NA COLON 7128
## 2: 2013 Statewide NA BLADDER 892
## 3: 2013 Statewide NA RECTUM 2128
## 4: 2013 Statewide NA BREAST 25829
## 5: 2013 Statewide NA BRAIN 2719
## 6: 2013 Statewide NA PANCREAS 819
## # of Cases (ICD 10) LONGITUDE LATITUDE
## 1: . . .
## 2: . . .
## 3: . . .
## 4: . . .
## 5: . . .
## 6: . . .
The dataset contains the number (volume) for 11 types of cancer (bladder, breast, brain, colon, esophagus, liver, lung, pancreas, prostate, rectum, and stomach) surgeries performed in California hospitals in 2013-2015. Data are reported for January – September 2015 due to coding changes from ICD-9-CM to ICD-10-CM/PCS for procedures, which began 10/1/2015. Comparisons across years should be made with caution since previous years’ results are based on 12 months of data, while this analysis is based on 9 months of data. The 2015 data may differ from previous years due to the coding change. For all types of cancer surgeries, except breast cancer, the dataset contains surgeries performed in the inpatient hospital setting. For breast cancer surgeries, this dataset includes procedures performed in inpatient and outpatient settings.
Field Title
Name Data type Description
Number of cancer surgeries Cases (ICD-10)
#—-Important reclassification
library(dplyr)
# Give the two case-count columns short, syntactic names.
surgery <- rename(
  raw,
  ICD9 = `# of Cases (ICD 9)`,
  ICD10 = `# of Cases (ICD 10)`
)
# Inspect the column types after renaming.
str(surgery)
## Classes 'data.table' and 'data.frame': 9841 obs. of 9 variables:
## $ Year : int 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ County : chr "" "" "" "" ...
## $ Hospital : chr "Statewide" "Statewide" "Statewide" "Statewide" ...
## $ OSHPDID : int NA NA NA NA NA NA NA NA NA NA ...
## $ Surgery : chr "COLON" "BLADDER" "RECTUM" "BREAST" ...
## $ ICD9 : chr "7128" "892" "2128" "25829" ...
## $ ICD10 : chr "." "." "." "." ...
## $ LONGITUDE: chr "." "." "." "." ...
## $ LATITUDE : chr "." "." "." "." ...
## - attr(*, ".internal.selfref")=<externalptr>
# The count columns are read as text and contain "." where no count is
# given; swap those placeholders for zero so the columns can be summed.
surgery$ICD9 <- replace(surgery$ICD9, surgery$ICD9 == ".", 0)
surgery$ICD10 <- replace(surgery$ICD10, surgery$ICD10 == ".", 0)
# Combined case count across the ICD-9 and ICD-10 columns.
surgery$total <- with(surgery, as.numeric(ICD9) + as.numeric(ICD10))
# Normalise the surgery labels to upper case.
surgery$Surgery <- toupper(surgery$Surgery)
General statewide information #—————————-
# Keep only the statewide summary rows and the three columns used below.
state <- surgery %>%
  filter(Hospital == "Statewide") %>%
  select(Year, Surgery, total)
Looking into year 2016 only
# Statewide totals for 2016, one horizontal bar per surgery type,
# with the bars ordered by total.
state_2016 <- state %>% filter(Year == 2016)
ggplot(
  state_2016,
  aes(x = reorder(Surgery, -total), y = total, fill = Surgery)
) +
  geom_col() +
  coord_flip()
Yearly statewide totals, broken down by surgery classification
# Stacked bars: one bar per year, segments coloured by surgery type.
by_surgery <- group_by(state, Year)
ggplot(by_surgery, aes(x = Year, y = total, fill = Surgery)) +
  geom_col()
We now look at the number of surgeries per year for particular cancer classifications
# Statewide yearly totals for four selected surgery types.
#
# The former `par(mfrow = c(2, 2))` call was removed: par() only controls the
# layout of base-graphics plots and has no effect on ggplot2 (grid) output,
# so it never arranged these charts in a 2x2 grid. Each chart is still drawn
# as its own figure; use facets or a grid-layout package if a single 2x2
# panel is wanted.

# Draw a bar chart of `total` per `Year`, one colour per year.
#   data:  rows of `state` for a single surgery type
#   title: plot title
# Returns the ggplot object (auto-printed when called at top level).
plot_yearly_total <- function(data, title) {
  ggplot(data, aes(x = Year, y = total, fill = as.factor(Year))) +
    geom_col() +
    ggtitle(title)
}

breast <- filter(state, Surgery == "BREAST")
plot_yearly_total(breast, "Breast Cancer Surgery in California")

colon <- filter(state, Surgery == "COLON")
plot_yearly_total(colon, "Colon Cancer Surgery in California")

prostate <- filter(state, Surgery == "PROSTATE")
plot_yearly_total(prostate, "Prostate Cancer Surgery in California")

brain <- filter(state, Surgery == "BRAIN")
plot_yearly_total(brain, "Brain Cancer Surgery in California")
#———————- # Geographic distribution of breast cancer surgery
# Reset the base-graphics layout (this does not affect ggplot2 output).
par(mfrow = c(1, 1))
# Hospital-level rows only: drop the statewide summary rows.
hospital <- filter(surgery, Hospital != "Statewide")
hospital_breast_2017 <- filter(hospital, Surgery == "BREAST" & Year == 2017)
# Top 20 hospitals by breast-surgery volume, largest first.
# The ranking column is named explicitly: without `wt`, top_n() ranks by
# whichever column happens to be last, which breaks silently if a column is
# ever added after `total`. Ties at the cut-off are kept, so more than 20
# rows can be returned.
top <- hospital_breast_2017 %>%
  top_n(20, wt = total) %>%
  arrange(desc(total))
## Selecting by total
# Render the breast-surgery ranking as a styled HTML table.
kable_styling(
  kable(top),
  bootstrap_options = c("striped", "hover", "condensed")
)
Year | County | Hospital | OSHPDID | Surgery | ICD9 | ICD10 | LONGITUDE | LATITUDE | total |
---|---|---|---|---|---|---|---|---|---|
2017 | Los Angeles | Cedars Sinai Medical Center | 106190555 | BREAST | 0 | 755 | -118.38061 | 34.07681 | 755 |
2017 | Orange | Hoag Memorial Hospital Presbyterian | 106301205 | BREAST | 0 | 625 | -117.92949 | 33.62526 | 625 |
2017 | Santa Clara | Stanford Health Care | 106430905 | BREAST | 0 | 561 | -122.17827 | 37.43342 | 561 |
2017 | Orange | Kaiser Foundation Hospital – Orange County – Anaheim | 106304409 | BREAST | 0 | 535 | -117.8439815 | 33.8544317 | 535 |
2017 | San Bernardino | Kaiser Foundation Hospital – Fontana | 106361223 | BREAST | 0 | 506 | -117.43549 | 34.0725 | 506 |
2017 | San Diego | Sharp Memorial Hospital | 106370694 | BREAST | 0 | 506 | -117.15531 | 32.80016 | 506 |
2017 | San Diego | Kaiser Foundation Hospital – San Diego | 106370730 | BREAST | 0 | 501 | -117.09447 | 32.79287 | 501 |
2017 | Los Angeles | City of Hope Helford Clinical Research Hospital | 106190176 | BREAST | 0 | 473 | -117.97152 | 34.13216 | 473 |
2017 | San Francisco | UC San Francisco Medical Center | 106381154 | BREAST | 0 | 459 | -122.4576564 | 37.7635148 | 459 |
2017 | Placer | Kaiser Foundation Hospital – Roseville | 106314024 | BREAST | 0 | 456 | -121.25107 | 38.74636 | 456 |
2017 | San Francisco | California Pacific Medical Center – Pacific Campus | 106380929 | BREAST | 0 | 429 | -122.43102 | 37.79149 | 429 |
2017 | San Diego | UC San Diego Health System – Hillcrest Medical Center | 106370782 | BREAST | 0 | 381 | -117.16498 | 32.75387 | 381 |
2017 | Santa Clara | Kaiser Foundation Hospital – Santa Clara | 106434153 | BREAST | 0 | 359 | -121.995774 | 37.334597 | 359 |
2017 | Los Angeles | Torrance Memorial Medical Center | 106190422 | BREAST | 0 | 347 | -118.34388 | 33.81258 | 347 |
2017 | Los Angeles | Santa Monica – UCLA Medical Center and Orthopedic Hospital | 106190687 | BREAST | 0 | 338 | -118.48623 | 34.02755 | 338 |
2017 | Fresno | Clovis Community Medical Center | 106100005 | BREAST | 0 | 336 | -119.66072 | 36.83745 | 336 |
2017 | Los Angeles | Kaiser Foundation Hospital – Downey | 106196403 | BREAST | 0 | 335 | -118.1290216 | 33.9171234 | 335 |
2017 | Los Angeles | Ronald Reagan UCLA Medical Center | 106190796 | BREAST | 0 | 332 | -118.445 | 34.068889 | 332 |
2017 | Los Angeles | Kaiser Foundation Hospital – Baldwin Park | 106196035 | BREAST | 0 | 319 | -117.986 | 34.06348 | 319 |
2017 | Contra Costa | Kaiser Foundation Hospital – Walnut Creek | 106070990 | BREAST | 0 | 317 | -122.05828 | 37.8923 | 317 |
# Horizontal bar chart of the top breast-surgery hospitals.
# `top` was built from rows filtered to Year == 2017, so the subtitle names
# that year instead of the dataset's full 2013-2017 span.
ggplot(top, aes(x = reorder(Hospital, total), y = total)) +
  geom_col(fill = "maroon") +
  labs(
    title = "Top Hospitals for Breast Cancer Surgery",
    subtitle = "California, 2017",
    caption = "Illustration by Joe Long",
    x = "Hospital Name",
    y = "Surgery Count"
  ) +
  theme(title = element_text(size = 9, face = "bold")) +
  coord_flip()
## Mapping
library(leaflet)
# Coordinates are read as text; convert them before mapping.
top$LONGITUDE <- as.numeric(top$LONGITUDE)
top$LATITUDE <- as.numeric(top$LATITUDE)
# Map at most the first 20 hospitals, labelled by name. head() is used
# instead of top[1:20, ] so that a table with fewer than 20 rows is not
# padded with all-NA rows (which would reach addMarkers() as missing points).
leaflet(data = head(top, 20)) %>%
  addTiles() %>%
  addMarkers(
    ~LONGITUDE, ~LATITUDE,
    popup = ~as.character(Hospital),
    label = ~as.character(Hospital)
  )
Top hospitals for colon cancer surgery
hospital_colon_2017 <- filter(hospital, Surgery == "COLON" & Year == 2017)
# Top 20 hospitals by colon-surgery volume, largest first. `wt = total` is
# given explicitly so the ranking does not depend on `total` being the last
# column. Ties at the cut-off are kept.
top <- hospital_colon_2017 %>%
  top_n(20, wt = total) %>%
  arrange(desc(total))
## Selecting by total
# Render the colon-surgery ranking as a styled HTML table.
kable_styling(
  kable(top),
  bootstrap_options = c("striped", "hover", "condensed")
)
Year | County | Hospital | OSHPDID | Surgery | ICD9 | ICD10 | LONGITUDE | LATITUDE | total |
---|---|---|---|---|---|---|---|---|---|
2017 | Los Angeles | Cedars Sinai Medical Center | 106190555 | COLON | 0 | 129 | -118.38061 | 34.07681 | 129 |
2017 | Orange | Hoag Memorial Hospital Presbyterian | 106301205 | COLON | 0 | 111 | -117.92949 | 33.62526 | 111 |
2017 | San Diego | UC San Diego Health System – Hillcrest Medical Center | 106370782 | COLON | 0 | 108 | -117.16498 | 32.75387 | 108 |
2017 | San Francisco | UC San Francisco Medical Center | 106381154 | COLON | 0 | 108 | -122.4576564 | 37.7635148 | 108 |
2017 | San Diego | Sharp Memorial Hospital | 106370694 | COLON | 0 | 99 | -117.15531 | 32.80016 | 99 |
2017 | Santa Clara | Stanford Health Care | 106430905 | COLON | 0 | 99 | -122.17827 | 37.43342 | 99 |
2017 | Los Angeles | Torrance Memorial Medical Center | 106190422 | COLON | 0 | 96 | -118.34388 | 33.81258 | 96 |
2017 | San Bernardino | Kaiser Foundation Hospital – Fontana | 106361223 | COLON | 0 | 94 | -117.43549 | 34.0725 | 94 |
2017 | Orange | Kaiser Foundation Hospital – Orange County – Anaheim | 106304409 | COLON | 0 | 90 | -117.8439815 | 33.8544317 | 90 |
2017 | San Diego | Kaiser Foundation Hospital – San Diego | 106370730 | COLON | 0 | 88 | -117.09447 | 32.79287 | 88 |
2017 | San Diego | Scripps Mercy Hospital | 106370744 | COLON | 0 | 87 | -117.1609 | 32.75128 | 87 |
2017 | San Francisco | California Pacific Medical Center – Pacific Campus | 106380929 | COLON | 0 | 85 | -122.43102 | 37.79149 | 85 |
2017 | Riverside | Eisenhower Medical Center | 106331168 | COLON | 0 | 84 | -116.40808 | 33.76418 | 84 |
2017 | Los Angeles | Huntington Memorial Hospital | 106190400 | COLON | 0 | 81 | -118.1518 | 34.13565 | 81 |
2017 | Los Angeles | City of Hope Helford Clinical Research Hospital | 106190176 | COLON | 0 | 71 | -117.97152 | 34.13216 | 71 |
2017 | Los Angeles | Ronald Reagan UCLA Medical Center | 106190796 | COLON | 0 | 71 | -118.445 | 34.068889 | 71 |
2017 | Alameda | Alta Bates Summit Medical Center | 106010937 | COLON | 0 | 69 | -122.26257 | 37.82106 | 69 |
2017 | Contra Costa | Kaiser Foundation Hospital – Walnut Creek | 106070990 | COLON | 0 | 69 | -122.05828 | 37.8923 | 69 |
2017 | Placer | Kaiser Foundation Hospital – Roseville | 106314024 | COLON | 0 | 69 | -121.25107 | 38.74636 | 69 |
2017 | Riverside | Riverside Community Hospital | 106331312 | COLON | 0 | 69 | -117.37997 | 33.97641 | 69 |
# Horizontal bar chart of the top colon-surgery hospitals.
# `top` was built from rows filtered to Year == 2017, so the subtitle names
# that year instead of the dataset's full 2013-2017 span.
ggplot(top, aes(x = reorder(Hospital, total), y = total)) +
  geom_col(fill = "blue") +
  labs(
    title = "Top Hospitals for Colon Cancer Surgery",
    subtitle = "California, 2017",
    caption = "Illustration by Joe Long",
    x = "Hospital Name",
    y = "Surgery Count"
  ) +
  theme(title = element_text(size = 9, face = "bold")) +
  coord_flip()
hospital_brain_2017 <- filter(hospital, Surgery == "BRAIN" & Year == 2017)
# Top 20 hospitals by brain-surgery volume, largest first. `wt = total` is
# given explicitly so the ranking does not depend on `total` being the last
# column. Ties at the cut-off are kept.
top <- hospital_brain_2017 %>%
  top_n(20, wt = total) %>%
  arrange(desc(total))
## Selecting by total
# Horizontal bar chart of the top brain-surgery hospitals.
# `top` was built from rows filtered to Year == 2017, so the subtitle names
# that year instead of the dataset's full 2013-2017 span.
ggplot(top, aes(x = reorder(Hospital, total), y = total)) +
  geom_col(fill = "darkgreen") +
  labs(
    title = "Top Hospitals for Brain Cancer Surgery",
    subtitle = "California, 2017",
    caption = "Illustration by Joe Long",
    x = "Hospital Name",
    y = "Surgery Count"
  ) +
  theme(title = element_text(size = 9, face = "bold")) +
  coord_flip()
# Mapping of top hospitals for brain cancer surgery
library(leaflet)
# Coordinates are read as text; convert them before mapping.
top$LONGITUDE <- as.numeric(top$LONGITUDE)
top$LATITUDE <- as.numeric(top$LATITUDE)
# Map at most the first 20 rows of `top` (the brain-surgery ranking),
# labelled by hospital name. head() is used instead of top[1:20, ] so that
# a table with fewer than 20 rows is not padded with all-NA rows.
leaflet(data = head(top, 20)) %>%
  addTiles() %>%
  addMarkers(
    ~LONGITUDE, ~LATITUDE,
    popup = ~as.character(Hospital),
    label = ~as.character(Hospital)
  )
hospital_rectum_2017 <- filter(hospital, Surgery == "RECTUM" & Year == 2017)
# Top 20 hospitals by rectum-surgery volume, largest first. `wt = total` is
# given explicitly so the ranking does not depend on `total` being the last
# column. Ties at the cut-off are kept, so more than 20 rows can come back.
top <- hospital_rectum_2017 %>%
  top_n(20, wt = total) %>%
  arrange(desc(total))
## Selecting by total
# Render the rectum-surgery ranking as a styled HTML table.
kable_styling(
  kable(top),
  bootstrap_options = c("striped", "hover", "condensed")
)
Year | County | Hospital | OSHPDID | Surgery | ICD9 | ICD10 | LONGITUDE | LATITUDE | total |
---|---|---|---|---|---|---|---|---|---|
2017 | Santa Clara | Stanford Health Care | 106430905 | RECTUM | 0 | 103 | -122.17827 | 37.43342 | 103 |
2017 | San Francisco | UC San Francisco Medical Center | 106381154 | RECTUM | 0 | 100 | -122.4576564 | 37.7635148 | 100 |
2017 | Los Angeles | Keck Hospital of University of Southern California | 106194219 | RECTUM | 0 | 75 | -118.20287 | 34.06284 | 75 |
2017 | Los Angeles | Cedars Sinai Medical Center | 106190555 | RECTUM | 0 | 74 | -118.38061 | 34.07681 | 74 |
2017 | Orange | UC Irvine Medical Center | 106301279 | RECTUM | 0 | 58 | -117.89064 | 33.7889 | 58 |
2017 | Los Angeles | City of Hope Helford Clinical Research Hospital | 106190176 | RECTUM | 0 | 52 | -117.97152 | 34.13216 | 52 |
2017 | San Bernardino | Kaiser Foundation Hospital – Fontana | 106361223 | RECTUM | 0 | 50 | -117.43549 | 34.0725 | 50 |
2017 | San Diego | UC San Diego Health System – Hillcrest Medical Center | 106370782 | RECTUM | 0 | 47 | -117.16498 | 32.75387 | 47 |
2017 | San Francisco | Kaiser Foundation Hospital – San Francisco | 106380857 | RECTUM | 0 | 47 | -122.44289 | 37.78274 | 47 |
2017 | Orange | Hoag Memorial Hospital Presbyterian | 106301205 | RECTUM | 0 | 46 | -117.92949 | 33.62526 | 46 |
2017 | Los Angeles | Long Beach Memorial Medical Center | 106190525 | RECTUM | 0 | 44 | -118.1852 | 33.80801 | 44 |
2017 | Los Angeles | Kaiser Foundation Hospital – Sunset | 106190429 | RECTUM | 0 | 43 | -118.29545 | 34.09823 | 43 |
2017 | Los Angeles | Ronald Reagan UCLA Medical Center | 106190796 | RECTUM | 0 | 43 | -118.445 | 34.068889 | 43 |
2017 | San Bernardino | Loma Linda University Medical Center | 106361246 | RECTUM | 0 | 42 | -117.26146 | 34.05164 | 42 |
2017 | Los Angeles | Torrance Memorial Medical Center | 106190422 | RECTUM | 0 | 38 | -118.34388 | 33.81258 | 38 |
2017 | Orange | Kaiser Foundation Hospital – Orange County – Anaheim | 106304409 | RECTUM | 0 | 37 | -117.8439815 | 33.8544317 | 37 |
2017 | Los Angeles | Huntington Memorial Hospital | 106190400 | RECTUM | 0 | 35 | -118.1518 | 34.13565 | 35 |
2017 | San Diego | Scripps Mercy Hospital | 106370744 | RECTUM | 0 | 35 | -117.1609 | 32.75128 | 35 |
2017 | San Francisco | California Pacific Medical Center – Pacific Campus | 106380929 | RECTUM | 0 | 34 | -122.43102 | 37.79149 | 34 |
2017 | Contra Costa | John Muir Medical Center – Concord Campus | 106071018 | RECTUM | 0 | 33 | -122.03874 | 37.98615 | 33 |
2017 | Sacramento | Mercy San Juan Hospital | 106340950 | RECTUM | 0 | 33 | -121.31261 | 38.66818 | 33 |
2017 | San Diego | Kaiser Foundation Hospital – San Diego | 106370730 | RECTUM | 0 | 33 | -117.09447 | 32.79287 | 33 |
# Horizontal bar chart of the rectum-surgery ranking held in `top`.
ggplot(top, aes(x = reorder(Hospital, total), y = total)) +
  geom_col(fill = "orange") +
  ggtitle("Top Hospitals for Rectum Cancer Surgery") +
  coord_flip()
# Print the structure (column names and types) of the hospital-level table.
str(hospital)
## 'data.frame': 9786 obs. of 10 variables:
## $ Year : int 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ County : chr "Alameda" "Alameda" "Alameda" "Alameda" ...
## $ Hospital : chr "Alameda Hospital" "Alameda Hospital" "Alameda Hospital" "Alta Bates Summit Medical Center – Alta Bates Campus" ...
## $ OSHPDID : int 106010735 106010735 106010735 106010739 106010739 106010739 106010739 106010739 106010739 106010739 ...
## $ Surgery : chr "STOMACH" "COLON" "BREAST" "BRAIN" ...
## $ ICD9 : chr "1" "3" "2" "8" ...
## $ ICD10 : chr "0" "0" "0" "0" ...
## $ LONGITUDE: chr "-122.25362" "-122.25362" "-122.25362" "-122.25784" ...
## $ LATITUDE : chr "37.762953" "37.762953" "37.762953" "37.85633" ...
## $ total : num 1 3 2 8 12 4 1 95 2 6 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Convert the coordinate columns from text to numbers.
# The raw file shows "." where a coordinate is not given; mapping that
# placeholder to NA first avoids the "NAs introduced by coercion" warning
# that a blind as.numeric() raises.
# NOTE(review): assumes "." is the only non-numeric placeholder in these
# columns. Any other non-numeric value will still become NA with a warning.
hospital$LONGITUDE <- as.numeric(na_if(hospital$LONGITUDE, "."))
hospital$LATITUDE <- as.numeric(na_if(hospital$LATITUDE, "."))
# Rank hospitals by their total surgery count across all years and surgery
# types, keeping one row per distinct (Hospital, LONGITUDE, LATITUDE, Total).
#
# Changes from the earlier version:
#   * ungroup() after the per-hospital sum, so the result does not carry a
#     leftover grouping into later steps;
#   * head(20) instead of [1:20, ], so fewer than 20 hospitals does not
#     produce all-NA padding rows.
#
# NOTE(review): totals are summed per hospital *name*. The rendered table
# lists "Stanford Hospital" and "Stanford Health Care" separately with
# identical coordinates, which suggests one facility renamed mid-series;
# consider grouping by OSHPDID instead after confirming against the data.
top_hospital <- hospital %>%
  group_by(Hospital) %>%
  mutate(Total = sum(total)) %>%
  ungroup() %>%
  select(Hospital, LONGITUDE, LATITUDE, Total) %>%
  distinct() %>%
  arrange(desc(Total)) %>%
  head(20)
# Horizontal bar chart of the overall hospital ranking in `top_hospital`.
ggplot(top_hospital, aes(x = reorder(Hospital, Total), y = Total)) +
  geom_col(fill = "darkred") +
  ggtitle("Top Hospitals for Cancer Surgery") +
  coord_flip()
# Render the overall hospital ranking as a styled HTML table.
kable_styling(
  kable(top_hospital),
  bootstrap_options = c("striped", "hover", "condensed")
)
Hospital | LONGITUDE | LATITUDE | Total |
---|---|---|---|
UC San Francisco Medical Center | -122.4577 | 37.76351 | 7121 |
Cedars Sinai Medical Center | -118.3806 | 34.07681 | 7075 |
City of Hope Helford Clinical Research Hospital | -117.9715 | 34.13216 | 5406 |
Hoag Memorial Hospital Presbyterian | -117.9295 | 33.62526 | 5372 |
Keck Hospital of University of Southern California | -118.2029 | 34.06284 | 4762 |
Ronald Reagan UCLA Medical Center | -118.4450 | 34.06889 | 4000 |
Kaiser Foundation Hospital – Fontana | -117.4355 | 34.07250 | 4000 |
UC San Diego Health System – Hillcrest Medical Center | -117.1650 | 32.75387 | 3924 |
Stanford Hospital | -122.1783 | 37.43342 | 3738 |
Kaiser Foundation Hospital – San Diego | -117.0945 | 32.79287 | 3496 |
Sharp Memorial Hospital | -117.1553 | 32.80016 | 3425 |
Stanford Health Care | -122.1783 | 37.43342 | 3255 |
Kaiser Foundation Hospital – Orange County – Anaheim | -117.8440 | 33.85443 | 3211 |
Kaiser Foundation Hospital – Sunset | -118.2955 | 34.09823 | 3108 |
Kaiser Foundation Hospital – Santa Clara | -121.9958 | 37.33460 | 3027 |
Kaiser Foundation Hospital – Downey | -118.1290 | 33.91712 | 2992 |
California Pacific Medical Center – Pacific Campus | -122.4310 | 37.79149 | 2875 |
Kaiser Foundation Hospital – West Los Angeles | -118.3757 | 34.03793 | 2841 |
Kaiser Foundation Hospital – South Sacramento | -121.4241 | 38.47144 | 2816 |
UC Davis Medical Center | -121.4574 | 38.55438 | 2501 |
Mapping
library(leaflet)
# Make sure both coordinate columns are numeric before mapping.
top_hospital[["LONGITUDE"]] <- as.numeric(top_hospital[["LONGITUDE"]])
top_hospital[["LATITUDE"]] <- as.numeric(top_hospital[["LATITUDE"]])
# Map the first 20 rows of `top_hospital`, labelled by hospital name.
leaflet(data = top_hospital[seq_len(20), ]) %>%
  addTiles() %>%
  addMarkers(
    ~LONGITUDE, ~LATITUDE,
    popup = ~as.character(Hospital),
    label = ~as.character(Hospital)
  )
#Thank you #———————– Joe Long, Cabrillo Research (951) 435-1888