This data was downloaded from Social Explorer in CSV format. It was used to learn the data import and data wrangling process in R. The following operations were performed on this data:
* Importing csv data in r
* Keeping/dropping observations
* Keeping/dropping variables
* Renaming existing variables
* Generating new variables
* Recoding existing variables
Load library & Import Data
# Attach the tidyverse; it supplies read_csv() and the dplyr/ggplot2
# functions used in the rest of this notebook
library(tidyverse)
# Read cancer13.csv (path relative to the working directory) into c1
c1 <- read_csv(file = "cancer13.csv")
Parsed with column specification:
cols(
.default = col_integer(),
Geo_NAME = col_character(),
Geo_QNAME = col_character(),
Geo_NATION = col_character(),
Geo_COUNTY = col_character(),
SE_T001_001 = col_double(),
SE_T001_002 = col_double(),
SE_T001_003 = col_double(),
SE_T001_004 = col_double(),
SE_T002_001 = col_double(),
SE_T002_002 = col_double(),
SE_T002_003 = col_double(),
SE_T002_004 = col_double(),
SE_T002_005 = col_double(),
SE_T018_001 = col_double(),
SE_T018_002 = col_double(),
SE_T018_003 = col_double(),
SE_T018_004 = col_double(),
SE_T024_001 = col_double(),
SE_T024_002 = col_double(),
SE_T024_003 = col_double()
# ... with 5 more columns
)
See spec(...) for full column specifications.
# Show the freshly imported data to check that the import worked
print(c1)
Keeping/dropping observation
# Keep only the first 51 rows of the imported data, all columns
c2 <- c1[seq_len(51), ]
print(c2)
Keeping/dropping variable
# Step 1: drop columns by position (negative indices remove columns)
c3 <- c2[
  -c(1, 3:6, 11:18, 24:33, 38:45, 50:57, 62:69)
]
print(c3)
# Step 2: from the reduced table keep columns 1, 6 to 11, 15 and 19
c4 <- c3[c(1, 6:11, 15, 19)]
print(c4)
Renaming existing variable
# Give the nine remaining columns short names; assignment is positional
c4 <- setNames(
  c4,
  c(
    "state", "t_cancer",
    "t_a_g1", "t_a_g2", "t_a_g3", "t_a_g4",
    "breast", "colorectal", "lung"
  )
)
print(c4)
Generating new variable
# New column cl_cancer: row-wise sum of the colorectal and lung columns.
# The columns have to be qualified with c4$ because bare `colorectal` and
# `lung` are looked up as standalone objects, not as columns of c4, and the
# statement fails with "object 'colorectal' not found".
c4$cl_cancer <- c4$colorectal + c4$lung
print(c4)
Recoding existing variable
# Label each row "low" when t_cancer is below 150 and "high" otherwise
c4$category <- ifelse(
  test = c4$t_cancer < 150,
  yes = "low",
  no = "high"
)
print(c4)
Summarise
The data was also used for summarizing the variables.
# Mean of t_cancer over all rows, ignoring missing values
c4 %>%
  summarise(mean(t_cancer, na.rm = TRUE))
# Smallest and largest t_cancer values; each `[1] ...` line is the recorded
# console output of the call above it. Unlike the mean above, na.rm is not
# set here, so a missing t_cancer value would make both calls return NA.
min(c4$t_cancer)
[1] 98.5
max(c4$t_cancer)
[1] 254.6
# Mean breast, colorectal and lung values per category; a category is kept
# only if at least one of its means exceeds the threshold in filter().
# select() on grouped data retains the grouping column, which is why the
# "Adding missing grouping variables" message is printed.
c4 %>%
  group_by(category) %>%
  select(breast, colorectal, lung) %>%
  summarise(
    mean_breast = mean(breast, na.rm = TRUE),
    mean_colorectal = mean(colorectal, na.rm = TRUE),
    mean_lung = mean(lung, na.rm = TRUE)
  ) %>%
  filter(mean_breast > 10 | mean_colorectal > 10 | mean_lung > 30)
Adding missing grouping variables: `category`
ggplot
Bar charts were also created from this data with ggplot2.
## Total Cancer Death
# Re-level state by descending t_cancer so the bars are drawn high to low
c4$state <- factor(
  c4$state,
  levels = c4$state[order(-c4$t_cancer)]
)
# One bar per state; x tick labels are hidden, the legend identifies states
ggplot(c4, aes(x = state, y = t_cancer, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "State", y = "Total Cancer Death") +
  theme(
    axis.text.x = element_blank(),
    legend.position = "right"
  )

## Breast Cancer Death
# Re-level state by descending breast so the bars are drawn high to low
c4$state <- factor(
  c4$state,
  levels = c4$state[order(-c4$breast)]
)
# One bar per state; x tick labels are hidden, the legend identifies states
ggplot(c4, aes(x = state, y = breast, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "State", y = "Breast Cancer Death") +
  theme(
    axis.text.x = element_blank(),
    legend.position = "right"
  )

###Breast cancer death in three states
# c5 is not created anywhere in the visible notebook, so this chunk fails
# with "object 'c5' not found". If no c5 exists, fall back to the three rows
# of c4 with the largest breast values; an existing c5 is used unchanged.
# NOTE(review): which three states were intended is not recorded - confirm.
if (!exists("c5")) {
  c5 <- head(c4[order(-c4$breast), ], 3)
}
# The stray trailing comma inside aes() has been removed.
ggplot(data = c5, aes(x = state, y = breast, fill = state)) +
  geom_bar(stat = "identity") +
  xlab("State") +
  ylab("Breast Cancer Death") +
  ggtitle("Distribution of Total Breast Cancer Death") +
  # Tilt the state names so they do not overlap
  theme(axis.text.x = element_text(angle = 45)) +
  # The x axis already names each state, so the legend is hidden
  theme(legend.position = "none")

##Colorectal cancer death
# Re-level state by descending colorectal so the bars are drawn high to low
c4$state <- factor(
  c4$state,
  levels = c4$state[order(-c4$colorectal)]
)
# One bar per state; x tick labels are hidden, the legend identifies states
ggplot(c4, aes(x = state, y = colorectal, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "State", y = "Colorectal Cancer Death") +
  theme(
    axis.text.x = element_blank(),
    legend.position = "right"
  )

##Lung cancer death
# Re-level state by descending lung so the bars are drawn high to low
c4$state <- factor(
  c4$state,
  levels = c4$state[order(-c4$lung)]
)
# One bar per state; x tick labels are hidden, the legend identifies states
ggplot(c4, aes(x = state, y = lung, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "State", y = "Lung Cancer Death") +
  theme(
    axis.text.x = element_blank(),
    legend.position = "right"
  )

LS0tCnRpdGxlOiBDYW5jZXIgRGVhdGggaW4gMjAxMwpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpUaGlzIGRhdGEgd2FzIGRvd25sb2FkZWQgZnJvbSBzb2NpYWwgZXhwbG9yZXIgaW4gY3N2IGZvcm1hdC4gSXQgd2FzIHVzZWQgdG8gbGVhcm4gZGF0YSBpbXBvcnQgYW5kIGRhdGEgd3JhbmdsaW5nIHByb2Nlc3MgIG9mIHIuIFRoZSBmb2xsb3dpbmcgb3BlcmF0aW9ucyB3ZXJlIGV4ZWN1dGVkIGJ5IHVzaW5nIHRoaXMgZGF0YToKCiAgICAqIEltcG9ydGluZyBjc3YgZGF0YSBpbiByCiAgICAqIEtlZXBpbmcvZHJvcGluZyBvYnNlcnZhdGlvbnMKICAgICogS2VlcGluZy9kcm9waW5nIHZhcmlhYmxlcwogICAgKiBSZW5hbWluZyBleGlzdGluZyB2YXJpYWJsZXMKICAgICogR2VuZXJhdGluZyBuZXcgdmFyaWFibGVzCiAgICAqIFJlY29kaW5nIGV4aXN0aW5nIHZhcmlhYmxlcwogICAgCiAgCkxvYWQgbGlicmFyeSAmIEltcG9ydCBEYXRhCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYzE8LXJlYWRfY3N2KCJjYW5jZXIxMy5jc3YiKQpwcmludChjMSkKYGBgCgoKCktlZXBpbmcvZHJvcHBpbmcgb2JzZXJ2YXRpb24KYGBge3J9CmMyPC1jMVsxOjUxLF0KcHJpbnQoYzIpCmBgYAoKCgpLZWVwaW5nL2Ryb3BwaW5nIHZhcmlhYmxlCmBgYHtyfQpjMzwtYzJbLWMoMSwzOjYsMTE6MTgsMjQ6MzMsIDM4OjQ1LDUwOjU3LCA2Mjo2OSldIApwcmludChjMykKYzQ8LWMzW2MoMSw2OjExLDE1LCAxOSldIApwcmludChjNCkKYGBgCgoKCgpSZW5hbWluZyBleGlzdGluZyB2YXJpYWJsZQpgYGB7cn0KCm5hbWVzKGM0KSA8LSBjKCJzdGF0ZSIsInRfY2FuY2VyIiwidF9hX2cxIiwgInRfYV9nMiIsInRfYV9nMyIsICJ0X2FfZzQiLCJicmVhc3QiLCJjb2xvcmVjdGFsIiwibHVuZyIpCnByaW50KGM0KQpgYGAKCgoKR2VuZXJhdGluZyBuZXcgdmFyaWFibGUKYGBge3J9CmM0JGNsX2NhbmNlcjwtY29sb3JlY3RhbCtsdW5nCnByaW50KGM0KQpgYGAKCgoKUmVjb2RpbmcgZXhpc3RpbmcgdmFyaWFibGUKYGBge3J9CmM0JGNhdGVnb3J5IDwtIGlmZWxzZShjNCR0X2NhbmNlciA8IDE1MCwgCiAgICAgICAgICAgICAgICAgICAgICAgIGMoImxvdyIpLCBjKCJoaWdoIikpIApwcmludChjNCkKCmBgYAoKCgoKU3VtbWFyaXNlCgoKCgoKClRoZSBkYXRhIHdhcyBhbHNvIHVzZWQgZm9yIHN1bW1hcml6aW5nIHRoZSB2YXJpYWJsZXMuIApgYGB7cn0Kc3VtbWFyaXNlKGM0LCBtZWFuKHRfY2FuY2VyLCBuYS5ybSA9IFRSVUUpKQpgYGAKCmBgYHtyfQogbWluKGM0JHRfY2FuY2VyKQogbWF4KGM0JHRfY2FuY2VyKQpgYGAKYGBge3J9CmM0JT4lCiAgIGdyb3VwX2J5KGNhdGVnb3J5KSU+JQogICBzZWxlY3QoYnJlYXN0LCBjb2xvcmVjdGFsLCBsdW5nKSU+JQogICBzdW1tYXJpc2UobWVhbl9icmVhc3Q9bWVhbihicmVhc3QsIG5hLnJtID0gVFJVRSksCiAgICBtZWFuX2NvbG9yZWN0YWw9bWVhbihjb2xvcmVjdGFsLCBuYS5ybT1UUlVFKSwKICAgIG1lYW5fbHVuZz1tZWFu
KGx1bmcsIG5hLnJtPVRSVUUpKSU+JQogICBmaWx0ZXIobWVhbl9icmVhc3Q+MTB8bWVhbl9jb2xvcmVjdGFsPjEwfG1lYW5fbHVuZz4zMCkKYGBgCgoKCgpnZ3Bsb3QKCgoKCgoKClRoZSBnZ3Bsb3RzIHdlcmUgYWxzbyBjcmVhdGVkIGJ5IHVzaW5nIHRoaXMgZGF0YS4KYGBge3J9CiMjIFRvdGFsIENhbmNlciBEZWF0aCAKIGM0JHN0YXRlIDwtIGZhY3RvcihjNCRzdGF0ZSwgbGV2ZWxzID0gYzQkc3RhdGVbb3JkZXIoLWM0JHRfY2FuY2VyKV0pCiBnZ3Bsb3QoZGF0YT1jNCwgYWVzKHg9c3RhdGUsIHk9dF9jYW5jZXIsIGZpbGw9c3RhdGUpKSArCiAgIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikrCiAgIHhsYWIoIlN0YXRlIikrCiAgIHlsYWIoIlRvdGFsIENhbmNlciBEZWF0aCIpKwogICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSkrCiAgIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJyaWdodCIpCmBgYAoKCmBgYHtyfQojIyBCcmVhc3QgQ2FuY2VyIERlYXRoIAogIGM0JHN0YXRlIDwtIGZhY3RvcihjNCRzdGF0ZSwgbGV2ZWxzID0gYzQkc3RhdGVbb3JkZXIoLWM0JGJyZWFzdCldKQogZ2dwbG90KGRhdGE9YzQsIGFlcyh4PXN0YXRlLCB5PWJyZWFzdCwgZmlsbD1zdGF0ZSkpICsKICAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiKSsKICAgeGxhYigiU3RhdGUiKSsKICAgeWxhYigiQnJlYXN0IENhbmNlciBEZWF0aCIpKwogICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSkrCiAgIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJyaWdodCIpCmBgYAoKYGBge3J9CiMjI0JyZWFzdCBjYW5jZXIgZGVhdGggaW4gdGhyZWUgc3RhdGVzCmdncGxvdChkYXRhPWM1LCBhZXMoeD1zdGF0ZSwgeT1icmVhc3QsIGZpbGw9c3RhdGUsKSkrCiAgIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikrCiAgIHhsYWIoIlN0YXRlIikrCiAgIHlsYWIoIkJyZWFzdCBDYW5jZXIgRGVhdGgiKSsKICAgZ2d0aXRsZSgiRGlzdHJpYnV0aW9uIG9mIFRvdGFsIEJyZWFzdCBDYW5jZXIgRGVhdGgiKSsKICAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSkpKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikKCmBgYAoKYGBge3J9CiMjQ29sb3JlY3RhbCBjYW5jZXIgZGVhdGgKIGM0JHN0YXRlIDwtIGZhY3RvcihjNCRzdGF0ZSwgbGV2ZWxzID0gYzQkc3RhdGVbb3JkZXIoLWM0JGNvbG9yZWN0YWwpXSkKIGdncGxvdChkYXRhPWM0LCBhZXMoeD1zdGF0ZSwgeT1jb2xvcmVjdGFsLCBmaWxsPXN0YXRlKSkgKwogICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIpKwogICB4bGFiKCJTdGF0ZSIpKwogICB5bGFiKCJDb2xvcmVjdGFsIENhbmNlciBEZWF0aCIpKwogICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSkrCiAgIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJyaWdodCIpCmBgYAoKYGBge3J9CiMjTHVuZyBjYW5jZXIgZGVhdGgKYzQkc3RhdGUgPC0gZmFjdG9yKGM0JHN0YXRl
LCBsZXZlbHMgPSBjNCRzdGF0ZVtvcmRlcigtYzQkbHVuZyldKQogZ2dwbG90KGRhdGE9YzQsIGFlcyh4PXN0YXRlLCB5PWx1bmcsIGZpbGw9c3RhdGUpKSArCiAgIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikrCiAgIHhsYWIoIlN0YXRlIikrCiAgIHlsYWIoIkx1bmcgQ2FuY2VyIERlYXRoIikrCiAgIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF9ibGFuaygpKSsKICAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gInJpZ2h0IikKYGBgCgo=