This data was downloaded from Social Explorer in CSV format. It was used to learn the data import and data wrangling process in R. The following operations were performed on this data:

* Importing CSV data into R
* Keeping/dropping observations
* Keeping/dropping variables
* Renaming existing variables
* Generating new variables
* Recoding existing variables

Load library & Import Data

 #Load library
# Attach the tidyverse; read_csv() and the dplyr/ggplot2 verbs used later
# in this notebook come from it.
library(tidyverse)

# Read the CSV export into `c1`.
c1 <- read_csv(file = "cancer13.csv")
Parsed with column specification:
cols(
  .default = col_integer(),
  Geo_NAME = col_character(),
  Geo_QNAME = col_character(),
  Geo_NATION = col_character(),
  Geo_COUNTY = col_character(),
  SE_T001_001 = col_double(),
  SE_T001_002 = col_double(),
  SE_T001_003 = col_double(),
  SE_T001_004 = col_double(),
  SE_T002_001 = col_double(),
  SE_T002_002 = col_double(),
  SE_T002_003 = col_double(),
  SE_T002_004 = col_double(),
  SE_T002_005 = col_double(),
  SE_T018_001 = col_double(),
  SE_T018_002 = col_double(),
  SE_T018_003 = col_double(),
  SE_T018_004 = col_double(),
  SE_T024_001 = col_double(),
  SE_T024_002 = col_double(),
  SE_T024_003 = col_double()
  # ... with 5 more columns
)
See spec(...) for full column specifications.
# Show the imported data to check that the import worked.
print(c1)

Keeping/dropping observation

# Keep only the first 51 rows of the imported data
# (presumably the 50 states plus DC -- TODO confirm against the CSV).
c2 <- c1[seq_len(51), ]
print(c2)

Keeping/dropping variable

# Drop unwanted columns by position (negative indices remove columns).
c3 <- c2[
  -c(
    1,
    3:6,
    11:18,
    24:33,
    38:45,
    50:57,
    62:69
  )
]
print(c3)

# From what remains, keep the nine columns used in the rest of the analysis.
c4 <- c3[c(1, 6:11, 15, 19)]
print(c4)

Renaming existing variable

# Give the nine retained columns short, readable names (positional order).
c4 <- setNames(
  c4,
  c(
    "state",
    "t_cancer",
    "t_a_g1",
    "t_a_g2",
    "t_a_g3",
    "t_a_g4",
    "breast",
    "colorectal",
    "lung"
  )
)
print(c4)

Generating new variable

# New variable: sum of the colorectal and lung columns.
# The columns must be referenced through `c4`; bare `colorectal` / `lung`
# are not objects in the calling environment and raise "object not found".
c4$cl_cancer <- c4$colorectal + c4$lung
print(c4)

Recoding existing variable

# Recode t_cancer into a two-level label: values of 150 or more become
# "high", values below 150 become "low" (NA stays NA).
c4[["category"]] <- ifelse(c4[["t_cancer"]] >= 150, "high", "low")
print(c4)

Summarise

The data was also used for summarizing the variables.

# Mean of t_cancer across all rows, ignoring missing values.
c4 %>%
  summarise(mean(t_cancer, na.rm = TRUE))
# Smallest t_cancer value.
min(c4[["t_cancer"]])
[1] 98.5
 # Largest t_cancer value.
 max(c4[["t_cancer"]])
[1] 254.6
# Per-category means of the three cancer columns, keeping only categories
# where at least one mean exceeds its threshold.
# select() runs on grouped data, so dplyr keeps `category` and reports
# "Adding missing grouping variables".
c4 %>%
  group_by(category) %>%
  select(breast, colorectal, lung) %>%
  summarise(
    mean_breast = mean(breast, na.rm = TRUE),
    mean_colorectal = mean(colorectal, na.rm = TRUE),
    mean_lung = mean(lung, na.rm = TRUE)
  ) %>%
  filter(
    mean_breast > 10 |
      mean_colorectal > 10 |
      mean_lung > 30
  )
Adding missing grouping variables: `category`

ggplot

Bar charts were also created from this data using ggplot2.

## Total Cancer Death
# Re-level `state` so the bars run from the highest to the lowest t_cancer.
c4$state <- factor(c4$state, levels = c4$state[order(-c4$t_cancer)])
# One bar per state; x-axis labels are hidden and the legend identifies states.
ggplot(c4, aes(x = state, y = t_cancer, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "State", y = "Total Cancer Death") +
  theme(
    axis.text.x = element_blank(),
    legend.position = "right"
  )

## Breast Cancer Death
# Re-level `state` so the bars run from the highest to the lowest breast value.
c4$state <- factor(c4$state, levels = c4$state[order(-c4$breast)])
# One bar per state; x-axis labels are hidden and the legend identifies states.
ggplot(c4, aes(x = state, y = breast, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "State", y = "Breast Cancer Death") +
  theme(
    axis.text.x = element_blank(),
    legend.position = "right"
  )

### Breast cancer death in three states
# NOTE(review): `c5` is not created anywhere in the visible notebook. It is
# presumably a three-row subset of `c4` -- define it before running this chunk.
# The stray trailing comma inside aes() has been removed.
ggplot(data = c5, aes(x = state, y = breast, fill = state)) +
  geom_bar(stat = "identity") +
  xlab("State") +
  ylab("Breast Cancer Death") +
  ggtitle("Distribution of Total Breast Cancer Death") +
  # Tilt the state names; the legend is hidden because the axis labels them.
  theme(axis.text.x = element_text(angle = 45)) +
  theme(legend.position = "none")

## Colorectal cancer death
# Re-level `state` so the bars run from the highest to the lowest colorectal value.
c4$state <- factor(c4$state, levels = c4$state[order(-c4$colorectal)])
# One bar per state; x-axis labels are hidden and the legend identifies states.
ggplot(c4, aes(x = state, y = colorectal, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "State", y = "Colorectal Cancer Death") +
  theme(
    axis.text.x = element_blank(),
    legend.position = "right"
  )

## Lung cancer death
# Re-level `state` so the bars run from the highest to the lowest lung value.
c4$state <- factor(c4$state, levels = c4$state[order(-c4$lung)])
# One bar per state; x-axis labels are hidden and the legend identifies states.
ggplot(c4, aes(x = state, y = lung, fill = state)) +
  geom_bar(stat = "identity") +
  labs(x = "State", y = "Lung Cancer Death") +
  theme(
    axis.text.x = element_blank(),
    legend.position = "right"
  )

LS0tCnRpdGxlOiBDYW5jZXIgRGVhdGggaW4gMjAxMwpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpUaGlzIGRhdGEgd2FzIGRvd25sb2FkZWQgZnJvbSBzb2NpYWwgZXhwbG9yZXIgaW4gY3N2IGZvcm1hdC4gSXQgd2FzIHVzZWQgdG8gbGVhcm4gZGF0YSBpbXBvcnQgYW5kIGRhdGEgd3JhbmdsaW5nIHByb2Nlc3MgIG9mIHIuIFRoZSBmb2xsb3dpbmcgb3BlcmF0aW9ucyB3ZXJlIGV4ZWN1dGVkIGJ5IHVzaW5nIHRoaXMgZGF0YToKCiAgICAqIEltcG9ydGluZyBjc3YgZGF0YSBpbiByCiAgICAqIEtlZXBpbmcvZHJvcGluZyBvYnNlcnZhdGlvbnMKICAgICogS2VlcGluZy9kcm9waW5nIHZhcmlhYmxlcwogICAgKiBSZW5hbWluZyBleGlzdGluZyB2YXJpYWJsZXMKICAgICogR2VuZXJhdGluZyBuZXcgdmFyaWFibGVzCiAgICAqIFJlY29kaW5nIGV4aXN0aW5nIHZhcmlhYmxlcwogICAgCiAgCkxvYWQgbGlicmFyeSAmIEltcG9ydCBEYXRhCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYzE8LXJlYWRfY3N2KCJjYW5jZXIxMy5jc3YiKQpwcmludChjMSkKYGBgCgoKCktlZXBpbmcvZHJvcHBpbmcgb2JzZXJ2YXRpb24KYGBge3J9CmMyPC1jMVsxOjUxLF0KcHJpbnQoYzIpCmBgYAoKCgpLZWVwaW5nL2Ryb3BwaW5nIHZhcmlhYmxlCmBgYHtyfQpjMzwtYzJbLWMoMSwzOjYsMTE6MTgsMjQ6MzMsIDM4OjQ1LDUwOjU3LCA2Mjo2OSldIApwcmludChjMykKYzQ8LWMzW2MoMSw2OjExLDE1LCAxOSldIApwcmludChjNCkKYGBgCgoKCgpSZW5hbWluZyBleGlzdGluZyB2YXJpYWJsZQpgYGB7cn0KCm5hbWVzKGM0KSA8LSBjKCJzdGF0ZSIsInRfY2FuY2VyIiwidF9hX2cxIiwgInRfYV9nMiIsInRfYV9nMyIsICJ0X2FfZzQiLCJicmVhc3QiLCJjb2xvcmVjdGFsIiwibHVuZyIpCnByaW50KGM0KQpgYGAKCgoKR2VuZXJhdGluZyBuZXcgdmFyaWFibGUKYGBge3J9CmM0JGNsX2NhbmNlcjwtY29sb3JlY3RhbCtsdW5nCnByaW50KGM0KQpgYGAKCgoKUmVjb2RpbmcgZXhpc3RpbmcgdmFyaWFibGUKYGBge3J9CmM0JGNhdGVnb3J5IDwtIGlmZWxzZShjNCR0X2NhbmNlciA8IDE1MCwgCiAgICAgICAgICAgICAgICAgICAgICAgIGMoImxvdyIpLCBjKCJoaWdoIikpIApwcmludChjNCkKCmBgYAoKCgoKU3VtbWFyaXNlCgoKCgoKClRoZSBkYXRhIHdhcyBhbHNvIHVzZWQgZm9yIHN1bW1hcml6aW5nIHRoZSB2YXJpYWJsZXMuIApgYGB7cn0Kc3VtbWFyaXNlKGM0LCBtZWFuKHRfY2FuY2VyLCBuYS5ybSA9IFRSVUUpKQpgYGAKCmBgYHtyfQogbWluKGM0JHRfY2FuY2VyKQogbWF4KGM0JHRfY2FuY2VyKQpgYGAKYGBge3J9CmM0JT4lCiAgIGdyb3VwX2J5KGNhdGVnb3J5KSU+JQogICBzZWxlY3QoYnJlYXN0LCBjb2xvcmVjdGFsLCBsdW5nKSU+JQogICBzdW1tYXJpc2UobWVhbl9icmVhc3Q9bWVhbihicmVhc3QsIG5hLnJtID0gVFJVRSksCiAgICBtZWFuX2NvbG9yZWN0YWw9bWVhbihjb2xvcmVjdGFsLCBuYS5ybT1UUlVFKSwKICAgIG1lYW5fbHVuZz1tZWFu
KGx1bmcsIG5hLnJtPVRSVUUpKSU+JQogICBmaWx0ZXIobWVhbl9icmVhc3Q+MTB8bWVhbl9jb2xvcmVjdGFsPjEwfG1lYW5fbHVuZz4zMCkKYGBgCgoKCgpnZ3Bsb3QKCgoKCgoKClRoZSBnZ3Bsb3RzIHdlcmUgYWxzbyBjcmVhdGVkIGJ5IHVzaW5nIHRoaXMgZGF0YS4KYGBge3J9CiMjIFRvdGFsIENhbmNlciBEZWF0aCAKIGM0JHN0YXRlIDwtIGZhY3RvcihjNCRzdGF0ZSwgbGV2ZWxzID0gYzQkc3RhdGVbb3JkZXIoLWM0JHRfY2FuY2VyKV0pCiBnZ3Bsb3QoZGF0YT1jNCwgYWVzKHg9c3RhdGUsIHk9dF9jYW5jZXIsIGZpbGw9c3RhdGUpKSArCiAgIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikrCiAgIHhsYWIoIlN0YXRlIikrCiAgIHlsYWIoIlRvdGFsIENhbmNlciBEZWF0aCIpKwogICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSkrCiAgIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJyaWdodCIpCmBgYAoKCmBgYHtyfQojIyBCcmVhc3QgQ2FuY2VyIERlYXRoIAogIGM0JHN0YXRlIDwtIGZhY3RvcihjNCRzdGF0ZSwgbGV2ZWxzID0gYzQkc3RhdGVbb3JkZXIoLWM0JGJyZWFzdCldKQogZ2dwbG90KGRhdGE9YzQsIGFlcyh4PXN0YXRlLCB5PWJyZWFzdCwgZmlsbD1zdGF0ZSkpICsKICAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiKSsKICAgeGxhYigiU3RhdGUiKSsKICAgeWxhYigiQnJlYXN0IENhbmNlciBEZWF0aCIpKwogICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSkrCiAgIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJyaWdodCIpCmBgYAoKYGBge3J9CiMjI0JyZWFzdCBjYW5jZXIgZGVhdGggaW4gdGhyZWUgc3RhdGVzCmdncGxvdChkYXRhPWM1LCBhZXMoeD1zdGF0ZSwgeT1icmVhc3QsIGZpbGw9c3RhdGUsKSkrCiAgIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikrCiAgIHhsYWIoIlN0YXRlIikrCiAgIHlsYWIoIkJyZWFzdCBDYW5jZXIgRGVhdGgiKSsKICAgZ2d0aXRsZSgiRGlzdHJpYnV0aW9uIG9mIFRvdGFsIEJyZWFzdCBDYW5jZXIgRGVhdGgiKSsKICAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSkpKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikKCmBgYAoKYGBge3J9CiMjQ29sb3JlY3RhbCBjYW5jZXIgZGVhdGgKIGM0JHN0YXRlIDwtIGZhY3RvcihjNCRzdGF0ZSwgbGV2ZWxzID0gYzQkc3RhdGVbb3JkZXIoLWM0JGNvbG9yZWN0YWwpXSkKIGdncGxvdChkYXRhPWM0LCBhZXMoeD1zdGF0ZSwgeT1jb2xvcmVjdGFsLCBmaWxsPXN0YXRlKSkgKwogICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIpKwogICB4bGFiKCJTdGF0ZSIpKwogICB5bGFiKCJDb2xvcmVjdGFsIENhbmNlciBEZWF0aCIpKwogICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSkrCiAgIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJyaWdodCIpCmBgYAoKYGBge3J9CiMjTHVuZyBjYW5jZXIgZGVhdGgKYzQkc3RhdGUgPC0gZmFjdG9yKGM0JHN0YXRl
LCBsZXZlbHMgPSBjNCRzdGF0ZVtvcmRlcigtYzQkbHVuZyldKQogZ2dwbG90KGRhdGE9YzQsIGFlcyh4PXN0YXRlLCB5PWx1bmcsIGZpbGw9c3RhdGUpKSArCiAgIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5IikrCiAgIHhsYWIoIlN0YXRlIikrCiAgIHlsYWIoIkx1bmcgQ2FuY2VyIERlYXRoIikrCiAgIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF9ibGFuaygpKSsKICAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gInJpZ2h0IikKYGBgCgo=