library(Biobase)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colnames,
## do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, lengths, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff,
## sort, table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
library(GEOquery)
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
#gset <- getGEO("GSE89093", GSEMatrix =TRUE, getGPL=FALSE)
#if (length(gset) > 1) idx <- grep("GPL13534", attr(gset, "names")) else idx <- 1
#gset <- gset[[idx]]
# set parameters and draw the plot
# dev.new(width=4+dim(gset)[[2]]/5, height=6)
# par(mar=c(2+round(max(nchar(sampleNames(gset)))/2),4,2,1))
# title <- paste ("GSE89093", '/', annotation(gset), " selected samples", sep ='')
# boxplot(exprs(gset), boxwex=0.7, notch=T, main=title, outline=FALSE, las=2)
#
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the sample metadata sheet and build `dat`: a one-column tibble
# (`value`) holding the cleaned "Cancer location" label of every sample
# that has one. Rows are left untabulated, one per sample.
metadata <- read_excel("Metadata.xlsx")

# Fail loudly if the sheet lacks the column: `$` on a missing tibble column
# only warns and returns NULL, which would surface later as an obscure error.
if (!"Cancer location" %in% names(metadata)) {
  stop("Metadata.xlsx has no 'Cancer location' column", call. = FALSE)
}

# Keep only samples with a recorded location.
locations <- metadata[["Cancer location"]]
locations <- locations[!is.na(locations)]

# Upper-case the first character of every word.
locations <- gsub("\\b(.)", "\\U\\1", locations, perl = TRUE)

# Relabel selected locations; patterns are applied in the order listed and
# match as substrings, exactly like consecutive gsub() calls.
location_renames <- c(
  Gland_thyroid = "Thyroid",
  Endometrium = "Uterine",
  Melanoma = "Skin"
)
for (old_label in names(location_renames)) {
  locations <- gsub(old_label, location_renames[[old_label]], locations)
}

# Build the tibble explicitly instead of via the deprecated tbl_df(); naming
# the column here keeps the `value` name that the rest of the script uses.
dat <- tibble(value = locations)
names(dat)
## [1] "value"
library(ggplot2)
# Shared plot styling: black axis lines, no legend, enlarged text and a plain
# white panel background.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# elements; `size` is kept so the script still runs on older installs.
science_theme <- theme(
  axis.line = element_line(size = 0.7, color = "black"),
  legend.position = "none",
  text = element_text(size = 14, color = "black"),
  axis.text = element_text(size = 15),
  axis.title = element_text(size = 17),
  panel.background = element_rect(fill = "white")
)

# Bar chart of the number of rows per cancer location; geom_bar() does the
# counting, and the zero y-expansion makes the bars start on the x axis.
p <- ggplot(dat, aes(x = value)) +
  science_theme +
  geom_bar() +
  labs(x = "Cancer Location", y = "Counts") +
  scale_y_continuous(expand = c(0, 0))
print(p)
