library(BiocManager)
## Bioconductor version '3.13' is out-of-date; the current release version '3.14'
## is available with R version '4.1'; see https://bioconductor.org/install
library(ggplot2)
library(drawProteins)
library(pander)
library(compbio4all)
library(rentrez)
library(seqinr)
Accession Number Table
# Accession table for DIO1 orthologs: one row per species. Fields per row, in
# order: NCBI protein accession, UniProt ID, PDB ID, species, common name,
# gene name.
# NOTE: unavailable IDs are stored as the literal string "NA" (not R's missing
# value), so is.na() will not detect them.
dio1_table <- c(
  "NP_000783",    "P49895", "NA", "Homo sapiens",       "Human",      "DIO1",
  "NP_001116123", "NA",     "NA", "Pan troglodytes",    "Chimpanzee", "DIO1",
  "NP_031886",    "Q61153", "NA", "Mus musculus",       "Mouse",      "DIO1",
  "NP_001091083", "P24389", "NA", "Rattus norvegicus",  "Rat",        "Dio1",
  "NP_001243226", "Q2QEI3", "NA", "Xenopus tropicalis", "Frog",       "dio1",
  "NP_001007284", "F1R7E6", "NA", "Danio rerio",        "Fish",       "dio1"
)
# Reshape the flat vector row-wise into a character matrix. The column count
# (6 fields per species) is fixed, so adding a species only adds a row.
dio1_table_matrix <- matrix(dio1_table, ncol = 6, byrow = TRUE)
# Convert the matrix to a data frame, keeping every column as character.
dio1_table <- as.data.frame(dio1_table_matrix, stringsAsFactors = FALSE)
# Name the columns of the data frame using colnames().
colnames(dio1_table) <- c(
  "ncbi.protein.accession", "UniProt.id", "PDB",
  "species", "common.name", "gene.name"
)
# Display the accession table as a formatted text table using pander.
pander::pander(dio1_table)
| ncbi.protein.accession | UniProt.id | PDB | species | common.name |
|---|---|---|---|---|
| NP_000783 | P49895 | NA | Homo sapiens | Human |
| NP_001116123 | NA | NA | Pan troglodytes | Chimpanzee |
| NP_031886 | Q61153 | NA | Mus musculus | Mouse |
| NP_001091083 | P24389 | NA | Rattus norvegicus | Rat |
| NP_001243226 | Q2QEI3 | NA | Xenopus tropicalis | Frog |
| NP_001007284 | F1R7E6 | NA | Danio rerio | Fish |
| gene.name |
|---|
| DIO1 |
| DIO1 |
| DIO1 |
| Dio1 |
| dio1 |
| dio1 |
Download the feature annotations for human DIO1 from UniProt using its UniProt accession number (P49895).
# Request the feature annotations for UniProt accession P49895 (the human DIO1
# entry in the accession table). This is a download, so it needs network access.
DIO1_json <- drawProteins::get_features("P49895")
## [1] "Download has worked"
# Check what kind of object the download returned.
is(DIO1_json)
## [1] "list" "vector"
Convert the raw feature list to a data frame so that the specific features annotated for DIO1 can be inspected.
# Flatten the downloaded feature list into a data frame.
my_prot_df <- drawProteins::feature_to_dataframe(DIO1_json)
# Confirm that the result is a data.frame.
is(my_prot_df)
## [1] "data.frame" "list" "oldClass" "vector"
# Print the feature table, omitting its second column.
my_prot_df[,-2]
## type begin end length accession entryName taxid order
## featuresTemp CHAIN 1 249 248 P49895 IOD1_HUMAN 9606 1
## featuresTemp.1 TRANSMEM 13 33 20 P49895 IOD1_HUMAN 9606 1
## featuresTemp.2 ACT_SITE 126 126 0 P49895 IOD1_HUMAN 9606 1
## featuresTemp.3 NON_STD 126 126 0 P49895 IOD1_HUMAN 9606 1
## featuresTemp.4 VAR_SEQ 26 40 14 P49895 IOD1_HUMAN 9606 1
## featuresTemp.5 VAR_SEQ 41 249 208 P49895 IOD1_HUMAN 9606 1
## featuresTemp.6 VAR_SEQ 50 113 63 P49895 IOD1_HUMAN 9606 1
## featuresTemp.7 VAR_SEQ 50 89 39 P49895 IOD1_HUMAN 9606 1
## featuresTemp.8 VAR_SEQ 50 85 35 P49895 IOD1_HUMAN 9606 1
## featuresTemp.9 VAR_SEQ 78 109 31 P49895 IOD1_HUMAN 9606 1
## featuresTemp.10 VAR_SEQ 86 249 163 P49895 IOD1_HUMAN 9606 1
## featuresTemp.11 VAR_SEQ 90 249 159 P49895 IOD1_HUMAN 9606 1
## featuresTemp.12 VAR_SEQ 110 249 139 P49895 IOD1_HUMAN 9606 1
## featuresTemp.13 VAR_SEQ 113 160 47 P49895 IOD1_HUMAN 9606 1
## featuresTemp.14 VAR_SEQ 114 249 135 P49895 IOD1_HUMAN 9606 1
## featuresTemp.15 VAR_SEQ 161 161 0 P49895 IOD1_HUMAN 9606 1
## featuresTemp.16 VAR_SEQ 162 249 87 P49895 IOD1_HUMAN 9606 1
Since DIO1 has no annotated motifs or domains (the feature table above lists no such entries), only folds, I use the function draw_recept_dom() to draw a receptor-domain diagram of DIO1.
# Rebuild the feature data frame from the downloaded UniProt record.
my_prot_df <- drawProteins::feature_to_dataframe(DIO1_json)

# Build the diagram layer by layer: blank canvas, then the protein chain,
# then the receptor-domain features.
my_canvas <- drawProteins::draw_canvas(my_prot_df)
my_canvas <- drawProteins::draw_chains(
  my_canvas,
  my_prot_df,
  label_size = 2.5
)
my_canvas <- drawProteins::draw_recept_dom(my_canvas, my_prot_df)

# Print the finished plot.
my_canvas