####Run through this example and try to understand what is going on with the data

####So, let's load RMarkdown

library(rmarkdown)

Set messages and warnings to FALSE on every chunk (this prevents certain boring things from being shown in the results).

# Default options applied to every chunk: hide the code itself, hide
# messages and warnings, and keep code and output in separate blocks.
knitr::opts_chunk$set(
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  collapse = FALSE
)

PACKAGES

COLORS

LOAD DATA AND MAKE AN OVERALL HEATMAP

##  [1] "Gene_Symbol"             "Description"            
##  [3] "Peptides"                "MCF10A_1"               
##  [5] "MCF10A_2"                "MCF7_1"                 
##  [7] "MCF7_2"                  "MDA231_1"               
##  [9] "MDA231_2"                "MDA468_1"               
## [11] "MDA468_2"                "SKBR3_1"                
## [13] "SKBR3_2"                 "pvalue_MCF7_vs_MCF10A"  
## [15] "pvalue_MDA231_vs_MCF10A" "pvalue_MDA468_vs_MCF10A"
## [17] "pvalue_SKBR3_vs_MCF10A"

Now let's visualize the dataset and look for initial trends. We can do this by making a matrix and then a heatmap to visualize the data.

##      log_MCF7 log_MDA231 log_MDA468 log_SKBR3
## [1,]    -0.43       1.90      -0.74         0
## [2,]    -0.96      -0.35      -0.71         0
## [3,]     0.27       0.29      -0.04         0
## [4,]     0.88       0.78      -0.05         0
## [5,]     0.79       1.19       0.92         0
## [6,]    -0.79      -0.90      -1.28         0

#control is MCF10A cell line

Data Manipulation

Diving deeper with VOLCANO PLOTS

Barplots of significant points of interest

EXAMPLES OF A COUPLE PROTEINS or GENES

## # A tibble: 6 × 5
##   Gene_Symbol Description                             Peptides variable value
##   <chr>       <chr>                                      <dbl> <chr>    <dbl>
## 1 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MCF10A_1  9.54
## 2 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MCF10A_2  4.58
## 3 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MCF7_1    5.07
## 4 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MCF7_2    5.42
## 5 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MDA231_1 25.4 
## 6 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MDA231_2 27.4
## # A tibble: 6 × 5
##   Gene_Symbol Description                                Peptides variable value
##   <chr>       <chr>                                         <dbl> <chr>    <dbl>
## 1 HLA-A       HLA class I histocompatibility antigen, A…        3 MCF10A_1  5.42
## 2 HLA-A       HLA class I histocompatibility antigen, A…        3 MCF10A_2  6.62
## 3 HLA-A       HLA class I histocompatibility antigen, A…        3 MCF7_1    2.22
## 4 HLA-A       HLA class I histocompatibility antigen, A…        3 MCF7_2    2.69
## 5 HLA-A       HLA class I histocompatibility antigen, A…        3 MDA231_1  4.56
## 6 HLA-A       HLA class I histocompatibility antigen, A…        3 MDA231_2  4.23

##INTERPRETATION## What can you see in this figure? Are the repeated measures/reps similar or different? What does this say about the precision and accuracy of them? How does the control compare to the variables? Is this what you might expect? Why? What would you look for in the literature to support this idea?

##INTERPRETATION## What can you see in this figure? Are the repeated measures/reps similar or different? What does this say about the precision and accuracy of them? How does the control compare to the variables? Is this what you might expect? Why? What would you look for in the literature to support this idea?

#interpretation HINT: insert a chunk and create two separate lines of code that filter for your specific upregulated genes/proteins of interest and select only their gene symbols and descriptions. Do this for the downregulated ones as well. This will generate two lists of the descriptors for each gene of interest, helping you understand your figures. Be sure to view the whole table, not just ask for the head of the table generated.

## # A tibble: 5 × 2
##   Gene_Symbol Description                                                       
##   <chr>       <chr>                                                             
## 1 TDP2        Isoform 2 of Tyrosyl-DNA phosphodiesterase 2 OS=Homo sapiens GN=T…
## 2 DENND4C     Isoform 2 of DENN domain-containing protein 4C OS=Homo sapiens GN…
## 3 KRT23       Keratin, type I cytoskeletal 23 OS=Homo sapiens GN=KRT23 PE=1 SV=2
## 4 FNBP1L      Isoform 2 of Formin-binding protein 1-like OS=Homo sapiens GN=FNB…
## 5 GCLC        Glutamate--cysteine ligase catalytic subunit OS=Homo sapiens GN=G…
## # A tibble: 6 × 2
##   Gene_Symbol Description                                                       
##   <chr>       <chr>                                                             
## 1 HLA-A       HLA class I histocompatibility antigen, A-23 alpha chain OS=Homo …
## 2 HLA-A       HLA class I histocompatibility antigen, A-30 alpha chain OS=Homo …
## 3 HLA-A       HLA class I histocompatibility antigen, A-2 alpha chain OS=Homo s…
## 4 HMGN5       High mobility group nucleosome-binding domain-containing protein …
## 5 MYO1B       Isoform 2 of Myosin-Ib OS=Homo sapiens GN=MYO1B                   
## 6 HLA-A       HLA class I histocompatibility antigen, A-1 alpha chain OS=Homo s…

# Keep only the rows for the upregulated genes of interest, then keep just
# the gene symbol and description columns so the list is easy to read.
# Straight double quotes are required: typographic quotes are not valid R
# string delimiters. `%in%` replaces the chain of `==` / `|` comparisons.
upregulated <- bc_data %>%
  filter(Gene_Symbol %in% c("TDP2", "KRT23", "DENND4C", "FNBP1L", "GCLC")) %>%
  select(Gene_Symbol, Description)

WRAP UP

##Now you can knit this and publish to save and share your code. Use this to work with either the brain or breast cells and the Part_C_template to complete your lab 6 ELN. Annotate when you have trouble and reference which line of code you need help on. Good luck and have fun!