Assignment 2

# Load in CRAN packages
# Every package named in `ll` is attached by passing its name to library()
# as a character string (hence character.only = TRUE).
ll <- c('tidyverse','magrittr','forcats','stringr','cowplot','broom','scales','reshape2','ggrepel')
# library() is called purely for its side effect; invisible() discards the
# collected return values instead of printing them.
invisible(lapply(ll, library, character.only = TRUE))

Question 1

# (A) Odd numbers from 1 to 99 (step of 2), then preview the first six
A1 <- seq(from = 1, to = 99, by = 2)
head(A1)

## [1]  1  3  5  7  9 11

# (B) Even numbers from 2 to 100 (step of 2), then preview the first six
B1 <- seq(from = 2, to = 100, by = 2)
head(B1)

## [1]  2  4  6  8 10 12

# (C) Bind A1 and B1 side by side as the two columns of a matrix
C1 <- cbind(A1, B1)
head(C1)

##      A1 B1
## [1,]  1  2
## [2,]  3  4
## [3,]  5  6
## [4,]  7  8
## [5,]  9 10
## [6,] 11 12

# (D) Stack A1 and B1 as the two rows of a matrix; the preview is
# transposed so it prints in the same column layout as C1
D1 <- rbind(A1, B1)
head(t(D1))

##      A1 B1
## [1,]  1  2
## [2,]  3  4
## [3,]  5  6
## [4,]  7  8
## [5,]  9 10
## [6,] 11 12

# Row sums for C1 and D1
# C1 has one row per (odd, even) pair, so each row sum is A1[i] + B1[i];
# only the first six are shown.
head(rowSums(C1))

## [1]  3  7 11 15 19 23

# D1 has A1 and B1 as its two rows, so its row sums are the totals of A1
# and B1 respectively.
rowSums(D1)

##   A1   B1 
## 2500 2550

Question 2

# (A) Store the eight solar-radiation readings in a numeric vector
solar.rad <- c(11.1, 10.6, 6.3, 8.8, 10.7, 11.2, 8.9, 12.2)
# (B) One-row summary table: mean, median and standard deviation
B2 <- data.frame(
  mean = mean(solar.rad),
  median = median(solar.rad),
  sd = sd(solar.rad)
)
B2

##    mean median       sd
## 1 9.975  10.65 1.877498

# (C) Shift every reading up by 10 and recompute the same three statistics
sr10 <- solar.rad + 10
C2 <- data.frame(
  mean = mean(sr10),
  median = median(sr10),
  sd = sd(sr10)
)
C2

##     mean median       sd
## 1 19.975  20.65 1.877498

# Element-wise difference between the two summary tables
# (original statistics minus the statistics of the +10 shifted data)
print(B2 - C2)

##   mean median            sd
## 1  -10    -10 -2.220446e-16

# (D) Multiply each observation from (C) by -2 and recompute the statistics
srm2 <- -2 * sr10
D2 <- data.frame(
  mean = mean(srm2),
  median = median(srm2),
  sd = sd(srm2)
)
D2

##     mean median       sd
## 1 -39.95  -41.3 3.754997

# (F) Histogram of the three series solar.rad, sr10 and srm2.
# gather() with no arguments stacks the columns into `key` (series name)
# and `value` (reading), so each series gets its own fill colour.
gg.hist <- ggplot(gather(data.frame(solar.rad, sr10, srm2)), aes(x = value)) +
  geom_histogram(aes(fill = key), binwidth = 2.5, color = "black") +
  labs(
    y = "Bin count",
    x = "Value",
    subtitle = "Histogram - 2.5 bin width"
  ) +
  # Numeric legend position: halfway along both axes of the plot area
  theme(legend.position = c(1 / 2, 1 / 2)) +
  scale_fill_discrete(name = "")
# (G) Point plot: one point per reading. The x-axis is the reading's
# position in solar.rad, converted to a factor so every reading gets its
# own discrete tick. seq_along() is used instead of 1:length() so an empty
# vector would give zero positions rather than c(1, 0).
gg.scat <- ggplot(
  data.frame(gh = factor(seq_along(solar.rad)), y = solar.rad),
  aes(x = gh, y = y)
) +
  geom_point() +
  labs(
    x = "Greenhouse",
    y = "Solar radiation",
    subtitle = "Point (scatter) plot"
  )
# Show the histogram (panel A) and the point plot (panel B) side by side
plot_grid(
  gg.hist,
  gg.scat,
  nrow = 1,
  labels = c("A", "B")
)

Question 3

# Read in the yeast data
# The row names of the table as read are copied into a leading `gene`
# column, and the result is converted to a tibble. as_tibble() replaces the
# deprecated tbl_df() and performs the same conversion.
yeast <- read.table("Yeast.txt") %>%
  cbind(gene = rownames(.), .) %>%
  as_tibble()
# Average expression value for the two genes of interest:
# keep their rows, reshape every non-gene column into (var, val) pairs,
# then take the mean of val within each gene
yeast %>%
  filter(gene %in% c("YLR407W", "YJR006W")) %>%
  gather(var, val, -gene) %>%
  group_by(gene) %>%
  summarise(mean = mean(val))

## # A tibble: 2 × 2
##      gene          mean
##    <fctr>         <dbl>
## 1 YJR006W -5.555556e-04
## 2 YLR407W  3.758191e-18

Assignment 2

Erik Drysdale

Question 1

Question 2

Question 3