# Load the built-in iris data set into the workspace.
data(list = "iris")
  1. Show head, tail, dimension and structure.

To show head and tail:

# First and last six rows of the data set (six is the default row count).
head(iris, n = 6L)
tail(iris, n = 6L)
Head and Tail
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
Head
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
Tail
145 6.7 3.3 5.7 2.5 virginica
146 6.7 3.0 5.2 2.3 virginica
147 6.3 2.5 5.0 1.9 virginica
148 6.5 3.0 5.2 2.0 virginica
149 6.2 3.4 5.4 2.3 virginica
150 5.9 3.0 5.1 1.8 virginica

To display the dimension:

# Number of rows followed by number of columns.
dim(x = iris)
## [1] 150   5

The iris data has 150 rows and 5 columns.

Display the structure:

# Compact overview: one line per column with its type and first values.
str(object = iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
  1. To summarize the iris dataset:
# Per-column summary: quartiles and mean for numeric columns, counts for the
# Species factor.
summary(object = iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
  1. To select columns 1, 2 and 5:
# Pick columns by position: 1 = Sepal.Length, 2 = Sepal.Width, 5 = Species.
dplyr::select(iris, c(1, 2, 5))
# or with pipe command
iris %>%
  dplyr::select(c(1, 2, 5))
Sepal.Length Sepal.Width Species
5.1 3.5 setosa
4.9 3.0 setosa
4.7 3.2 setosa
4.6 3.1 setosa
5.0 3.6 setosa
5.4 3.9 setosa
  1. To slice rows 1-20, 51-70 and 101-120:
# Keep rows 1-20, 51-70 and 101-120, selected by position.
slice(iris, 1:20, 51:70, 101:120)
# or with pipe command
iris %>% slice(1:20, 51:70, 101:120)
# Render the same rows as a styled, left-aligned table.
# Spell out FALSE: the shorthand F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
kable(slice(iris, 1:20, 51:70, 101:120)) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    position = "left"
  )
  1. To filter out “virginica” species:
# Drop every row whose Species is "virginica".
dplyr::filter(iris, Species != "virginica")
# or with pipe command
iris %>%
  dplyr::filter(Species != "virginica")
dplyr::filter(iris, Species != "virginica")
  1. To sort data by “Sepal.Length”:
# Order the rows by Sepal.Length (arrange() sorts ascending unless told
# otherwise).
iris %>%
  dplyr::arrange(Sepal.Length)
  1. To add a new column called “log10.Sepal.Length”:
# Append the base-10 logarithm of Sepal.Length as a new column.
iris %>%
  dplyr::mutate(
    log10.Sepal.Length = log10(Sepal.Length)
  )
  1. To keep only the rows with Sepal.Length less than or equal to 5.0:
# Keep the rows whose Sepal.Length is at most 5.0.
iris %>%
  dplyr::filter(Sepal.Length <= 5.0)
  1. To use pipes to remove species “virginica”, then select columns 1, 2 and 5, and then keep only the rows with Sepal.Length <= 5.0:
# Drop "virginica", keep columns 1, 2 and 5, then keep rows with
# Sepal.Length <= 5.0. Species is column 5, so it survives the selection and
# the steps give the same result in either order.
iris %>%
  dplyr::filter(Species != "virginica") %>%
  dplyr::select(1, 2, 5) %>%
  dplyr::filter(Sepal.Length <= 5.0)
  1. To make a ggplot scatter plot of Sepal.Length by Sepal.Width:
# Scatter plot of the sepal measurements, one colour per species.
ggplot(
  data = iris,
  mapping = aes(
    x = Sepal.Length,
    y = Sepal.Width,
    group = Species,
    color = Species
  )
) +
  geom_point()

  1. To manually enter the data set from question 4 and make a scatter plot of OD by hour:
# OD readings for three strains in wide format: one row per strain, one
# back-ticked column per time point (the column name is the hour).
OD600 <- tibble(
  strain = c("A", "B", "C"),
  `0` = c(1.28E-3, 6E-6, 0),
  `5` = c(3.48E-2, 1.0E-5, 0),
  `10` = c(3.70E-1, 1.2E-1, 0),
  `15` = c(9.99E-1, 9.10E-2, 1.72E-4),
  `20` = c(9.99E-1, 9.98E-1, 3.83E-3),
  `25` = c(0.999999, 9.98E-1, 8.74E-1),
  `30` = c(1.00, 1.00, 9.99E-1)
)
OD600
# convert data set to long format
# The time points start out as column names, i.e. text. Convert them to
# numbers so the x axis is a real time scale; left as text they would be
# ordered alphabetically ("0", "10", "15", ..., "30", "5").
OD600.long <- OD600 %>%
  pivot_longer(cols = -strain, names_to = "hours", values_to = "growth") %>%
  dplyr::mutate(hours = as.numeric(hours))
OD600.long
# One growth curve per strain, with the individual readings marked as points.
ggplot(
  data = OD600.long,
  mapping = aes(x = hours, y = growth, group = strain, color = strain)
) +
  geom_line() +
  geom_point()

  1. To manually enter the data set from question 5 and make box plots of expression by gene:
# Expression values in wide format: one row per gene, one column per cell line.
express <- tibble(
  gene = c("gene.1", "gene.2", "gene.3", "gene.4", "gene.5"),
  cell.line.1 = c(11.89, 8.73, 10.66, 5.27, 6.18),
  cell.line.2 = c(7.29, 8.96, 13.00, 6.89, 4.03),
  cell.line.3 = c(13.01, 3.73, 12.09, 4.39, 10.35),
  cell.line.4 = c(8.58, 3.98, 3.69, 12.77, 8.65),
  cell.line.5 = c(9.05, 12.63, 12.84, 13.59, 11.83)
)
# Reshape to long format: every column except gene becomes a
# (cell.line, expression) pair, giving one row per gene and cell line.
express.long <- express %>%
  pivot_longer(
    cols = -gene,
    names_to = "cell.line",
    values_to = "expression"
  )
express.long
# Box plot of expression per gene, pooling the five cell lines.
express.g1 <- ggplot(
  data = express.long,
  mapping = aes(x = gene, y = expression, group = gene, color = gene)
)
express.g1 + geom_boxplot()

# Same data split into one panel per cell line.
ggplot(data = express.long, mapping = aes(x = gene, y = expression)) +
  geom_boxplot() +
  facet_wrap(~cell.line)