# Global knitr chunk defaults: show each chunk's source code in the rendered
# document, and keep messages and warnings out of it.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

library(ggplot2)
library(dplyr)
library(knitr)

# Text manipulation: how to make headings

Heading 1

Heading 2

Heading 3

Heading 4

Font Manipulation

this sentence is in bold

this sentence is in italics

this sentence will have a strikethrough

We can also include a link. We can insert an image like this: Orange

# R can be used as a calculator.
3 + 3
## [1] 6
# Store the result of an arithmetic operation in the object `x`, then show it.
x <- 3 + 5
x
## [1] 8
# c() combines the numbers 1 through 5 into a single vector stored as `y`.
y <- c(1, 2, 3, 4, 5)

# Built-in functions, e.g. a sequence from 0 to 10 in steps of 2.
seq(from = 0, to = 10, by = 2)
## [1]  0  2  4  6  8 10
# Draw 3 random integers from 1 to 100. No seed is set in this chunk, so the
# values printed below will vary between runs.
values <- sample.int(n = 100, size = 3)
values
## [1] 56 12 21
# Custom function: returns its argument increased by 3.14.
add_pi <- function(x) {
  pi_approx <- 3.14
  x + pi_approx
}
# Call the function with a specific value for its argument.
add_pi(8)
## [1] 11.14

Time to read in some data

We will use the built-in dataset in R called “PlantGrowth”

Conducting a quick assessment of datasets

# Load the built-in PlantGrowth dataset.
data(PlantGrowth)
# Typing the object's name prints the whole data frame; it can be manipulated
# like any other object.
PlantGrowth
##    weight group
## 1    4.17  ctrl
## 2    5.58  ctrl
## 3    5.18  ctrl
## 4    6.11  ctrl
## 5    4.50  ctrl
## 6    4.61  ctrl
## 7    5.17  ctrl
## 8    4.53  ctrl
## 9    5.33  ctrl
## 10   5.14  ctrl
## 11   4.81  trt1
## 12   4.17  trt1
## 13   4.41  trt1
## 14   3.59  trt1
## 15   5.87  trt1
## 16   3.83  trt1
## 17   6.03  trt1
## 18   4.89  trt1
## 19   4.32  trt1
## 20   4.69  trt1
## 21   6.31  trt2
## 22   5.12  trt2
## 23   5.54  trt2
## 24   5.50  trt2
## 25   5.37  trt2
## 26   5.29  trt2
## 27   4.92  trt2
## 28   6.15  trt2
## 29   5.80  trt2
## 30   5.26  trt2
# Dimensions of the data frame, reported as (rows, columns).
dim(PlantGrowth)
## [1] 30  2
# Structure: the type of each column, including factors and their levels.
str(object = PlantGrowth)
## 'data.frame':    30 obs. of  2 variables:
##  $ weight: num  4.17 5.58 5.18 6.11 4.5 4.61 5.17 4.53 5.33 5.14 ...
##  $ group : Factor w/ 3 levels "ctrl","trt1",..: 1 1 1 1 1 1 1 1 1 1 ...

Data Wrangling and Visualizing using some tree data

# Load the built-in Orange dataset, then preview its first and last six rows.
data(Orange)
head(Orange, n = 6L)
##   Tree  age circumference
## 1    1  118            30
## 2    1  484            58
## 3    1  664            87
## 4    1 1004           115
## 5    1 1231           120
## 6    1 1372           142
tail(Orange, n = 6L)
##    Tree  age circumference
## 30    5  484            49
## 31    5  664            81
## 32    5 1004           125
## 33    5 1231           142
## 34    5 1372           174
## 35    5 1582           177
# Check for missing values. is.null() only tests whether the object itself is
# NULL, which is never the case for a loaded data frame, so it cannot detect
# missing data. anyNA() reports whether any cell of the data frame is NA.
anyNA(Orange)
## [1] FALSE
# Keep the first six rows of Orange and render them as a formatted table.
orange_table <- head(Orange, n = 6L)
knitr::kable(orange_table)
Tree age circumference
1 118 30
1 484 58
1 664 87
1 1004 115
1 1231 120
1 1372 142
# Basic descriptive statistics for each column of Orange.
summary(object = Orange)
##  Tree       age         circumference  
##  3:7   Min.   : 118.0   Min.   : 30.0  
##  1:7   1st Qu.: 484.0   1st Qu.: 65.5  
##  5:7   Median :1004.0   Median :115.0  
##  2:7   Mean   : 922.1   Mean   :115.9  
##  4:7   3rd Qu.:1372.0   3rd Qu.:161.5  
##        Max.   :1582.0   Max.   :214.0
# Mean circumference per tree. Inside summarize() a new column is named with
# `=`; using `<-` there would make the whole expression text the column name.
Orange %>%
  group_by(Tree) %>%
  summarize(mean_circumference = mean(circumference))
## # A tibble: 5 × 2
##   Tree  mean_circumference
##   <ord>              <dbl>
## 1 3                   94  
## 2 1                   99.6
## 3 5                  111. 
## 4 2                  135. 
## 5 4                  139.
# Evaluate expressions inside the data frame with with() rather than attach():
# attach() would leave Orange on the search path after this chunk, where its
# columns can mask, or be masked by, other objects of the same name.
with(Orange, cor(circumference, age))
## [1] 0.9135189
# Scatter plot with age on the x-axis and circumference on the y-axis.
with(Orange, plot(age, circumference))

# Same graph, using $ to extract the columns from the data frame.
plot(Orange$age, Orange$circumference)

# Same scatter plot, now with a main title, axis labels, and pink points
# drawn with plotting symbol 16.
plot(
  x = Orange$age,
  y = Orange$circumference,
  main = "Circumference vs. Age",
  xlab = "Age",
  ylab = "Circumference",
  pch = 16,
  col = "pink"
)

# ggplot2 version of the scatter plot, for more control and flexibility:
# the points plus a fitted "lm" smooth drawn without its standard-error band.
ggplot(data = Orange, mapping = aes(x = age, y = circumference)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Growth over time and fitted model",
    x = "Age (days)",
    y = "Circumference (cm)"
  ) +
  theme_light() +
  theme(title = element_text(size = 10, face = "bold"))