Basic R calculation

# Arithmetic works as on a calculator, with the usual operator precedence.
2 + 2

## [1] 4

5 - 3

## [1] 2

4 + 2 * (10 / 2)

## [1] 14

sqrt(9)

## [1] 3

# Arithmetic on a vector is applied to every element.
a <- c(2, 3, 4)

2 * a

## [1] 4 6 8

Install and load libraries

# rm(list = ls()) was dropped: it only deletes workspace objects and leaves
# attached packages and options in place, so it does not give a clean session.
# Restart R instead when a fresh start is needed.
library(here) # builds file paths relative to the project root

## here() starts at /Users/tonisignes/Desktop/Drive UMH/1. PROJECTS/PR344_Nitra_Slovakia_Scientific_Articles/3. Posit Projects/Posit_Intro_EuroDisBioFood

library(datasets) # built-in datasets
library(tidyverse) # The tidyverse is an opinionated collection of R packages designed for data science

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Iris dataset

Load the data from R's built-in datasets.

# Load the built-in iris data frame and open its help page.
data("iris")
help("iris")
# Convert the column names to lowercase, then print them to check the result.
colnames(iris) <- tolower(colnames(iris))
colnames(iris)

## [1] "sepal.length" "sepal.width"  "petal.length" "petal.width"  "species"

dim(iris)

## [1] 150   5

head(iris)

##   sepal.length sepal.width petal.length petal.width species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

tail(iris)

##     sepal.length sepal.width petal.length petal.width   species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

summary(iris)

##   sepal.length    sepal.width     petal.length    petal.width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ sepal.length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ sepal.width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ petal.length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ petal.width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

table(iris$species)

## 
##     setosa versicolor  virginica 
##         50         50         50

Load the data from an external CSV file.

# Read data/iris.csv, with the path built by here() from the project root.
# NOTE(review): the result is stored in dt, but every check that follows
# inspects iris, not dt — presumably dt was intended; confirm.
dt <- read.csv(here("data", "iris.csv"))
names(iris)

## [1] "sepal.length" "sepal.width"  "petal.length" "petal.width"  "species"

dim(iris)

## [1] 150   5

head(iris)

##   sepal.length sepal.width petal.length petal.width species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

tail(iris)

##     sepal.length sepal.width petal.length petal.width   species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

summary(iris)

##   sepal.length    sepal.width     petal.length    petal.width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ sepal.length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ sepal.width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ petal.length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ petal.width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

table(iris$species)

## 
##     setosa versicolor  virginica 
##         50         50         50

# Keep only the rows whose species is virginica, then check the size of the result
virginica <- iris %>%
  filter(species == "virginica")
dim(virginica)

## [1] 50  5

head(virginica) # This displays the first six rows

##   sepal.length sepal.width petal.length petal.width   species
## 1          6.3         3.3          6.0         2.5 virginica
## 2          5.8         2.7          5.1         1.9 virginica
## 3          7.1         3.0          5.9         2.1 virginica
## 4          6.3         2.9          5.6         1.8 virginica
## 5          6.5         3.0          5.8         2.2 virginica
## 6          7.6         3.0          6.6         2.1 virginica

# Virginica rows whose sepal length is greater than 6; both conditions must hold
sepalLength6 <- iris %>%
  filter(species == "virginica", sepal.length > 6)
dim(sepalLength6)

## [1] 41  5

tail(sepalLength6) # compare this to head()

##    sepal.length sepal.width petal.length petal.width   species
## 36          6.8         3.2          5.9         2.3 virginica
## 37          6.7         3.3          5.7         2.5 virginica
## 38          6.7         3.0          5.2         2.3 virginica
## 39          6.3         2.5          5.0         1.9 virginica
## 40          6.5         3.0          5.2         2.0 virginica
## 41          6.2         3.4          5.4         2.3 virginica

# Keep three columns by naming each one explicitly
selected <- select(iris, sepal.length, sepal.width, petal.length)
# Keep the same three columns as a range, from sepal.length through petal.length
selected2 <- select(iris, sepal.length:petal.length)
head(selected, n = 3)

##   sepal.length sepal.width petal.length
## 1          5.1         3.5          1.4
## 2          4.9         3.0          1.4
## 3          4.7         3.2          1.3

head(selected2, 3)

##   sepal.length sepal.width petal.length
## 1          5.1         3.5          1.4
## 2          4.9         3.0          1.4
## 3          4.7         3.2          1.3

# Add two derived columns in a single step:
#   greater.half      - TRUE where sepal.width exceeds half of sepal.length
#   half_sepal_length - half of sepal.length, the value used in that comparison
newCol <- iris %>%
  mutate(
    greater.half = sepal.width > 0.5 * sepal.length,
    half_sepal_length = 0.5 * sepal.length
  )
tail(newCol)

##     sepal.length sepal.width petal.length petal.width   species greater.half
## 145          6.7         3.3          5.7         2.5 virginica        FALSE
## 146          6.7         3.0          5.2         2.3 virginica        FALSE
## 147          6.3         2.5          5.0         1.9 virginica        FALSE
## 148          6.5         3.0          5.2         2.0 virginica        FALSE
## 149          6.2         3.4          5.4         2.3 virginica         TRUE
## 150          5.9         3.0          5.1         1.8 virginica         TRUE
##     half_sepal_length
## 145              3.35
## 146              3.35
## 147              3.15
## 148              3.25
## 149              3.10
## 150              2.95

Visualization

plot(iris)

# use ?plot to read more about other arguments
plot(iris$sepal.width, iris$sepal.length)

# ?hist will give you details on more arguments
hist(iris$sepal.width)

# library(ggplot2) is not needed here: ggplot2 is already attached by library(tidyverse)
# Scatter plot of sepal width against sepal length
ggplot(iris, aes(x = sepal.length, y = sepal.width)) +
  geom_point()

# Same scatter plot, with points coloured by species
ggplot(iris, aes(x = sepal.length, y = sepal.width, colour = species)) +
  geom_point()

# Histogram of sepal length, with bars filled by species
ggplot(iris, aes(x = sepal.length, fill = species)) +
  geom_histogram()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density curves of sepal length, one outline colour per species
ggplot(iris, aes(x = sepal.length, colour = species)) +
  geom_density()

# Box plots of sepal length, one box per species
ggplot(iris, aes(x = species, y = sepal.length)) +
  geom_boxplot()

# Scatter plot with a single smoothed trend line fitted to all points
ggplot(iris, aes(x = sepal.length, y = sepal.width)) +
  geom_point() +
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Scatter plot with a separate smoothed trend line for each species
ggplot(iris, aes(x = sepal.length, y = sepal.width)) +
  geom_point() +
  geom_smooth(aes(colour = species))

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Store the scatter plot so that further layers can be added to it later
base <- ggplot(iris, aes(x = sepal.length, y = sepal.width)) +
  geom_point()

# Draw the stored plot as one panel per species
base +
  facet_wrap(~species)

Inference

# One-way ANOVA of petal length with species as the grouping factor
petal.length.aov <- aov(petal.length ~ species, data = iris)
summary(petal.length.aov)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## species       2  437.1  218.55    1180 <2e-16 ***
## Residuals   147   27.2    0.19                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Save the results from the ANOVA

# Divert printed output to a text file; the sink() call after the summary
# restores normal output.
# NOTE(review): assumes the output/ directory already exists under the current
# working directory — confirm.
sink(file = "output/petal-length-anova.txt")
summary(object = petal.length.aov)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## species       2  437.1  218.55    1180 <2e-16 ***
## Residuals   147   27.2    0.19                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

sink()

Practice Exercises

Basic R calculation

Install and load libraries

Iris dataset

Visualization

Inference