# R as a calculator: each expression is evaluated and its value printed.
2 + 2
## [1] 4
5 - 3
## [1] 2
# Usual precedence applies: parentheses first, then multiplication, then addition.
4 + 2 * (10 / 2)
## [1] 14
sqrt(x = 9)
## [1] 3
# Arithmetic on a vector is applied to every element at once.
a <- c(2, 3, 4)
a * 2
## [1] 4 6 8
# Attach the packages used by the rest of the script.
#
# The former `rm(list = ls())` call was dropped on purpose: it only deletes
# objects in the global environment (attached packages and options are left
# untouched), so it does not give a clean session, and it silently wipes the
# workspace of anyone who runs this script. Restart R to start fresh instead.
library(here) # builds file paths from the project root; does not change the working directory
## here() starts at /Users/tonisignes/Desktop/Drive UMH/1. PROJECTS/PR344_Nitra_Slovakia_Scientific_Articles/3. Posit Projects/Posit_Intro_EuroDisBioFood
library(datasets) # built-in example datasets (iris is loaded from here below)
library(tidyverse) # attaches ggplot2, tibble, tidyr, readr, purrr, dplyr, stringr and forcats
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Load the data from a built-in dataset.
# Bring the built-in iris data frame into the workspace and open its help page.
data("iris")
help("iris")
# Lowercase the column names so they are easier to type in later steps.
colnames(iris) <- tolower(colnames(iris))
colnames(iris)
## [1] "sepal.length" "sepal.width" "petal.length" "petal.width" "species"
# Number of rows, then number of columns.
c(nrow(iris), ncol(iris))
## [1] 150 5
# First six and last six rows.
head(iris, n = 6)
## sepal.length sepal.width petal.length petal.width species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
tail(iris, n = 6)
## sepal.length sepal.width petal.length petal.width species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Per-column summary statistics (counts per level for the factor column).
summary(object = iris)
## sepal.length sepal.width petal.length petal.width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Compact view of the structure: column types and the first few values.
str(object = iris)
## 'data.frame': 150 obs. of 5 variables:
## $ sepal.length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ sepal.width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ petal.length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ petal.width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# How many flowers of each species.
table(iris[["species"]])
##
## setosa versicolor virginica
## 50 50 50
Load the data from an external dataset (a CSV file).
# Read the data from a CSV file in the project's data/ folder; here() builds
# the path from the project root.
dt <- read.csv(here("data", "iris.csv"))
# Inspect the imported data frame `dt`. These calls previously pointed at the
# built-in `iris` object, so the file that had just been read was never looked at.
# NOTE(review): the printed results kept below were captured while the calls
# still targeted `iris`; re-run the script to refresh them for `dt`, whose
# column names and types depend on the contents of the CSV file.
names(dt)
## [1] "sepal.length" "sepal.width" "petal.length" "petal.width" "species"
dim(dt)
## [1] 150 5
head(dt)
## sepal.length sepal.width petal.length petal.width species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
tail(dt)
## sepal.length sepal.width petal.length petal.width species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
summary(dt)
## sepal.length sepal.width petal.length petal.width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
str(dt)
## 'data.frame': 150 obs. of 5 variables:
## $ sepal.length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ sepal.width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ petal.length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ petal.width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Assumes the CSV has a column named `species` -- TODO confirm against the file.
table(dt$species)
##
## setosa versicolor virginica
## 50 50 50
# Keep only the rows whose species is virginica.
virginica <- iris %>%
  filter(species == "virginica")
dim(virginica)
## [1] 50 5
head(virginica) # shows the first six rows
## sepal.length sepal.width petal.length petal.width species
## 1 6.3 3.3 6.0 2.5 virginica
## 2 5.8 2.7 5.1 1.9 virginica
## 3 7.1 3.0 5.9 2.1 virginica
## 4 6.3 2.9 5.6 1.8 virginica
## 5 6.5 3.0 5.8 2.2 virginica
## 6 7.6 3.0 6.6 2.1 virginica
# Two conditions at once: virginica flowers with sepal.length above 6.
sepalLength6 <- iris %>%
  filter(species == "virginica" & sepal.length > 6)
dim(sepalLength6)
## [1] 41 5
tail(sepalLength6) # shows the last six rows; compare this to head()
## sepal.length sepal.width petal.length petal.width species
## 36 6.8 3.2 5.9 2.3 virginica
## 37 6.7 3.3 5.7 2.5 virginica
## 38 6.7 3.0 5.2 2.3 virginica
## 39 6.3 2.5 5.0 1.9 virginica
## 40 6.5 3.0 5.2 2.0 virginica
## 41 6.2 3.4 5.4 2.3 virginica
# Pick columns by listing each one by name.
selected <- select(iris, sepal.length, sepal.width, petal.length)
# Pick the same columns as a contiguous range, from sepal.length to petal.length.
selected2 <- select(iris, sepal.length:petal.length)
# Both approaches give the same three columns.
head(selected, n = 3)
## sepal.length sepal.width petal.length
## 1 5.1 3.5 1.4
## 2 4.9 3.0 1.4
## 3 4.7 3.2 1.3
head(selected2, n = 3)
## sepal.length sepal.width petal.length
## 1 5.1 3.5 1.4
## 2 4.9 3.0 1.4
## 3 4.7 3.2 1.3
# Add two derived columns in one step:
#   greater.half      - TRUE where sepal.width exceeds half of sepal.length
#   half_sepal_length - the threshold itself (0.5 * sepal.length), for reference
newCol <- mutate(
  iris,
  greater.half = sepal.width > 0.5 * sepal.length,
  half_sepal_length = 0.5 * sepal.length
)
tail(newCol)
## sepal.length sepal.width petal.length petal.width species greater.half
## 145 6.7 3.3 5.7 2.5 virginica FALSE
## 146 6.7 3.0 5.2 2.3 virginica FALSE
## 147 6.3 2.5 5.0 1.9 virginica FALSE
## 148 6.5 3.0 5.2 2.0 virginica FALSE
## 149 6.2 3.4 5.4 2.3 virginica TRUE
## 150 5.9 3.0 5.1 1.8 virginica TRUE
## half_sepal_length
## 145 3.35
## 146 3.35
## 147 3.15
## 148 3.25
## 149 3.10
## 150 2.95
# Plotting a whole data frame draws every column against every other column.
plot(x = iris)
# Scatterplot of two columns; see ?plot for the other arguments.
plot(x = iris$sepal.width, y = iris$sepal.length)
# Histogram of a single column; see ?hist for the other arguments.
hist(x = iris$sepal.width)
# ggplot2 is already attached through library(tidyverse), so no separate
# library(ggplot2) call is needed.

# Scatterplot of sepal width against sepal length.
ggplot(data = iris, mapping = aes(x = sepal.length, y = sepal.width)) +
  geom_point()

# Same scatterplot, with points coloured by species.
ggplot(
  data = iris,
  mapping = aes(x = sepal.length, y = sepal.width, colour = species)
) +
  geom_point()

# Histogram of sepal length, bars filled by species.
ggplot(data = iris, mapping = aes(x = sepal.length, fill = species)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density curve of sepal length, one line per species.
ggplot(data = iris, mapping = aes(x = sepal.length, colour = species)) +
  geom_density()

# Boxplot of sepal length for each species.
ggplot(data = iris, mapping = aes(x = species, y = sepal.length)) +
  geom_boxplot()

# Scatterplot with a single smoothed trend line over all points.
ggplot(data = iris, mapping = aes(x = sepal.length, y = sepal.width)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Scatterplot with a separate smoothed trend line per species.
ggplot(data = iris, mapping = aes(x = sepal.length, y = sepal.width)) +
  geom_point() +
  geom_smooth(mapping = aes(colour = species))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Store a plot in a variable, then add a layer to it: one panel per species.
base <- ggplot(data = iris, mapping = aes(x = sepal.length, y = sepal.width)) +
  geom_point()
base +
  facet_wrap(facets = vars(species))
# One-way analysis of variance: petal length explained by species.
petal.length.aov <- aov(
  formula = petal.length ~ species,
  data = iris
)
# Print the ANOVA table (degrees of freedom, sums of squares, F value, p-value).
summary(object = petal.length.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## species 2 437.1 218.55 1180 <2e-16 ***
## Residuals 147 27.2 0.19
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Save the results from the ANOVA.
# Write the printed ANOVA table to a text file.
#
# capture.output() replaces the manual sink()/sink() pair: it restores normal
# console output even if summary() fails part-way, and it prints the value
# explicitly, so the table is written even when the script is run via source()
# (where top-level results are not auto-printed).
anova_file <- "output/petal-length-anova.txt"
# Create the output folder if it is missing; otherwise opening the file fails.
dir.create(dirname(anova_file), showWarnings = FALSE, recursive = TRUE)
capture.output(summary(object = petal.length.aov), file = anova_file)
# Table as printed by summary():
## Df Sum Sq Mean Sq F value Pr(>F)
## species 2 437.1 218.55 1180 <2e-16 ***
## Residuals 147 27.2 0.19
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1