# install.packages("dslabs")
library("dslabs")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# List the data sets bundled with dslabs, then load the olive oil data
# into the workspace.
data(package = "dslabs")
data(list = "olive")
This data set contains eight chemical measurements on different specimens of olive oil produced in various areas of Italy (northern Apulia, southern Apulia, Calabria, Sicily, inland Sardinia and coastal Sardinia, eastern and western Liguria, and Umbria).
This data frame contains 572 observations, each corresponding to a different specimen of olive oil, and 10 variables. The first and second columns give the region and the area of origin of the olive oils, respectively. Columns 3-10 hold eight chemical measurements of the acid components of the oil specimens: palmitic, palmitoleic, stearic, oleic, linoleic, linolenic, arachidic, and eicosenoic.
- Palmitic - most common saturated fatty acid (found in animals and plants)
- Palmitoleic - mono-unsaturated fatty acid (omega-7, found in high concentration in the liver)
- Stearic - most common saturated fatty acid (found in animal and vegetable fats)
- Oleic - mono-unsaturated fatty acid (omega-9, found in animals and vegetables)
- Linoleic - poly-unsaturated fatty acid (omega-6; one of the two essential fatty acids for humans; virtually insoluble in water)
- Linolenic - essential fatty acid (omega-3, found in plants)
- Arachidic - saturated fatty acid
- Eicosenoic - mono-unsaturated fatty acid (omega-9, found in plant oils)
Two essential fatty acids for humans, by area - Linoleic and linolenic acids are the two essential fatty acids for humans and must be obtained through food.
Two non-essential (omega-9) fatty acids, by area - Oleic and eicosenoic acids are mono-unsaturated fatty acids that are considered “non-essential” because the human body produces them on its own.
# Clean data ----
# Recode empty strings as NA so that blank entries count as missing, then
# report the number of missing values in each column.
olive[olive == ""] <- NA
colSums(is.na(olive))
## region area palmitic palmitoleic stearic oleic
## 0 0 0 0 0 0
## linoleic linolenic arachidic eicosenoic
## 0 0 0 0
# Print the data as a tibble for a compact, typed preview.
as_tibble(olive)
## # A tibble: 572 × 10
## region area palmi…¹ palmi…² stearic oleic linol…³ linol…⁴ arach…⁵ eicos…⁶
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Southern… Nort… 10.8 0.75 2.26 78.2 6.72 0.36 0.6 0.29
## 2 Southern… Nort… 10.9 0.73 2.24 77.1 7.81 0.31 0.61 0.29
## 3 Southern… Nort… 9.11 0.54 2.46 81.1 5.49 0.31 0.63 0.29
## 4 Southern… Nort… 9.66 0.57 2.4 79.5 6.19 0.5 0.78 0.35
## 5 Southern… Nort… 10.5 0.67 2.59 77.7 6.72 0.5 0.8 0.46
## 6 Southern… Nort… 9.11 0.49 2.68 79.2 6.78 0.51 0.7 0.44
## 7 Southern… Nort… 9.22 0.66 2.64 79.9 6.18 0.49 0.56 0.29
## 8 Southern… Nort… 11 0.61 2.35 77.3 7.34 0.39 0.64 0.35
## 9 Southern… Nort… 10.8 0.6 2.39 77.4 7.09 0.46 0.83 0.33
## 10 Southern… Nort… 10.4 0.55 2.13 79.4 6.33 0.26 0.52 0.3
## # … with 562 more rows, and abbreviated variable names ¹palmitic, ²palmitoleic,
## # ³linoleic, ⁴linolenic, ⁵arachidic, ⁶eicosenoic
# Per-column summary: level counts for factors, quartiles and mean for
# numeric columns.
olive %>% summary()
## region area palmitic palmitoleic
## Northern Italy:151 South-Apulia :206 Min. : 6.10 Min. :0.1500
## Sardinia : 98 Inland-Sardinia: 65 1st Qu.:10.95 1st Qu.:0.8775
## Southern Italy:323 Calabria : 56 Median :12.01 Median :1.1000
## Umbria : 51 Mean :12.32 Mean :1.2609
## East-Liguria : 50 3rd Qu.:13.60 3rd Qu.:1.6925
## West-Liguria : 50 Max. :17.53 Max. :2.8000
## (Other) : 94
## stearic oleic linoleic linolenic
## Min. :1.520 Min. :63.00 Min. : 4.480 Min. :0.0000
## 1st Qu.:2.050 1st Qu.:70.00 1st Qu.: 7.707 1st Qu.:0.2600
## Median :2.230 Median :73.03 Median :10.300 Median :0.3300
## Mean :2.289 Mean :73.12 Mean : 9.805 Mean :0.3189
## 3rd Qu.:2.490 3rd Qu.:76.80 3rd Qu.:11.807 3rd Qu.:0.4025
## Max. :3.750 Max. :84.10 Max. :14.700 Max. :0.7400
##
## arachidic eicosenoic
## Min. :0.000 Min. :0.0100
## 1st Qu.:0.500 1st Qu.:0.0200
## Median :0.610 Median :0.1700
## Mean :0.581 Mean :0.1628
## 3rd Qu.:0.700 3rd Qu.:0.2800
## Max. :1.050 Max. :0.5800
##
# Inspect the column types and the leading values of each column
olive %>% str()
## 'data.frame': 572 obs. of 10 variables:
## $ region : Factor w/ 3 levels "Northern Italy",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ area : Factor w/ 9 levels "Calabria","Coast-Sardinia",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ palmitic : num 10.75 10.88 9.11 9.66 10.51 ...
## $ palmitoleic: num 0.75 0.73 0.54 0.57 0.67 0.49 0.66 0.61 0.6 0.55 ...
## $ stearic : num 2.26 2.24 2.46 2.4 2.59 2.68 2.64 2.35 2.39 2.13 ...
## $ oleic : num 78.2 77.1 81.1 79.5 77.7 ...
## $ linoleic : num 6.72 7.81 5.49 6.19 6.72 6.78 6.18 7.34 7.09 6.33 ...
## $ linolenic : num 0.36 0.31 0.31 0.5 0.5 0.51 0.49 0.39 0.46 0.26 ...
## $ arachidic : num 0.6 0.61 0.63 0.78 0.8 0.7 0.56 0.64 0.83 0.52 ...
## $ eicosenoic : num 0.29 0.29 0.29 0.35 0.46 0.44 0.29 0.35 0.33 0.3 ...
# Number of specimens recorded for each area
table(olive[["area"]])
##
## Calabria Coast-Sardinia East-Liguria Inland-Sardinia North-Apulia
## 56 33 50 65 25
## Sicily South-Apulia Umbria West-Liguria
## 36 206 51 50
# Number of specimens recorded for each region
table(olive[["region"]])
##
## Northern Italy Sardinia Southern Italy
## 151 98 323
# Build one data frame per region by keeping only the matching rows.

# Southern Italy
italysouth <- filter(olive, region == "Southern Italy")

# Northern Italy
italynorth <- filter(olive, region == "Northern Italy")

# Sardinia
sardinia <- filter(olive, region == "Sardinia")
#install library
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
##
## stars
library(devtools)
## Loading required package: usethis
# Plot 1 - Linoleic and Linolenic Acids
# Interactive scatter plot of linoleic vs. linolenic acid, with one series
# per value of `area`.
p1 <- hchart(
  olive,
  "scatter",
  hcaes(x = linoleic, y = linolenic, group = area)
) %>%
  # add a colour axis with default settings
  hc_colorAxis() %>%
  # chart-wide font
  hc_chart(
    style = list(
      fontFamily = "NewCenturySchoolbook",
      fontWeight = "bold"
    )
  ) %>%
  # axis titles
  hc_xAxis(title = list(text = "Linoleic Acid")) %>%
  hc_yAxis(title = list(text = "Linolenic Acid")) %>%
  # chart title and data source
  hc_title(
    text = "Linoleic and Linolenic Acids - The Two Essential Fatty Acids for Humans by Area"
  ) %>%
  hc_subtitle(
    text = "Source: J. Zupan, and J. Gasteiger. Neural Networks in Chemistry and Drug Design"
  ) %>%
  # grid-light theme and shared tooltip
  hc_add_theme(hc_theme_gridlight()) %>%
  hc_tooltip(shared = TRUE)

# Render the chart
p1
# Plot 2 - Oleic and Eicosenoic Acids
# Interactive scatter plot of oleic vs. eicosenoic acid, with one series
# per value of `area`.
p2 <- olive %>%
  # scatter plot and aesthetic mapping
  hchart("scatter", hcaes(x = oleic, y = eicosenoic, group = area)) %>%
  # add a colour axis with default settings
  hc_colorAxis() %>%
  # axis titles
  hc_xAxis(title = list(text = "Oleic Acid")) %>%
  hc_yAxis(title = list(text = "Eicosenoic Acid")) %>%
  # chart-wide font; set once, since repeating the same call with identical
  # arguments would only merge the same values again
  hc_chart(
    style = list(
      fontFamily = "Georgia",
      fontWeight = "bold"
    )
  ) %>%
  # chart title and data source
  hc_title(
    text = "Oleic and Eicosenoic Acids - Two Non-Essential Fatty Acids by Area"
  ) %>%
  hc_subtitle(
    text = "Source: J. Zupan, and J. Gasteiger. Neural Networks in Chemistry and Drug Design"
  ) %>%
  # grid-light theme and shared tooltip
  hc_add_theme(hc_theme_gridlight()) %>%
  hc_tooltip(shared = TRUE)

# Render the chart
p2
```