# install.packages("dslabs")

library("dslabs")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# List the data sets available in the dslabs package, then load the
# olive oil data into the workspace as `olive`.
data(package = "dslabs")
data(olive)

Description

This data set represents eight chemical measurements on different specimens of olive oil produced in various regions of Italy (northern Apulia, southern Apulia, Calabria, Sicily, inland Sardinia and coast Sardinia, eastern and western Liguria, Umbria).

Format

This data frame contains 572 observations, each corresponding to a different specimen of olive oil, and 10 variables. The first and second columns give the region and the area of origin of the olive oils, respectively. Columns 3-10 contain the eight chemical measurements of the acid components of the oil specimens: palmitic, palmitoleic, stearic, oleic, linoleic, linolenic, arachidic, and eicosenoic.

Summary of the Eight Fatty Acids Measured in the Oil Specimens

Palmitic - most common saturated fatty acid (found in animals and plants); Palmitoleic - monounsaturated fatty acid (omega-7, found in high concentration in the liver); Stearic - one of the most common saturated fatty acids (found in animal and vegetable fats); Oleic - monounsaturated fatty acid (omega-9, found in animals and vegetables); Linoleic - polyunsaturated fatty acid (omega-6, one of the two essential fatty acids for humans; virtually insoluble in water); Linolenic - essential fatty acid (omega-3, found in plants); Arachidic - saturated fatty acid; Eicosenoic - monounsaturated fatty acid (omega-9, found in plant oils).

Two Plots

Two essential fatty acids for humans - Linoleic and Linolenic Acids are the two essential fatty acids for humans and must be consumed through food.

Two non-essential fatty acids (omega-9) - Oleic and Eicosenoic Acids are monounsaturated fatty acids that are considered “non-essential” because the human body produces them on its own.

# Clean data ----
# Recode empty strings as NA, then count the missing values in each column.
# (The former `olive <- olive` self-assignment was a no-op and is dropped.)
olive[olive == ""] <- NA
colSums(is.na(olive))
##      region        area    palmitic palmitoleic     stearic       oleic 
##           0           0           0           0           0           0 
##    linoleic   linolenic   arachidic  eicosenoic 
##           0           0           0           0
# Preview the data as a tibble (compact print of the first rows and column types)
as_tibble(olive)
## # A tibble: 572 × 10
##    region    area  palmi…¹ palmi…² stearic oleic linol…³ linol…⁴ arach…⁵ eicos…⁶
##    <fct>     <fct>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 Southern… Nort…   10.8     0.75    2.26  78.2    6.72    0.36    0.6     0.29
##  2 Southern… Nort…   10.9     0.73    2.24  77.1    7.81    0.31    0.61    0.29
##  3 Southern… Nort…    9.11    0.54    2.46  81.1    5.49    0.31    0.63    0.29
##  4 Southern… Nort…    9.66    0.57    2.4   79.5    6.19    0.5     0.78    0.35
##  5 Southern… Nort…   10.5     0.67    2.59  77.7    6.72    0.5     0.8     0.46
##  6 Southern… Nort…    9.11    0.49    2.68  79.2    6.78    0.51    0.7     0.44
##  7 Southern… Nort…    9.22    0.66    2.64  79.9    6.18    0.49    0.56    0.29
##  8 Southern… Nort…   11       0.61    2.35  77.3    7.34    0.39    0.64    0.35
##  9 Southern… Nort…   10.8     0.6     2.39  77.4    7.09    0.46    0.83    0.33
## 10 Southern… Nort…   10.4     0.55    2.13  79.4    6.33    0.26    0.52    0.3 
## # … with 562 more rows, and abbreviated variable names ¹​palmitic, ²​palmitoleic,
## #   ³​linoleic, ⁴​linolenic, ⁵​arachidic, ⁶​eicosenoic
# Per-column summary: level counts for factors, quartiles and mean for numerics
olive %>% summary()
##             region                 area        palmitic      palmitoleic    
##  Northern Italy:151   South-Apulia   :206   Min.   : 6.10   Min.   :0.1500  
##  Sardinia      : 98   Inland-Sardinia: 65   1st Qu.:10.95   1st Qu.:0.8775  
##  Southern Italy:323   Calabria       : 56   Median :12.01   Median :1.1000  
##                       Umbria         : 51   Mean   :12.32   Mean   :1.2609  
##                       East-Liguria   : 50   3rd Qu.:13.60   3rd Qu.:1.6925  
##                       West-Liguria   : 50   Max.   :17.53   Max.   :2.8000  
##                       (Other)        : 94                                   
##     stearic          oleic          linoleic        linolenic     
##  Min.   :1.520   Min.   :63.00   Min.   : 4.480   Min.   :0.0000  
##  1st Qu.:2.050   1st Qu.:70.00   1st Qu.: 7.707   1st Qu.:0.2600  
##  Median :2.230   Median :73.03   Median :10.300   Median :0.3300  
##  Mean   :2.289   Mean   :73.12   Mean   : 9.805   Mean   :0.3189  
##  3rd Qu.:2.490   3rd Qu.:76.80   3rd Qu.:11.807   3rd Qu.:0.4025  
##  Max.   :3.750   Max.   :84.10   Max.   :14.700   Max.   :0.7400  
##                                                                   
##    arachidic       eicosenoic    
##  Min.   :0.000   Min.   :0.0100  
##  1st Qu.:0.500   1st Qu.:0.0200  
##  Median :0.610   Median :0.1700  
##  Mean   :0.581   Mean   :0.1628  
##  3rd Qu.:0.700   3rd Qu.:0.2800  
##  Max.   :1.050   Max.   :0.5800  
## 
# Inspect the column types and the first few values of each variable
olive %>% str()
## 'data.frame':    572 obs. of  10 variables:
##  $ region     : Factor w/ 3 levels "Northern Italy",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ area       : Factor w/ 9 levels "Calabria","Coast-Sardinia",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ palmitic   : num  10.75 10.88 9.11 9.66 10.51 ...
##  $ palmitoleic: num  0.75 0.73 0.54 0.57 0.67 0.49 0.66 0.61 0.6 0.55 ...
##  $ stearic    : num  2.26 2.24 2.46 2.4 2.59 2.68 2.64 2.35 2.39 2.13 ...
##  $ oleic      : num  78.2 77.1 81.1 79.5 77.7 ...
##  $ linoleic   : num  6.72 7.81 5.49 6.19 6.72 6.78 6.18 7.34 7.09 6.33 ...
##  $ linolenic  : num  0.36 0.31 0.31 0.5 0.5 0.51 0.49 0.39 0.46 0.26 ...
##  $ arachidic  : num  0.6 0.61 0.63 0.78 0.8 0.7 0.56 0.64 0.83 0.52 ...
##  $ eicosenoic : num  0.29 0.29 0.29 0.35 0.46 0.44 0.29 0.35 0.33 0.3 ...
# Number of specimens per area
table(olive[["area"]])
## 
##        Calabria  Coast-Sardinia    East-Liguria Inland-Sardinia    North-Apulia 
##              56              33              50              65              25 
##          Sicily    South-Apulia          Umbria    West-Liguria 
##              36             206              51              50
# Number of specimens per region
table(olive[["region"]])
## 
## Northern Italy       Sardinia Southern Italy 
##            151             98            323
# Split the data into one data frame per region ----
# Southern Italy
italysouth <- filter(olive, region == "Southern Italy")
# Northern Italy
italynorth <- filter(olive, region == "Northern Italy")
# Sardinia
sardinia <- filter(olive, region == "Sardinia")
#load library
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
## 
##     stars
library(devtools)
## Loading required package: usethis
# Plot 1 - Linoleic and Linolenic Acids ----
# Scatter plot of linolenic against linoleic acid, one series per region.
p1 <- hchart(
  olive,
  "scatter",
  hcaes(x = linoleic, y = linolenic, group = region)
)

p1 <- p1 %>%
  # color axis with default settings
  hc_colorAxis() %>%
  # chart-wide font
  hc_chart(
    style = list(fontFamily = "NewCenturySchoolbook", fontWeight = "bold")
  ) %>%
  # axis titles
  hc_xAxis(title = list(text = "Linoleic Acid")) %>%
  hc_yAxis(title = list(text = "Linolenic Acid")) %>%
  # title and data source
  hc_title(text = "Linoleic and Linolenic Acids - The Two Essential Fatty Acids for Humans") %>%
  hc_subtitle(text = "Source: J. Zupan, and J. Gasteiger. Neural Networks in Chemistry and Drug Design") %>%
  # theme and tooltip options
  hc_add_theme(hc_theme_538()) %>%
  hc_tooltip(shared = TRUE)

# Render the chart
p1
# Plot 2 - Oleic and Eicosenoic Acids ----
# Scatter plot of eicosenoic against oleic acid, one series per region.
p2 <- olive %>%
  # plot type and aesthetics
  hchart("scatter", hcaes(x = oleic, y = eicosenoic, group = region)) %>%
  # color axis with default settings
  hc_colorAxis() %>%
  # axis titles
  hc_xAxis(title = list(text = "Oleic Acid")) %>%
  hc_yAxis(title = list(text = "Eicosenoic Acid")) %>%
  # chart-wide font; set once (the original repeated this identical call
  # a second time after the subtitle)
  hc_chart(style = list(fontFamily = "Georgia", fontWeight = "bold")) %>%
  # title and data source
  hc_title(text = "Oleic and Eicosenoic Acids - Two Non-Essential Fatty Acids") %>%
  hc_subtitle(text = "Source: J. Zupan, and J. Gasteiger. Neural Networks in Chemistry and Drug Design") %>%
  # theme and tooltip options
  hc_add_theme(hc_theme_gridlight()) %>%
  hc_tooltip(shared = TRUE)

# Render the chart
p2

```