Load the dslabs package and list its data sets

## Attach dslabs and list the data sets that ship with it.
library(dslabs)
data(package = "dslabs")
## list.files(system.file("script", package = "dslabs"))

Import the olive oil data set

## Load the olive data set into the workspace, then attach the tidyverse.
data(olive)
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Plotting helpers: extra themes and non-overlapping text labels.
library("ggthemes")
library("ggrepel")
## Open the data viewer, save a CSV copy (missing values written as empty
## strings), and print per-column summary statistics.
view(olive)
olive %>% write_csv("olive.csv", na = "")
summary(olive)
##             region                 area        palmitic      palmitoleic    
##  Northern Italy:151   South-Apulia   :206   Min.   : 6.10   Min.   :0.1500  
##  Sardinia      : 98   Inland-Sardinia: 65   1st Qu.:10.95   1st Qu.:0.8775  
##  Southern Italy:323   Calabria       : 56   Median :12.01   Median :1.1000  
##                       Umbria         : 51   Mean   :12.32   Mean   :1.2609  
##                       East-Liguria   : 50   3rd Qu.:13.60   3rd Qu.:1.6925  
##                       West-Liguria   : 50   Max.   :17.53   Max.   :2.8000  
##                       (Other)        : 94                                   
##     stearic          oleic          linoleic        linolenic     
##  Min.   :1.520   Min.   :63.00   Min.   : 4.480   Min.   :0.0000  
##  1st Qu.:2.050   1st Qu.:70.00   1st Qu.: 7.707   1st Qu.:0.2600  
##  Median :2.230   Median :73.03   Median :10.300   Median :0.3300  
##  Mean   :2.289   Mean   :73.12   Mean   : 9.805   Mean   :0.3189  
##  3rd Qu.:2.490   3rd Qu.:76.80   3rd Qu.:11.807   3rd Qu.:0.4025  
##  Max.   :3.750   Max.   :84.10   Max.   :14.700   Max.   :0.7400  
##                                                                   
##    arachidic       eicosenoic    
##  Min.   :0.000   Min.   :0.0100  
##  1st Qu.:0.500   1st Qu.:0.0200  
##  Median :0.610   Median :0.1700  
##  Mean   :0.581   Mean   :0.1628  
##  3rd Qu.:0.700   3rd Qu.:0.2800  
##  Max.   :1.050   Max.   :0.5800  
## 

View the structure of the data

## Compact overview of each column's type and first few values.
str(object = olive)
## 'data.frame':    572 obs. of  10 variables:
##  $ region     : Factor w/ 3 levels "Northern Italy",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ area       : Factor w/ 9 levels "Calabria","Coast-Sardinia",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ palmitic   : num  10.75 10.88 9.11 9.66 10.51 ...
##  $ palmitoleic: num  0.75 0.73 0.54 0.57 0.67 0.49 0.66 0.61 0.6 0.55 ...
##  $ stearic    : num  2.26 2.24 2.46 2.4 2.59 2.68 2.64 2.35 2.39 2.13 ...
##  $ oleic      : num  78.2 77.1 81.1 79.5 77.7 ...
##  $ linoleic   : num  6.72 7.81 5.49 6.19 6.72 6.78 6.18 7.34 7.09 6.33 ...
##  $ linolenic  : num  0.36 0.31 0.31 0.5 0.5 0.51 0.49 0.39 0.46 0.26 ...
##  $ arachidic  : num  0.6 0.61 0.63 0.78 0.8 0.7 0.56 0.64 0.83 0.52 ...
##  $ eicosenoic : num  0.29 0.29 0.29 0.35 0.46 0.44 0.29 0.35 0.33 0.3 ...

Setting up the scatter plot

## Scatter plot of linoleic vs. arachidic acid content for every olive oil
## sample, coloured by region and labelled by area, with both axes on a log10
## scale.
ds_theme_set() ## using DS Theme
olive %>%
  ggplot(aes(x = linoleic, y = arachidic, label = area)) + ## x, y and label
  geom_point(aes(color = region), size = 1) + ## small points: many samples
  geom_text_repel(nudge_x = 0.005) +
  ## x is mapped to `linoleic`, so the axis title must name linoleic acid
  ## (it previously read "Loinolenic", suggesting the wrong fatty acid).
  scale_x_log10("Linoleic Acid as % of Sample") +
  ## `arachidic` contains zeros; log10 turns them into -Inf, which is what
  ## triggers the "infinite values in continuous y-axis" warning.
  scale_y_log10("Arachidic Acid as % of Sample") +
  ggtitle("Linoleic and Arachidic Content of Italian Olive Oil By Region") +
  scale_color_discrete(name = "Region") +
  theme_solarized()
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: ggrepel: 558 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Notes about olive oil chemical characteristics:

source: https://www.oliveoilsource.com/page/chemical-characteristics

The major fatty acids in olive oil triacylglycerols are:

Oleic Acid (C18:1), a monounsaturated omega-9 fatty acid. It makes up 55 to 83% of olive oil.

Linoleic Acid (C18:2), a polyunsaturated omega-6 fatty acid that makes up about 3.5 to 21% of olive oil.

Palmitic Acid (C16:0), a saturated fatty acid that makes up 7.5 to 20% of olive oil.

Stearic Acid (C18:0), a saturated fatty acid that makes up 0.5 to 5% of olive oil.

Linolenic Acid (C18:3) (specifically alpha-Linolenic Acid), a polyunsaturated omega-3 fatty acid that makes up 0 to 1.5% of olive oil.

Percentage of Linolenic Acid Allowed in Olive Oil

Regarding the polyunsaturated fatty acids (PUFAs), there is a wide acceptable range for extra virgin olive oil; however, the linolenic acid level has to be less than 0.9% per the International Olive Oil Council (IOOC) guidelines. Higher levels, e.g. 1.5%, do not present a nutritional problem, but the IOOC uses the linolenic acid level to establish the authenticity of the olive oil. Seed oils like canola oil have higher levels of linolenic acid.