DS labs HWK

library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.5.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.1     ✔ stringr   1.5.2
✔ ggplot2   4.0.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("dslabs")
Warning: package 'dslabs' was built under R version 4.5.3
data(package="dslabs")
list.files(system.file("script", package = "dslabs"))
 [1] "make-admissions.R"                   
 [2] "make-brca.R"                         
 [3] "make-brexit_polls.R"                 
 [4] "make-calificaciones.R"               
 [5] "make-death_prob.R"                   
 [6] "make-divorce_margarine.R"            
 [7] "make-gapminder-rdas.R"               
 [8] "make-greenhouse_gases.R"             
 [9] "make-historic_co2.R"                 
[10] "make-mice_weights.R"                 
[11] "make-mnist_127.R"                    
[12] "make-mnist_27.R"                     
[13] "make-movielens.R"                    
[14] "make-murders-rda.R"                  
[15] "make-na_example-rda.R"               
[16] "make-nyc_regents_scores.R"           
[17] "make-olive.R"                        
[18] "make-outlier_example.R"              
[19] "make-polls_2008.R"                   
[20] "make-polls_us_election_2016.R"       
[21] "make-pr_death_counts.R"              
[22] "make-reported_heights-rda.R"         
[23] "make-research_funding_rates.R"       
[24] "make-results_us_election_2012.R"     
[25] "make-stars.R"                        
[26] "make-temp_carbon.R"                  
[27] "make-tissue-gene-expression.R"       
[28] "make-trump_tweets.R"                 
[29] "make-weekly_us_contagious_diseases.R"
[30] "save-gapminder-example-csv.R"        
data("olive")
head(olive)
          region         area palmitic palmitoleic stearic oleic linoleic
1 Southern Italy North-Apulia    10.75        0.75    2.26 78.23     6.72
2 Southern Italy North-Apulia    10.88        0.73    2.24 77.09     7.81
3 Southern Italy North-Apulia     9.11        0.54    2.46 81.13     5.49
4 Southern Italy North-Apulia     9.66        0.57    2.40 79.52     6.19
5 Southern Italy North-Apulia    10.51        0.67    2.59 77.71     6.72
6 Southern Italy North-Apulia     9.11        0.49    2.68 79.24     6.78
  linolenic arachidic eicosenoic
1      0.36      0.60       0.29
2      0.31      0.61       0.29
3      0.31      0.63       0.29
4      0.50      0.78       0.35
5      0.50      0.80       0.46
6      0.51      0.70       0.44
# Count missing values in the columns mapped to x, y, and color in the plots.
# Each recorded count is 0, so no rows need to be dropped or imputed.
# NOTE(review): palmitic is also plotted (size aesthetic) but is not checked
# here -- confirm it has no missing values as well.
sum(is.na(olive$oleic))
[1] 0
sum(is.na(olive$linoleic))
[1] 0
sum(is.na(olive$region))
[1] 0
# Faceted scatterplot of linoleic vs. oleic acid: one panel per region, with
# points colored by region and sized by palmitic acid content.
olive |>
  ggplot(aes(oleic, linoleic, colour = region, size = palmitic)) +
  theme_minimal(base_size = 12) +
  facet_wrap(vars(region)) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Relationship Between Oleic and Linoleic Acid by Region",
    x = "Oleic Acid (%)",
    y = "Linoleic Acid (%)",
    colour = "Region",
    size = "Palmitic Acid (%)"
  )

# Single-panel scatterplot of linoleic vs. oleic acid with all regions
# overlaid: color distinguishes region, point size encodes palmitic acid.
olive |>
  ggplot(aes(oleic, linoleic, colour = region, size = palmitic)) +
  theme_minimal(base_size = 12) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Relationship Between Oleic and Linoleic Acid by Region",
    x = "Oleic Acid (%)",
    y = "Linoleic Acid (%)",
    colour = "Region",
    size = "Palmitic Acid (%)"
  )

I chose the olive dataset because my country is known for its high-quality olive oil, so I found it interesting to work with. The dataset contains the fatty acid composition of olive oil samples from different regions of Italy. Before making the graph, I checked the variables I used (oleic, linoleic, palmitic, and region) and found no missing values, so no data cleaning was needed. I created a scatterplot of the relationship between oleic acid and linoleic acid, shown both faceted by region and combined in a single panel. I used color to distinguish the regions and point size to represent palmitic acid levels. From the graph, I noticed a negative relationship: as oleic acid increases, linoleic acid tends to decrease. I also saw that the points form clusters by region, which suggests that olive oil composition varies by region.