# install.packages("https://cran.rstudio.com/bin/windows/contrib/4.1/dslabs_0.7.4.zip")
# install.packages("dslabs")
library(dslabs)
# Show the datasets bundled with dslabs, then the names of the scripts
# shipped in the package's "script" directory.
data(package = "dslabs")
list.files(path = system.file("script", package = "dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
# Load the olive dataset into the workspace.
data(list = "olive")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggthemes)
library(ggrepel)
# Print a compact summary of olive's columns and their types.
str(object = olive)
## 'data.frame': 572 obs. of 10 variables:
## $ region : Factor w/ 3 levels "Northern Italy",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ area : Factor w/ 9 levels "Calabria","Coast-Sardinia",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ palmitic : num 10.75 10.88 9.11 9.66 10.51 ...
## $ palmitoleic: num 0.75 0.73 0.54 0.57 0.67 0.49 0.66 0.61 0.6 0.55 ...
## $ stearic : num 2.26 2.24 2.46 2.4 2.59 2.68 2.64 2.35 2.39 2.13 ...
## $ oleic : num 78.2 77.1 81.1 79.5 77.7 ...
## $ linoleic : num 6.72 7.81 5.49 6.19 6.72 6.78 6.18 7.34 7.09 6.33 ...
## $ linolenic : num 0.36 0.31 0.31 0.5 0.5 0.51 0.49 0.39 0.46 0.26 ...
## $ arachidic : num 0.6 0.61 0.63 0.78 0.8 0.7 0.56 0.64 0.83 0.52 ...
## $ eicosenoic : num 0.29 0.29 0.29 0.35 0.46 0.44 0.29 0.35 0.33 0.3 ...
# Save a CSV copy of olive in the current working directory;
# missing values are written as empty fields.
write_csv(x = olive, file = "olive.csv", na = "")
Sample #1:
Data on the percentage composition of eight fatty acids found in the lipid fraction of 572 Italian olive oils.
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Scatterplot matrix of the eight fatty-acid columns (columns 1-2 are the
# region and area factors), coloured by area.
# olive$area has 9 levels but the default palette() has only 8 colours, so
# passing the factor straight to `col` gives the ninth area the same colour
# as the first. Build one distinct colour per level instead.
area_colours <- hcl.colors(nlevels(olive$area), palette = "Dark 3")
pairs(olive[, -(1:2)], col = area_colours[as.integer(olive$area)])

# Same matrix, coloured by region.
# olive$region has 3 levels, so its integer codes are 1-3. The previous
# nested ifelse() compared those codes against 5, 7 and 9, which can never
# distinguish them: every point was drawn in colour 1. Use the codes directly.
region <- as.numeric(olive$region)
pairs(olive[, -(1:2)], col = region)
Sample #2:
Correlation between two variables.
# install.packages("DataExplorer")
library(DataExplorer)
# Correlation heatmap of the eight fatty-acid measurements.
# plot_correlation() expects a data frame; the previous call passed only the
# olive$area factor, so no correlation between the measured variables was
# shown. Columns 1-2 (region, area) are dropped to keep the numeric columns.
plot_correlation(olive[, -(1:2)])
Sample #3:
Pairwise relationships among the eight fatty-acid variables.
library(tidyverse)
library(GGally)
# Pairwise plot matrix (GGally::ggpairs) of the eight fatty-acid columns.
# The previous version piped olive into ggpairs() and also passed olive as
# the next positional argument, so the second copy landed in ggpairs'
# `mapping` parameter. Pass the data frame exactly once.
fatty_acids <- c(
  "palmitic", "palmitoleic", "stearic", "oleic",
  "linoleic", "linolenic", "arachidic", "eicosenoic"
)
# NOTE: the name `data` is kept for compatibility with the original script.
data <- ggpairs(olive, columns = fatty_acids)
data
The olive dataset is a data frame with 572 observations and 10 columns. The first column gives the region (one of Southern Italy, Sardinia, and Northern Italy), the second gives the area within that region, and the remaining 8 columns give the fatty-acid variables. Southern Italy comprises the North Apulia, Calabria, South Apulia, and Sicily areas; Sardinia is divided into Inland Sardinia and Coastal Sardinia; and Northern Italy comprises the Umbria, East Liguria, and West Liguria areas. The plot above shows the percentage composition of eight fatty acids found in the lipid fraction of 572 Italian olive oils. The data come from three regions: Southern Italy, Sardinia, and Northern Italy.