# load libraries
library(ggplot2)
library(datasets)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
library(dplyr)
library(knitr)
library(directlabels)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggrepel)
library(dplyr)
library(forcats)
# Load the fast-food nutrition table from the CSV in the working directory.
# NOTE(review): the data frame is named `c`, the same name as base R's c();
# calls to c() still resolve to the function, but a clearer name would help.
c <- read.csv(file = "data_fastfood_calories.csv")
# List the column names to see which variables are available
names(c)
## [1] "restaurant" "item" "calories" "cal_fat" "total_fat"
## [6] "sat_fat" "trans_fat" "cholesterol" "sodium" "total_carb"
## [11] "fiber" "sugar" "protein" "vit_a" "vit_c"
## [16] "calcium"
# Print skimr's per-column overview of the data frame: column types,
# missing-value counts and summary statistics for every variable.
skim(c)
Data summary

|                          |      |
|:-------------------------|:-----|
| Name                     | c    |
| Number of rows           | 515  |
| Number of columns        | 16   |
| _______________________  |      |
| Column type frequency:   |      |
| character                | 2    |
| numeric                  | 14   |
| ________________________ |      |
| Group variables          | None |
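Variable type: character

| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|:--------------|----------:|--------------:|----:|----:|------:|---------:|-----------:|
| restaurant    |         0 |             1 |   5 |  11 |     0 |        8 |          0 |
| item          |         0 |             1 |   5 |  63 |     0 |      505 |          0 |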
Variable type: numeric

| skim_variable | n_missing | complete_rate |    mean |     sd |  p0 |   p25 |    p50 |  p75 | p100 | hist  |
|:--------------|----------:|--------------:|--------:|-------:|----:|------:|-------:|-----:|-----:|:------|
| calories      |         0 |          1.00 |  530.91 | 282.44 |  20 | 330.0 |  490.0 |  690 | 2430 | ▇▆▁▁▁ |
| cal_fat       |         0 |          1.00 |  238.81 | 166.41 |   0 | 120.0 |  210.0 |  310 | 1270 | ▇▃▁▁▁ |
| total_fat     |         0 |          1.00 |   26.59 |  18.41 |   0 |  14.0 |   23.0 |   35 |  141 | ▇▃▁▁▁ |
| sat_fat       |         0 |          1.00 |    8.15 |   6.42 |   0 |   4.0 |    7.0 |   11 |   47 | ▇▃▁▁▁ |
| trans_fat     |         0 |          1.00 |    0.47 |   0.84 |   0 |   0.0 |    0.0 |    1 |    8 | ▇▁▁▁▁ |
| cholesterol   |         0 |          1.00 |   72.46 |  63.16 |   0 |  35.0 |   60.0 |   95 |  805 | ▇▁▁▁▁ |
| sodium        |         0 |          1.00 | 1246.74 | 689.95 |  15 | 800.0 | 1110.0 | 1550 | 6080 | ▇▆▁▁▁ |
| total_carb    |         0 |          1.00 |   45.66 |  24.88 |   0 |  28.5 |   44.0 |   57 |  156 | ▅▇▂▁▁ |
| fiber         |        12 |          0.98 |    4.14 |   3.04 |   0 |   2.0 |    3.0 |    5 |   17 | ▇▅▂▁▁ |
| sugar         |         0 |          1.00 |    7.26 |   6.76 |   0 |   3.0 |    6.0 |    9 |   87 | ▇▁▁▁▁ |
| protein       |         1 |          1.00 |   27.89 |  17.68 |   1 |  16.0 |   24.5 |   36 |  186 | ▇▂▁▁▁ |
| vit_a         |       214 |          0.58 |   18.86 |  31.38 |   0 |   4.0 |   10.0 |   20 |  180 | ▇▁▁▁▁ |
| vit_c         |       210 |          0.59 |   20.17 |  30.59 |   0 |   4.0 |   10.0 |   30 |  400 | ▇▁▁▁▁ |
| calcium       |       210 |          0.59 |   24.85 |  25.52 |   0 |   8.0 |   20.0 |   30 |  290 | ▇▁▁▁▁ |
# Pull the columns of interest out of the data frame as plain vectors.
# `<-` is used throughout for assignment (the file previously mixed `=` and
# `<-`), and `[[` extracts a single column by name.
# NOTE(review): the scatter plot further down reads these columns straight
# from the data frame, so these copies may be unused -- confirm before removing.
restaurant <- c[["restaurant"]]
item <- c[["item"]]
calories <- c[["calories"]]
sodium <- c[["sodium"]]
# Scatter plot of sodium (mg) against calories, one panel per restaurant.
# A horizontal reference line is drawn at 2300 mg of sodium and every item
# above that line is labelled with its name.
# NOTE(review): 2300 mg is presumably a recommended daily sodium limit --
# confirm and cite the source.
ggplot(c, aes(x = calories, y = sodium)) +
  geom_point() +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0)
  geom_hline(yintercept = 2300, color = "black", linewidth = 0.2) +
  # Label only the items above the reference line; labels are repelled
  # horizontally so they do not cover each other. ggrepel leaves a point
  # unlabelled when it exceeds `max.overlaps`.
  geom_text_repel(
    data = c %>% filter(sodium > 2300),
    aes(label = item),
    direction = "x",
    size = 3,
    max.overlaps = 10,
    box.padding = unit(0.45, "lines")
  ) +
  facet_wrap(~restaurant) +
  xlab("Calories") +
  ylab("Sodium (mg)") +
  theme_bw() +
  # A single theme() call: the earlier size-4 text settings were overridden
  # by these size-8 settings and had no effect on the final plot.
  theme(
    axis.text = element_text(size = 8),
    legend.text = element_text(size = 8),
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.5)
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
