# Load libraries
library(ggplot2)
library(datasets)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
library(dplyr)
library(knitr)
library(directlabels)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggrepel)
library(dplyr)
library(forcats)
# Load the fast-food nutrition table from its CSV file.
# NOTE(review): the name `c` shadows base::c() for variable lookup; it is
# kept because other code refers to this data frame by that name.
c <- read.csv(file = "data_fastfood_calories.csv")
# Print a per-column overview (type, missingness, summary statistics)
skim(c)
Data summary

|                          |      |
|:-------------------------|:-----|
| Name                     | c    |
| Number of rows           | 515  |
| Number of columns        | 16   |
| _______________________  |      |
| Column type frequency:   |      |
| character                | 2    |
| numeric                  | 14   |
| ________________________ |      |
| Group variables          | None |
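Variable type: character

| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|:--------------|----------:|--------------:|----:|----:|------:|---------:|-----------:|
| restaurant    |         0 |             1 |   5 |  11 |     0 |        8 |          0 |
| item          |         0 |             1 |   5 |  63 |     0 |      505 |          0 |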
Variable type: numeric

| skim_variable | n_missing | complete_rate |    mean |     sd |  p0 |   p25 |    p50 |  p75 | p100 | hist  |
|:--------------|----------:|--------------:|--------:|-------:|----:|------:|-------:|-----:|-----:|:------|
| calories      |         0 |          1.00 |  530.91 | 282.44 |  20 | 330.0 |  490.0 |  690 | 2430 | ▇▆▁▁▁ |
| cal_fat       |         0 |          1.00 |  238.81 | 166.41 |   0 | 120.0 |  210.0 |  310 | 1270 | ▇▃▁▁▁ |
| total_fat     |         0 |          1.00 |   26.59 |  18.41 |   0 |  14.0 |   23.0 |   35 |  141 | ▇▃▁▁▁ |
| sat_fat       |         0 |          1.00 |    8.15 |   6.42 |   0 |   4.0 |    7.0 |   11 |   47 | ▇▃▁▁▁ |
| trans_fat     |         0 |          1.00 |    0.47 |   0.84 |   0 |   0.0 |    0.0 |    1 |    8 | ▇▁▁▁▁ |
| cholesterol   |         0 |          1.00 |   72.46 |  63.16 |   0 |  35.0 |   60.0 |   95 |  805 | ▇▁▁▁▁ |
| sodium        |         0 |          1.00 | 1246.74 | 689.95 |  15 | 800.0 | 1110.0 | 1550 | 6080 | ▇▆▁▁▁ |
| total_carb    |         0 |          1.00 |   45.66 |  24.88 |   0 |  28.5 |   44.0 |   57 |  156 | ▅▇▂▁▁ |
| fiber         |        12 |          0.98 |    4.14 |   3.04 |   0 |   2.0 |    3.0 |    5 |   17 | ▇▅▂▁▁ |
| sugar         |         0 |          1.00 |    7.26 |   6.76 |   0 |   3.0 |    6.0 |    9 |   87 | ▇▁▁▁▁ |
| protein       |         1 |          1.00 |   27.89 |  17.68 |   1 |  16.0 |   24.5 |   36 |  186 | ▇▂▁▁▁ |
| vit_a         |       214 |          0.58 |   18.86 |  31.38 |   0 |   4.0 |   10.0 |   20 |  180 | ▇▁▁▁▁ |
| vit_c         |       210 |          0.59 |   20.17 |  30.59 |   0 |   4.0 |   10.0 |   30 |  400 | ▇▁▁▁▁ |
| calcium       |       210 |          0.59 |   24.85 |  25.52 |   0 |   8.0 |   20.0 |   30 |  290 | ▇▁▁▁▁ |
# Keep every menu item except those sold by Taco Bell
cal <- filter(c, restaurant != "Taco Bell")
# Count the remaining items per restaurant
table(cal[["restaurant"]])
##
## Arbys Burger King Chick Fil-A Dairy Queen Mcdonalds Sonic
## 55 70 27 42 57 53
## Subway
## 96
# Attach each restaurant's median sugar content to every one of its items.
# ungroup() afterwards so the per-restaurant grouping does not silently
# carry over into the join and any later step that uses cal1.
cal1 <- cal %>%
  group_by(restaurant) %>%
  mutate(median_sugar = median(sugar)) %>%
  ungroup()
# Read the fast-food sales file
d <- read.csv("data_fastfood_sales.csv")
# Inner-join the nutrition items with the sales figures on restaurant name.
# inner_join() keeps only restaurants whose name appears in both tables, so
# items from a restaurant that is missing from (or spelled differently in)
# the sales file are dropped.
# relationship = "many-to-one" makes the join fail loudly if a restaurant
# occurs more than once in the sales file, instead of silently duplicating
# menu items.
cal2 <- inner_join(cal1, d, by = "restaurant", relationship = "many-to-one")
colnames(cal2)
## [1] "restaurant" "item" "calories"
## [4] "cal_fat" "total_fat" "sat_fat"
## [7] "trans_fat" "cholesterol" "sodium"
## [10] "total_carb" "fiber" "sugar"
## [13] "protein" "vit_a" "vit_c"
## [16] "calcium" "median_sugar" "average_sales"
## [19] "us_sales" "num_company_stores" "num_franchised_stores"
## [22] "unit_count"
# Print the joined data as a tibble (compact preview with column types)
cal2 %>% as_tibble()
## # A tibble: 331 × 22
## restaurant item calories cal_fat total_fat sat_fat trans_fat cholesterol
## <chr> <chr> <int> <int> <int> <dbl> <dbl> <int>
## 1 Mcdonalds Artisan … 380 60 7 2 0 95
## 2 Mcdonalds Single B… 840 410 45 17 1.5 130
## 3 Mcdonalds Double B… 1130 600 67 27 3 220
## 4 Mcdonalds Grilled … 750 280 31 10 0.5 155
## 5 Mcdonalds Crispy B… 920 410 45 12 0.5 120
## 6 Mcdonalds Big Mac 540 250 28 10 1 80
## 7 Mcdonalds Cheesebu… 300 100 12 5 0.5 40
## 8 Mcdonalds Classic … 510 210 24 4 0 65
## 9 Mcdonalds Double C… 430 190 21 11 1 85
## 10 Mcdonalds Double Q… 770 400 45 21 2.5 175
## # ℹ 321 more rows
## # ℹ 14 more variables: sodium <int>, total_carb <int>, fiber <int>,
## # sugar <int>, protein <int>, vit_a <int>, vit_c <int>, calcium <int>,
## # median_sugar <dbl>, average_sales <dbl>, us_sales <dbl>,
## # num_company_stores <int>, num_franchised_stores <int>, unit_count <int>
# Turn restaurant into a factor whose levels are sorted by mean us_sales,
# so that the bars of the plot are drawn in order of sales.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
cal2$restaurant <- reorder(cal2$restaurant, cal2$us_sales,
                           FUN = mean, na.rm = TRUE)
summary(cal2$us_sales)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3634 4408 10028 13017 10800 37481
# Plot U.S. sales per restaurant, with bars filled by median sugar content.
# cal2 holds one row per menu item and geom_col() stacks every row it is
# given, so plotting cal2 directly draws each bar at
# us_sales * (number of items). Collapse to one row per restaurant first so
# the bar height is the restaurant's us_sales value.
# The former `color = median_sugar` argument sat outside aes(), where
# ggplot() ignores it, so it has been removed.
cal2 %>%
  ungroup() %>%
  distinct(restaurant, us_sales, median_sugar) %>%
  ggplot(aes(x = restaurant, y = us_sales, fill = median_sugar)) +
  geom_col() +
  ylab("U.S. sales (in million)") +
  xlab("Restaurant") +
  theme_classic() +
  labs(fill = "Median sugar (grams) in fast food entries") +
  theme(axis.text = element_text(size = 8),
        legend.text = element_text(size = 8)) +
  scale_fill_continuous(type = "viridis")
