# load libraries
library(ggplot2)
library(datasets)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
library(dplyr)
library(knitr)
library(directlabels)
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(ggrepel)
library(dplyr)
library(forcats)
# Import the fast-food nutrition table from its CSV file.
# NOTE(review): the object name `c` is the same as the base function c();
# it is kept here because later chunks refer to the data by this name.
c <- utils::read.csv(file = "data_fastfood_calories.csv")

# Print a skimr overview of every column in the table.
skim(c)
Data summary
Name c
Number of rows 515
Number of columns 16
_______________________
Column type frequency:
character 2
numeric 14
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
restaurant 0 1 5 11 0 8 0
item 0 1 5 63 0 505 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
calories 0 1.00 530.91 282.44 20 330.0 490.0 690 2430 ▇▆▁▁▁
cal_fat 0 1.00 238.81 166.41 0 120.0 210.0 310 1270 ▇▃▁▁▁
total_fat 0 1.00 26.59 18.41 0 14.0 23.0 35 141 ▇▃▁▁▁
sat_fat 0 1.00 8.15 6.42 0 4.0 7.0 11 47 ▇▃▁▁▁
trans_fat 0 1.00 0.47 0.84 0 0.0 0.0 1 8 ▇▁▁▁▁
cholesterol 0 1.00 72.46 63.16 0 35.0 60.0 95 805 ▇▁▁▁▁
sodium 0 1.00 1246.74 689.95 15 800.0 1110.0 1550 6080 ▇▆▁▁▁
total_carb 0 1.00 45.66 24.88 0 28.5 44.0 57 156 ▅▇▂▁▁
fiber 12 0.98 4.14 3.04 0 2.0 3.0 5 17 ▇▅▂▁▁
sugar 0 1.00 7.26 6.76 0 3.0 6.0 9 87 ▇▁▁▁▁
protein 1 1.00 27.89 17.68 1 16.0 24.5 36 186 ▇▂▁▁▁
vit_a 214 0.58 18.86 31.38 0 4.0 10.0 20 180 ▇▁▁▁▁
vit_c 210 0.59 20.17 30.59 0 4.0 10.0 30 400 ▇▁▁▁▁
calcium 210 0.59 24.85 25.52 0 8.0 20.0 30 290 ▇▁▁▁▁
# Keep every row whose restaurant is not "Taco Bell", then count the
# remaining rows per restaurant.
cal <- dplyr::filter(c, restaurant != "Taco Bell")
table(cal$restaurant)
## 
##       Arbys Burger King Chick Fil-A Dairy Queen   Mcdonalds       Sonic 
##          55          70          27          42          57          53 
##      Subway 
##          96
# Add a median_sugar column holding, for every row, the median of `sugar`
# within that row's restaurant. The grouping is only needed for this one
# computation, so it is removed again with ungroup() to keep it from
# leaking into the join and the later steps built on cal1.
cal1 <- cal %>%
  group_by(restaurant) %>%
  mutate(median_sugar = median(sugar)) %>%
  ungroup()
  


# Read the fast-food sales table from its CSV file.
d <- utils::read.csv(file = "data_fastfood_sales.csv")

# Inner join: keep only the rows of cal1 whose restaurant also appears in
# the sales table, and append the sales columns to them.
# NOTE(review): the per-restaurant counts printed for `cal` add up to 400
# rows, while the joined tibble printed below has 331. The 69 missing rows
# equal the Chick Fil-A (27) and Dairy Queen (42) counts, so those two names
# presumably do not match the spelling used in the sales file -- confirm.
cal2 <- cal1 %>% inner_join(d, by = "restaurant")

# List the column names of the joined table.
names(cal2)
##  [1] "restaurant"            "item"                  "calories"             
##  [4] "cal_fat"               "total_fat"             "sat_fat"              
##  [7] "trans_fat"             "cholesterol"           "sodium"               
## [10] "total_carb"            "fiber"                 "sugar"                
## [13] "protein"               "vit_a"                 "vit_c"                
## [16] "calcium"               "median_sugar"          "average_sales"        
## [19] "us_sales"              "num_company_stores"    "num_franchised_stores"
## [22] "unit_count"
## Print the joined data as a tibble to get a compact preview
cal2 %>% as_tibble()
## # A tibble: 331 × 22
##    restaurant item      calories cal_fat total_fat sat_fat trans_fat cholesterol
##    <chr>      <chr>        <int>   <int>     <int>   <dbl>     <dbl>       <int>
##  1 Mcdonalds  Artisan …      380      60         7       2       0            95
##  2 Mcdonalds  Single B…      840     410        45      17       1.5         130
##  3 Mcdonalds  Double B…     1130     600        67      27       3           220
##  4 Mcdonalds  Grilled …      750     280        31      10       0.5         155
##  5 Mcdonalds  Crispy B…      920     410        45      12       0.5         120
##  6 Mcdonalds  Big Mac        540     250        28      10       1            80
##  7 Mcdonalds  Cheesebu…      300     100        12       5       0.5          40
##  8 Mcdonalds  Classic …      510     210        24       4       0            65
##  9 Mcdonalds  Double C…      430     190        21      11       1            85
## 10 Mcdonalds  Double Q…      770     400        45      21       2.5         175
## # ℹ 321 more rows
## # ℹ 14 more variables: sodium <int>, total_carb <int>, fiber <int>,
## #   sugar <int>, protein <int>, vit_a <int>, vit_c <int>, calcium <int>,
## #   median_sugar <dbl>, average_sales <dbl>, us_sales <dbl>,
## #   num_company_stores <int>, num_franchised_stores <int>, unit_count <int>
# Convert restaurant into a factor whose levels are sorted by the mean
# us_sales of each restaurant, so the bars in the plot appear in that order.
# `$<-` is used rather than mutate() because cal2 may still be grouped by
# restaurant, and mutate() refuses to modify a grouping column.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
cal2$restaurant <- with(
  cal2,
  reorder(restaurant, us_sales, FUN = mean, na.rm = TRUE)
)

# Print the distribution of us_sales across the rows of the joined table.
summary(cal2$us_sales)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3634    4408   10028   13017   10800   37481
# Bar chart of U.S. sales per restaurant, filled by the restaurant's median
# sugar value.
#
# cal2 holds one row per menu item, and geom_col() stacks all rows that share
# an x value. Plotting cal2 directly would therefore draw each bar as
# us_sales multiplied by the number of items for that restaurant. The data is
# first collapsed to one row per restaurant so the bar height is us_sales
# itself. ungroup() makes this work whether or not cal2 is still grouped.
#
# The former `color = median_sugar` argument was passed to ggplot() outside
# aes(), where it was never used, so it has been dropped.
cal2 %>%
  ungroup() %>%
  distinct(restaurant, us_sales, median_sugar) %>%
  ggplot(aes(x = restaurant, y = us_sales, fill = median_sugar)) +
  geom_col() +
  ylab("U.S. sales (in million)") +
  xlab("Restaurant") +
  theme_classic() +
  labs(fill = "Median sugar (grams) in fast food entries") +
  theme(
    axis.text = element_text(size = 8),
    legend.text = element_text(size = 8)
  ) +
  scale_fill_continuous(type = "viridis")