# Load libraries
library(ggplot2)
library(datasets)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
library(dplyr)
library(knitr)
library(directlabels)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggrepel)
library(dplyr)
# Read the fast-food sales data from the CSV file (path is relative to the
# working directory)
d <- read.csv(file = "data_fastfood_sales.csv")
# Preview the first six rows of the data
head(d, n = 6L)
## restaurant average_sales us_sales num_company_stores num_franchised_stores
## 1 Subway 416.86 10800.00 0 25908
## 2 Mcdonalds 2670.32 37480.67 842 13194
## 3 Starbucks 945.27 13167.61 8222 5708
## 4 Dunkin Donuts 733.13 9192.00 0 12538
## 5 Pizza Hut 900.00 5510.84 96 7426
## 6 Burger King 1387.81 10028.32 50 7196
## unit_count
## 1 25908
## 2 14036
## 3 13930
## 4 12538
## 5 7522
## 6 7266
# Number of rows and columns
dim(d)
## [1] 19 6
# Column names of the data frame
names(d)
## [1] "restaurant" "average_sales" "us_sales"
## [4] "num_company_stores" "num_franchised_stores" "unit_count"
# Pull each column of `d` out into its own vector
restaurant <- d[["restaurant"]]
avg_sales <- d[["average_sales"]]
us_sales <- d[["us_sales"]]
num_comp_stores <- d[["num_company_stores"]]
num_franch_stores <- d[["num_franchised_stores"]]
unit_count <- d[["unit_count"]]
# Proportion of franchised stores: franchised stores divided by total stores
pro <- d[["num_franchised_stores"]] / d[["unit_count"]]
# Scatter plot of U.S. sales against total number of stores, both on log10
# axes. Points are coloured by the proportion of stores that are franchised
# and each point is labelled with its restaurant name.
ggplot(d, aes(x = us_sales, y = unit_count, label = restaurant)) +
  # Map colour inside aes(): a `color` argument given to ggplot() outside
  # aes() is ignored, and `fill` does not affect the default point shape, so
  # the proportion has to be mapped to `colour` for it to show on the points
  # and produce a legend. It is computed from the columns of `d` directly.
  geom_point(aes(colour = num_franchised_stores / unit_count)) +
  # Labels take no colour mapping, so they keep the default text colour.
  geom_text_repel() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "U.S. sales in million (log10 scale)",
    y = "Total number of stores (log10 scale)",
    colour = "Proportion of stores franchised"
  ) +
  theme_minimal() +
  theme(
    title = element_text(size = 10),
    axis.text = element_text(size = 8),
    legend.text = element_text(size = 8),
    # `linewidth` replaces the deprecated `size` argument for rectangle borders
    panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.5)
  )
