library (tidyverse)
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.3.4 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
library (readxl)
library (ggplot2)
# Load the BMI data set (columns Gender, age, weight, height) from a local CSV.
# NOTE(review): the absolute path only resolves on the original author's machine.
BMI <- readr::read_csv(file = "C:/Users/ThuyAnh/Desktop/ITKM549/BMI.csv")
## Parsed with column specification:
## cols(
## Gender = col_integer(),
## age = col_integer(),
## weight = col_double(),
## height = col_double()
## )
# Quick sanity check of each column's range; the printed summary reports one
# missing value, which is why incomplete rows are dropped further down.
summary(BMI)
## Gender age weight height
## Min. :1.0 Min. :18.00 Min. : 97.0 Min. :56.90
## 1st Qu.:1.0 1st Qu.:29.75 1st Qu.:142.2 1st Qu.:63.38
## Median :2.0 Median :45.00 Median :165.3 Median :65.50
## Mean :1.6 Mean :46.38 Mean :171.7 Mean :65.97
## 3rd Qu.:2.0 3rd Qu.:63.25 3rd Qu.:202.9 3rd Qu.:68.50
## Max. :2.0 Max. :84.00 Max. :324.5 Max. :77.00
## NA's :1
# Load the marketing workbook from a local Excel file and inspect its columns.
# NOTE(review): the absolute path only resolves on the original author's machine.
marketing <- readxl::read_excel(
  path = "C:/Users/ThuyAnh/Desktop/ITKM549/Marketing.xlsx"
)
summary(object = marketing)
## MarketID MarketSize LocationID AgeOfStore
## Min. : 1.000 Length:548 Min. : 1.0 Min. : 1.000
## 1st Qu.: 3.000 Class :character 1st Qu.:216.0 1st Qu.: 4.000
## Median : 6.000 Mode :character Median :504.0 Median : 7.000
## Mean : 5.715 Mean :479.7 Mean : 8.504
## 3rd Qu.: 8.000 3rd Qu.:708.0 3rd Qu.:12.000
## Max. :10.000 Max. :920.0 Max. :28.000
## Promotion Week SalesInThousands
## Min. :1.000 Min. :1.00 Min. :17.34
## 1st Qu.:1.000 1st Qu.:1.75 1st Qu.:42.55
## Median :2.000 Median :2.50 Median :50.20
## Mean :2.029 Mean :2.50 Mean :53.47
## 3rd Qu.:3.000 3rd Qu.:3.25 3rd Qu.:60.48
## Max. :3.000 Max. :4.00 Max. :99.65
# Convert the BMI tibble to a plain data frame and drop rows with any NA.
BMI <- na.omit(data.frame(BMI))

# Recode MarketSize numerically: "Large" -> 3, "Medium" -> 2, any other
# value -> 1 (NA stays NA), and make sure both ID columns are numeric.
marketing <- mutate(
  data.frame(marketing),
  MarketSize = ifelse(
    MarketSize == "Large",
    3,
    ifelse(MarketSize == "Medium", 2, 1)
  ),
  LocationID = as.numeric(LocationID),
  MarketID = as.numeric(MarketID)
)
1. Create a function that calculates BMI
#' Compute body mass index from height and weight
#'
#' Evaluates `weight / height^2 * 703`, element-wise. The factor 703 is the
#' conventional conversion for weight in pounds and height in inches
#' (presumably the units of BMI.csv -- confirm against the data source).
#'
#' @param height Numeric vector of heights. Note that height comes FIRST.
#' @param weight Numeric vector of weights.
#' @return Numeric vector of BMI values, following the usual recycling rules.
calculateBMI <- function(height, weight) {
  scale_factor <- 703
  weight / height^2 * scale_factor
}
2. Test the function created in question 1 on the BMI.csv data
# calculateBMI() is declared as function(height, weight). Passing the columns
# positionally as (weight, height) swaps them and yields BMI values of roughly
# 0.5-4.6 instead of the expected ~15-50, so bind the arguments by name.
# NOTE(review): the summary printed below was produced with the swapped
# arguments; re-knit the document to refresh the BMI column statistics.
BMI$BMI <- calculateBMI(height = BMI$height, weight = BMI$weight)
summary(BMI)
## Gender age weight height
## Min. :1.000 Min. :18.00 Min. : 97.0 Min. :56.90
## 1st Qu.:1.000 1st Qu.:29.50 1st Qu.:142.2 1st Qu.:63.45
## Median :2.000 Median :45.00 Median :165.3 Median :65.50
## Mean :1.596 Mean :46.28 Mean :171.7 Mean :66.03
## 3rd Qu.:2.000 3rd Qu.:63.50 3rd Qu.:202.9 3rd Qu.:68.50
## Max. :2.000 Max. :84.00 Max. :324.5 Max. :77.00
## BMI
## Min. :0.482
## 1st Qu.:1.134
## Median :1.688
## Mean :1.885
## 3rd Qu.:2.310
## Max. :4.617
3. Create a function that creates a histogram
#' Draw a histogram of one variable of a data frame
#'
#' @param data A data frame holding the variable to plot.
#' @param x A single string naming the variable (or an expression in string
#'   form) to put on the x axis; it is also used in the title and x label.
#' @param binwidth Width of each histogram bin. Defaults to 1, which suits
#'   small integer-coded variables; pass a larger value for wide-ranging
#'   variables.
#' @param y_label Label for the y axis. Defaults to "Number of Stores".
#' @return A ggplot object; printing it renders the histogram.
hist_fun <- function(data, x, binwidth = 1, y_label = "Number of Stores") {
  # A vector of names would silently produce a vector of titles, so fail early.
  stopifnot(is.character(x), length(x) == 1L)

  # aes_string() maps the variable by name. It is kept because the file
  # attaches ggplot2 2.2.1; on ggplot2 >= 3.0.0 prefer aes(.data[[x]]).
  # geom_histogram() already uses stat = "bin", so it is not repeated here.
  ggplot(data, aes_string(x = x)) +
    geom_histogram(
      binwidth = binwidth,
      color = "black",
      fill = "blue"
    ) +
    ggtitle(paste("Histogram of", x)) +
    xlab(x) +
    ylab(y_label)
}
4. Test the function developed in question 3 on any variable of the marketing data
# Plot how many rows of the marketing data fall under each MarketID.
hist_fun(data = marketing, x = "MarketID")
