library (tidyverse)
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.3.4     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
library (readxl)
library (ggplot2)
# Load the raw BMI measurements from CSV; readr guesses the column types.
BMI <- readr::read_csv(
  file = "C:/Users/ThuyAnh/Desktop/ITKM549/BMI.csv"
)
## Parsed with column specification:
## cols(
##   Gender = col_integer(),
##   age = col_integer(),
##   weight = col_double(),
##   height = col_double()
## )
# Print per-column summary statistics of the raw BMI data, including NA counts.
summary(BMI)
##      Gender         age            weight          height     
##  Min.   :1.0   Min.   :18.00   Min.   : 97.0   Min.   :56.90  
##  1st Qu.:1.0   1st Qu.:29.75   1st Qu.:142.2   1st Qu.:63.38  
##  Median :2.0   Median :45.00   Median :165.3   Median :65.50  
##  Mean   :1.6   Mean   :46.38   Mean   :171.7   Mean   :65.97  
##  3rd Qu.:2.0   3rd Qu.:63.25   3rd Qu.:202.9   3rd Qu.:68.50  
##  Max.   :2.0   Max.   :84.00   Max.   :324.5   Max.   :77.00  
##                                NA's   :1
# Load the marketing data from the Excel workbook (first sheet by default).
marketing <- readxl::read_excel(
  path = "C:/Users/ThuyAnh/Desktop/ITKM549/Marketing.xlsx"
)
# Print per-column summary statistics of the raw marketing data.
summary(marketing)
##     MarketID       MarketSize          LocationID      AgeOfStore    
##  Min.   : 1.000   Length:548         Min.   :  1.0   Min.   : 1.000  
##  1st Qu.: 3.000   Class :character   1st Qu.:216.0   1st Qu.: 4.000  
##  Median : 6.000   Mode  :character   Median :504.0   Median : 7.000  
##  Mean   : 5.715                      Mean   :479.7   Mean   : 8.504  
##  3rd Qu.: 8.000                      3rd Qu.:708.0   3rd Qu.:12.000  
##  Max.   :10.000                      Max.   :920.0   Max.   :28.000  
##    Promotion          Week      SalesInThousands
##  Min.   :1.000   Min.   :1.00   Min.   :17.34   
##  1st Qu.:1.000   1st Qu.:1.75   1st Qu.:42.55   
##  Median :2.000   Median :2.50   Median :50.20   
##  Mean   :2.029   Mean   :2.50   Mean   :53.47   
##  3rd Qu.:3.000   3rd Qu.:3.25   3rd Qu.:60.48   
##  Max.   :3.000   Max.   :4.00   Max.   :99.65
# Convert to a plain data frame and drop every row that contains an NA.
BMI <- na.omit(data.frame(BMI))
# Convert to a plain data frame, then recode columns one at a time.
marketing <- data.frame(marketing)

# Encode market size numerically: "Large" -> 3, "Medium" -> 2, anything else
# -> 1. NA stays NA because ifelse() propagates a missing condition.
marketing$MarketSize <- ifelse(
  marketing$MarketSize == "Large",
  3,
  ifelse(marketing$MarketSize == "Medium", 2, 1)
)

# Force the identifier columns to numeric (double) storage.
marketing$LocationID <- as.numeric(marketing$LocationID)
marketing$MarketID <- as.numeric(marketing$MarketID)

1. Create a function that calculates BMI

#' Compute body mass index with the 703 conversion factor.
#'
#' Vectorised: `height` and `weight` may be vectors and are combined
#' element-wise.
#'
#' @param height Numeric height(s); presumably inches, given the 703
#'   factor -- confirm against the data source.
#' @param weight Numeric weight(s); presumably pounds -- confirm.
#' @return Numeric vector equal to `weight / height^2 * 703`.
calculateBMI <- function(height, weight) {
  height_squared <- height^2
  weight / height_squared * 703
}

2. Test the function created in question 1 on the BMI.csv data

# Add a BMI column computed from each row's height and weight.
# The arguments are passed by name because calculateBMI() is declared as
# (height, weight); the earlier positional call
# calculateBMI(BMI$weight, BMI$height) swapped them and therefore computed
# height / weight^2 * 703.
# NOTE(review): the summary output recorded below this call (BMI between
# roughly 0.48 and 4.6) was produced by the swapped call; re-run to refresh it.
BMI$BMI <- calculateBMI(height = BMI$height, weight = BMI$weight)
summary(BMI)
##      Gender           age            weight          height     
##  Min.   :1.000   Min.   :18.00   Min.   : 97.0   Min.   :56.90  
##  1st Qu.:1.000   1st Qu.:29.50   1st Qu.:142.2   1st Qu.:63.45  
##  Median :2.000   Median :45.00   Median :165.3   Median :65.50  
##  Mean   :1.596   Mean   :46.28   Mean   :171.7   Mean   :66.03  
##  3rd Qu.:2.000   3rd Qu.:63.50   3rd Qu.:202.9   3rd Qu.:68.50  
##  Max.   :2.000   Max.   :84.00   Max.   :324.5   Max.   :77.00  
##       BMI       
##  Min.   :0.482  
##  1st Qu.:1.134  
##  Median :1.688  
##  Mean   :1.885  
##  3rd Qu.:2.310  
##  Max.   :4.617

3. Create a function that creates a histogram

# Build a histogram of a single column of a data frame.
#
# Args:
#   data: data frame passed to ggplot().
#   x:    column name as a character string; it is also pasted into the plot
#         title and used as the x-axis label.
#
# Returns:
#   The ggplot object: bins one unit wide, blue bars with black outlines,
#   and a y-axis labelled "Number of Stores".
hist_fun <- function(data, x) {
  plot_title <- paste("Histogram of", x)

  bars <- geom_histogram(
    binwidth = 1,
    stat = "bin",
    color = "black",
    fill = "blue"
  )

  # aes_string() maps the column named by the string in `x`.
  base_plot <- ggplot(data, aes_string(x = x))

  base_plot +
    bars +
    ggtitle(plot_title) +
    xlab(x) +
    ylab("Number of Stores")
}

4. Test the function developed in question 3 on any variable of the marketing data

# Draw the histogram for the MarketID column; the plot auto-prints at top level.
hist_fun(data = marketing, x = "MarketID")

5. Create a loop that generates histograms for every continuous variable in the marketing data. The loop should use the function created in question 3.

# Column names to plot: every marketing column except LocationID.
# NOTE(review): this keeps all remaining columns, not only continuous ones --
# confirm that is the intended set for the histogram loop.
data_for_hist <- marketing %>%
  select(-LocationID) %>%
  names()