1 My Exercise

In this section, you are expected to be more confident in creating your own functions. I advise you to create a function for each of the tasks below:

  • Univariate variable (one dimension). Here is the example data we are going to test: x = (1,3,5,9,9,11,35,15,17,19)
x <- c(1,3,5,9,9,11,35,15,17,19)
x
##  [1]  1  3  5  9  9 11 35 15 17 19
  • average
# Arithmetic mean of a vector: the total of its elements divided by the
# number of elements.
#
# x: numeric vector.
# Returns a single number (NaN for an empty vector, since 0 / 0).
Average <- function(x) {
  total <- sum(x)
  count <- length(x)
  total / count
}
Average(x)
## [1] 12.4
  • middle_value
# Median of a vector.
#
# x: numeric vector. Missing values are removed by sort() before the middle
#    position is located.
# Returns the middle element of the sorted values when their count is odd,
# or the mean of the two middle elements when it is even.
Middle_value <- function(x) {
  sorted <- sort(x)
  # Count the sorted values rather than x: sort() drops NAs, so length(x)
  # could point at the wrong position (or past the end) of `sorted`.
  n <- length(sorted)
  if (n %% 2 == 0) {
    # Even count: average the two central elements.
    sum(sorted[n / 2 + 0:1]) / 2
  } else {
    sorted[(n + 1) / 2]
  }
}
Middle_value(x)
## [1] 10
  • most_frequent
# Mode(s) of a vector: every distinct value whose number of occurrences
# equals the highest count, listed in order of first appearance.
#
# x: vector of values.
Most_frequent <- function(x) {
  distinct <- unique(x)
  # match() maps each element to the position of its value in `distinct`;
  # tabulate() then counts how many elements landed on each position.
  counts <- tabulate(match(x, distinct))
  is_top <- counts == max(counts)
  distinct[is_top]
}
Most_frequent(x)
## [1] 9
  • max_value
# Largest element of a vector, obtained by sorting in ascending order and
# taking the last entry. An empty input yields an empty result.
#
# x: vector of sortable values.
Max_value <- function(x) {
  ascending <- sort(x)
  ascending[length(ascending)]
}
Max_value(x)
## [1] 35
  • min_value
# Smallest element of a vector, obtained by sorting in ascending order and
# taking the first entry.
#
# x: vector of sortable values.
Min_value <- function(x) {
  ascending <- sort(x)
  # Take at most one element so that an empty input gives an empty result
  # rather than NA.
  ascending[seq_len(min(1L, length(ascending)))]
}
Min_value(x)
## [1] 1
  • variance
# Sample variance (n - 1 denominator) of a numeric vector.
#
# x: numeric vector; at least two elements are needed for a finite result.
# Returns the sum of squared deviations from the mean divided by n - 1.
Variance <- function(x) {
  n <- length(x)
  # The mean is computed inline instead of calling Average(): that name is
  # reassigned to a two-argument function further down this file, which
  # would make Variance(x) fail if it were called after that point.
  centre <- sum(x) / n
  sum((x - centre)^2) / (n - 1)
}
Variance(x)
## [1] 97.82222
  • standard_deviation
# Sample standard deviation of a numeric vector: the square root of the sum
# of squared deviations from the mean divided by n - 1.
#
# x: numeric vector.
Standard_deviation <- function(x) {
  deviations <- x - mean(x)
  degrees_of_freedom <- length(x) - 1
  sqrt(sum(deviations^2) / degrees_of_freedom)
}
Standard_deviation(x)
## [1] 9.890512
  • Outliers #Quartile
# Elements of x lying more than 1.5 interquartile ranges below the first
# quartile or above the third quartile.
#
# x: numeric vector.
# Returns the outlying elements in their original order (possibly none).
Outlier <- function(x) {
  # quantile() with default probabilities returns 0%, 25%, 50%, 75%, 100%;
  # positions 2 and 4 are the first and third quartiles.
  quartiles <- quantile(x)[c(2, 4)]
  lower_q <- quartiles[1]
  upper_q <- quartiles[2]
  iqr <- upper_q - lower_q
  upper_fence <- (iqr * 1.5) + upper_q
  lower_fence <- lower_q - (iqr * 1.5)
  outside <- which(x < lower_fence | x > upper_fence)
  x[outside]
}
Outlier(x)
## [1] 35
  • summary (all functions) - optional

  • Multivariate variable (more than one dimension). Here is the example data we are going to test: x = (1,3,25,7,9), y = (9,11,13,15,17)

x <- c(1,3,25,7,9)
y <- c(9,11,13,15,17)
data.frame(x,y)
##    x  y
## 1  1  9
## 2  3 11
## 3 25 13
## 4  7 15
## 5  9 17
  • average
# Mean of frequency-weighted data.
#
# x: numeric vector of values.
# y: numeric vector of the same length; y[i] is the number of times x[i]
#    occurs (the other functions in this section expand the data with
#    rep(x, y)).
# Returns sum(x * y) / sum(y), which equals the mean of rep(x, y).
#
# The divisor must be the total number of observations, sum(y). Dividing by
# length(x), the number of distinct values, gave 125 for the example data,
# which is larger than its maximum of 25. The correct result for the
# example is 625 / 65, about 9.615.
Average <- function(x, y) {
  weighted_total <- sum(x * y)
  observation_count <- sum(y)
  weighted_total / observation_count
}
Average(x,y)
## [1] 125
  • middle_value
x <- c(1,3,25,7,9)
y <- c(9,11,13,15,17)
Function <- rep(x,y)
# Median of frequency-weighted data.
#
# x: numeric vector of values.
# y: number of occurrences of each value (same length as x).
# Returns the median of the expanded sample rep(x, y): the middle element
# when the number of observations is odd, otherwise the mean of the two
# middle elements.
Middle_value <- function(x, y) {
  # Expand from the arguments instead of reading the global `Function`, so
  # the result always reflects the x and y that were actually passed in.
  sorted <- sort(rep(x, y))
  n <- length(sorted)
  if (n %% 2 == 0) {
    sum(sorted[n / 2 + 0:1]) / 2
  } else {
    sorted[(n + 1) / 2]
  }
}
Middle_value(x,y)
## [1] 7
  • most_frequent
x <- c(1,3,25,7,9)
y <- c(9,11,13,15,17)
Function <- rep(x,y)
# Mode(s) of frequency-weighted data.
#
# x: vector of values.
# y: number of occurrences of each value (same length as x).
# Returns every distinct value whose total count in rep(x, y) equals the
# highest count, in order of first appearance.
Most_frequent <- function(x, y) {
  # Build the sample from the arguments rather than the global `Function`.
  expanded <- rep(x, y)
  # unique() is given only the data: a second positional argument would be
  # taken as `incomparables`, not as a second data vector.
  distinct <- unique(expanded)
  counts <- tabulate(match(expanded, distinct))
  distinct[counts == max(counts)]
}
Most_frequent(x,y)
## [1] 9
  • max_value
x <- c(1,3,25,7,9)
y <- c(9,11,13,15,17)
Function <- rep(x,y)
# Largest value in frequency-weighted data.
#
# x: vector of values.
# y: number of occurrences of each value (same length as x).
# Returns the largest element of rep(x, y); values with a frequency of zero
# are therefore not considered.
Max_value <- function(x, y) {
  # Expand from the arguments instead of relying on the global `Function`.
  sorted <- sort(rep(x, y))
  tail(sorted, 1)
}
Max_value(x,y)
## [1] 25
  • min_value
x <- c(1,3,25,7,9)
y <- c(9,11,13,15,17)
Function <- rep(x,y)
# Smallest value in frequency-weighted data.
#
# x: vector of values.
# y: number of occurrences of each value (same length as x).
# Returns the smallest element of rep(x, y); values with a frequency of
# zero are therefore not considered.
Min_value <- function(x, y) {
  # Expand from the arguments instead of relying on the global `Function`.
  sorted <- sort(rep(x, y))
  head(sorted, 1)
}
Min_value(x,y)
## [1] 1
  • variance
x <- c(1,3,25,7,9)
y <- c(9,11,13,15,17)
Function <- rep(x,y)
# Sample variance (n - 1 denominator) of frequency-weighted data.
#
# x: numeric vector of values.
# y: number of occurrences of each value (same length as x).
# Returns the variance of the expanded sample rep(x, y).
#
# The sample and its mean are both derived from the arguments. Centring on
# sum(x * y) / length(x) (125 for the example data) instead of the true
# mean inflated the result to 13589.38; the correct value for the example
# is about 67.74.
Variance <- function(x, y) {
  expanded <- rep(x, y)
  n <- length(expanded)
  centre <- sum(expanded) / n
  sum((expanded - centre)^2) / (n - 1)
}
Variance(x,y)
## [1] 13589.38
  • standard_deviation
x <- c(1,3,25,7,9)
y <- c(9,11,13,15,17)
Function <- rep(x,y)
# Sample standard deviation of frequency-weighted data.
#
# x: numeric vector of values.
# y: number of occurrences of each value (same length as x).
# Returns the standard deviation (n - 1 denominator) of rep(x, y).
#
# The sample and its mean are both derived from the arguments. Centring on
# sum(x * y) / length(x) instead of the true mean produced 116.5735 for the
# example data; the correct value is about 8.23.
Standard_deviation <- function(x, y) {
  expanded <- rep(x, y)
  n <- length(expanded)
  centre <- sum(expanded) / n
  sqrt(sum((expanded - centre)^2) / (n - 1))
}
Standard_deviation(x,y)
## [1] 116.5735
  • Outliers
# Outlying values in frequency-weighted data, using the 1.5 * IQR rule.
#
# x: numeric vector of values.
# y: number of occurrences of each value (same length as x).
# Returns each distinct value of rep.int(x, y) that lies below
# Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR, in ascending order.
#
# Every outlying value is reported once. Keeping only the first flagged
# observation would hide high outliers whenever a low one exists.
Outliers <- function(x, y) {
  # Expand and sort once; the same vector feeds the quartiles and the
  # fence comparison.
  expanded <- sort(rep.int(x, y))
  quartiles <- unname(quantile(expanded))
  q1 <- quartiles[2]
  q3 <- quartiles[4]
  iqr <- q3 - q1
  upper_fence <- (iqr * 1.5) + q3
  lower_fence <- q1 - (iqr * 1.5)
  unique(expanded[expanded < lower_fence | expanded > upper_fence])
}
x <- c(1,3,25,7,9)
y <- c(9,11,13,15,17)
Outliers(x,y)
## [1] 25
  • summary (all functions) - optional

  • Simple Case Example

# Simulated transaction log: 5000 rows, one per consecutive calendar day
# starting on 2018-01-01. No seed is set in this chunk, so every run draws
# different names, outlets, menu items and prices.
Id       <- (1:5000)
Date     <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)

# Customer for each transaction, sampled with replacement.
Name     <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
               "Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
               5000, replace = TRUE)

# Each of the five cities appears exactly 1000 times, in shuffled order.
City     <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))

Outlet   <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = TRUE)

Menu     <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
              "Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
              "Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
# Menu item ordered in each transaction.
all_menu <- sample(Menu, 5000, replace = TRUE)
# One random price per menu item; the count follows the menu so the price
# table stays aligned if items are added or removed.
Price    <- sample(18000:45000, length(Menu), replace = TRUE)
# Lookup table mapping each menu item to its price.
DFPrice  <- data.frame(Menu, Price)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Attach the price to every sampled order. The join key is named explicitly
# so the call does not rely on (or print a message about) an inferred key.
Menu_Price <- left_join(data.frame(Menu = all_menu), DFPrice, by = "Menu")
# Final data set: the transaction attributes followed by the columns of
# Menu_Price, bound side by side (rows are aligned by position).
KopiKenangan <- cbind(
  data.frame(Id = Id, Date = Date, Name = Name, City = City, Outlet = Outlet),
  Menu_Price
)
head(KopiKenangan,5)
##   Id       Date   Name    City   Outlet                 Menu Price
## 1  1 2018-01-01 Jeffry Jakarta Outlet 3    Ice Caramel Latte 30222
## 2  2 2018-01-02  Kevin Jakarta Outlet 2   Ice Coffee Avocado 21897
## 3  3 2018-01-03 Ardifo  Bekasi Outlet 5  Ice Matcha Espresso 30220
## 4  4 2018-01-04 Sherly   Bogor Outlet 5        Ice Americano 20955
## 5  5 2018-01-05 Nikita   Depok Outlet 2 Hot Red Velvet Latte 33742

Suppose you already have the data set shown above. Please create functions to carry out the following tasks:

  • The percentage of sales for each city.
# Format proportions as percentage labels.
#
# x: numeric vector of proportions (0.2 means 20%).
# Returns a character vector: each value multiplied by 100, rounded to one
# decimal place, and suffixed with "%".
Percentage <- function(x) {
  paste0(round(x * 100, 1), "%")
}
# Revenue per city and each city's share of total revenue.
# Note that `City` is reassigned here: from this point on it is the summary
# data frame, not the per-transaction city vector.
City <- aggregate(Price ~ City, data = KopiKenangan, FUN = sum)
Sales <- sum(City[["Price"]])
City[["Percent"]] <- Percentage(City[["Price"]] / Sales)
City
##        City    Price Percent
## 1    Bekasi 29076291     20%
## 2     Bogor 29093671     20%
## 3     Depok 28986523   19.9%
## 4   Jakarta 29425492   20.2%
## 5 Tangerang 28755425   19.8%
  • The frequency of Name and Menu.
# Number of transactions per customer name (columns Var1 and Freq).
# `Name` is reassigned: it now holds this frequency table.
Name <- data.frame(table(KopiKenangan[["Name"]]))
Name
##        Var1 Freq
## 1     Angel  225
## 2    Ardifo  244
## 3   Calisha  248
## 4      Eric  264
## 5    Fallen  271
## 6   Felisha  254
## 7     Irene  229
## 8     Jacob  245
## 9    Jeffry  218
## 10   Julian  273
## 11    Kefas  275
## 12    Kevin  263
## 13     Lala  259
## 14  Michael  242
## 15    Naomi  248
## 16   Nikita  228
## 17 Patricia  262
## 18   Sherly  247
## 19    Siana  246
## 20  Vanessa  259
# Number of transactions per menu item (columns Var1 and Freq).
# `Menu` is reassigned: it now holds this frequency table.
Menu <- data.frame(table(KopiKenangan[["Menu"]]))
Menu
##                    Var1 Freq
## 1             Cappucino  380
## 2          Es Kopi Susu  365
## 3     Hot Caramel Latte  378
## 4         Hot Chocolate  333
## 5  Hot Red Velvet Latte  365
## 6         Ice Americano  354
## 7      Ice Berry Coffee  357
## 8        Ice Cafe Latte  357
## 9     Ice Caramel Latte  341
## 10   Ice Coffee Avocado  346
## 11      Ice Coffee Lite  356
## 12  Ice Matcha Espresso  369
## 13     Ice Matcha Latte  354
## 14 Ice Red Velvet Latte  345
  • The Average of daily sales per-menu item.
library(dplyr)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.0
## v readr   1.3.1
## -- Conflicts ----------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Year and month of every transaction, as character columns taken straight
# from the Date column.
Yearly <- data.frame(
  Year = format(KopiKenangan$Date, "%Y"),
  Month = format(KopiKenangan$Date, "%m"),
  stringsAsFactors = FALSE
)
# Number of distinct year-month periods covered by the data.
Monthly <- length(unique(paste(Yearly$Year, Yearly$Month, sep = "-")))
# Orders per menu item, and that count divided by the number of periods.
# NOTE(review): the divisor is the number of months, so `Sales` is the
# average number of orders per month for each item. The task heading asks
# for a daily average - confirm which is intended; a per-day figure would
# divide by the number of distinct dates instead.
Frequency <- as.data.frame(table(KopiKenangan$Menu))
Frequency$Sales <- round(Frequency$Freq / Monthly, 2)
Frequency
##                    Var1 Freq Sales
## 1             Cappucino  380  2.30
## 2          Es Kopi Susu  365  2.21
## 3     Hot Caramel Latte  378  2.29
## 4         Hot Chocolate  333  2.02
## 5  Hot Red Velvet Latte  365  2.21
## 6         Ice Americano  354  2.15
## 7      Ice Berry Coffee  357  2.16
## 8        Ice Cafe Latte  357  2.16
## 9     Ice Caramel Latte  341  2.07
## 10   Ice Coffee Avocado  346  2.10
## 11      Ice Coffee Lite  356  2.16
## 12  Ice Matcha Espresso  369  2.24
## 13     Ice Matcha Latte  354  2.15
## 14 Ice Red Velvet Latte  345  2.09