1 Your Exercise

In this section, you are expected to be confident enough to write your own functions. I advise you to create one function for each of the tasks below:

Univariate variable (one dimension)
- average

# Arithmetic mean of a vector (univariate): the total of the values divided
# by how many values there are.
#   x: numeric vector.
average <- function(x) {
  total <- sum(x)
  count <- length(x)
  total / count
}

middle_value

# Median of a vector (univariate), rounded to 2 decimal places.
#   x: numeric vector.
# With an even number of values the two central order statistics are
# averaged; with an odd number the single central one is returned.
middle_value <- function(x) {
  n <- length(x)
  sorted <- sort(x)  # sort once and reuse it for every lookup
  if (n %% 2 == 0) {
    pos <- n %/% 2
    centre <- (sorted[pos] + sorted[pos + 1]) / 2
  } else {
    pos <- (n + 1) %/% 2
    centre <- sorted[pos]
  }
  round(centre, digits = 2)
}

most_frequent

# Mode of a vector (univariate).
#   x: vector of values.
# Returns every value that attains the highest count, in order of first
# appearance in x, so ties yield more than one value.
most_frequent <- function(x) {
  distinct <- unique(x)
  # match() maps each element to the position of its distinct value, and
  # tabulate() counts how often each position occurs.
  counts <- tabulate(match(x, distinct))
  is_top <- counts == max(counts)
  distinct[is_top]
}

max_value

# Maximum of a vector (univariate): the last element after ascending sort.
#   x: vector of sortable values.
# sort() discards NA by default, so missing values are ignored.
max_value <- function(x) {
  utils::tail(sort(x), n = 1)
}

min_value

# Minimum of a vector (univariate): the first element after ascending sort.
#   x: vector of sortable values.
# sort() discards NA by default, so missing values are ignored.
min_value <- function(x) {
  utils::head(sort(x), n = 1)
}

variance

# Sample variance of a vector (univariate), rounded to 2 decimal places.
#   x: numeric vector.
# Sum of squared deviations from the mean divided by (n - 1).
var.s <- function(x) {
  n <- length(x)
  centre <- sum(x) / n
  squared_dev <- (x - centre)^2
  round(sum(squared_dev) / (n - 1), digits = 2)
}

standard_deviation

# Sample standard deviation of a vector (univariate), rounded to 2 decimal
# places: the square root of the (n - 1)-denominator variance.
#   x: numeric vector.
stdev.s <- function(x) {
  n <- length(x)
  centre <- sum(x) / n
  sample_variance <- sum((x - centre)^2) / (n - 1)
  round(sqrt(sample_variance), digits = 2)
}

Outliers

# Outliers of a vector (univariate) by the 1.5 * IQR rule.
#   x: numeric vector.
# Returns every element of x lying strictly below Q1 - 1.5 * IQR or strictly
# above Q3 + 1.5 * IQR, in original order; an empty vector when none qualify.
Outlier <- function(x) {
  # One quantile() call yields both quartiles; names are dropped so the fences
  # are plain numbers.
  quartiles <- stats::quantile(x, probs = c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]  # inter-quartile range
  lower_bound <- quartiles[1] - spread * 1.5
  upper_bound <- quartiles[2] + spread * 1.5
  # Keep every flagged value: wrapping the index in head() would silently
  # drop all outliers after the sixth.
  x[which(x < lower_bound | x > upper_bound)]
}

summary

# Descriptive summary of a vector (univariate).
#   x: numeric vector.
# Returns a named numeric vector with elements average, median, mode, min,
# max, var.s, stdev.s and Outlier. average, var.s and stdev.s are rounded to
# 2 decimal places. When there are several modes or outliers, c() appends a
# numeric suffix to the name (mode1, mode2, ...); when there is no outlier
# the Outlier element is absent.
statistics <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  mean_x <- sum(x) / n

  # Median: average the two central values for even n, else take the centre.
  if (n %% 2 == 0) {
    pos <- n %/% 2
    median_x <- (sorted[pos] + sorted[pos + 1]) / 2
  } else {
    pos <- (n + 1) %/% 2
    median_x <- sorted[pos]
  }

  # Mode: every distinct value that reaches the highest count.
  distinct <- unique(x)
  counts <- tabulate(match(x, distinct))
  mode_x <- distinct[counts == max(counts)]

  # Sample variance. Deviations are taken from the mean itself; writing
  # `sum(x)/n-1` would subtract 1 from the mean because `/` binds tighter
  # than `-`.
  variance_sample <- sum((x - mean_x)^2) / (n - 1)

  # Outliers by the 1.5 * IQR rule; all of them are reported, not just the
  # first six.
  quartiles <- stats::quantile(x, probs = c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  lower_bound <- quartiles[1] - spread * 1.5
  upper_bound <- quartiles[2] + spread * 1.5
  outliers <- x[which(x < lower_bound | x > upper_bound)]

  c(
    average = round(mean_x, digits = 2),
    median = median_x,
    mode = mode_x,
    min = utils::head(sorted, n = 1),
    max = utils::tail(sorted, n = 1),
    var.s = round(variance_sample, digits = 2),
    stdev.s = round(sqrt(variance_sample), digits = 2),
    Outlier = outliers
  )
}

Testing Function

A <- c(25,22,23,25,18,11,15,45,27,22,28,25,25,12,14,14,13,17,15,19)
table(A)

## A
## 11 12 13 14 15 17 18 19 22 23 25 27 28 45 
##  1  1  1  2  2  1  1  1  2  1  4  1  1  1

average(A)

## [1] 20.75

middle_value(A)

## [1] 20.5

most_frequent(A)

## [1] 25

min_value(A)

## [1] 11

max_value(A)

## [1] 45

var.s(A)

## [1] 61.78

stdev.s(A)

## [1] 7.86

Outlier(A)

## [1] 45

statistics(A)

## average  median    mode     min     max   var.s stdev.s Outlier 
##   20.75   20.50   25.00   11.00   45.00   62.83    7.93   45.00

Multivariate variable (more dimensions)
- average

# Frequency-weighted mean of a value/frequency table.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
average.multi <- function(x, freq) {
  weighted_total <- sum(x * freq)
  total_count <- sum(freq)
  weighted_total / total_count
}

middle_value

# Median of a value/frequency table, rounded to 2 decimal places.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
# The table is expanded to the full sorted sample before the central
# element(s) are picked.
median.multi <- function(x, freq) {
  total <- sum(freq)
  expanded <- sort(rep.int(x, freq))
  if (total %% 2 == 0) {
    pos <- total %/% 2
    centre <- (expanded[pos] + expanded[pos + 1]) / 2
  } else {
    pos <- (total + 1) %/% 2
    centre <- expanded[pos]
  }
  round(centre, digits = 2)
}

most_frequent

# Mode of a value/frequency table.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
# Returns every value attaining the highest count, in ascending order
# (the expanded sample is sorted before counting).
mode.multi <- function(x, freq) {
  expanded <- sort(rep.int(x, freq))
  distinct <- unique(expanded)
  counts <- tabulate(match(expanded, distinct))
  is_top <- counts == max(counts)
  distinct[is_top]
}

max_value

# Maximum of a value/frequency table: the last element of the expanded,
# ascending-sorted sample. Values with frequency 0 do not take part.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
max.multi <- function(x, freq) {
  expanded <- rep.int(x, freq)
  utils::tail(sort(expanded), n = 1)
}

min_value

# Minimum of a value/frequency table: the first element of the expanded,
# ascending-sorted sample. Values with frequency 0 do not take part.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
min.multi <- function(x, freq) {
  expanded <- rep.int(x, freq)
  utils::head(sort(expanded), n = 1)
}

variance

# Sample variance of a value/frequency table, rounded to 2 decimal places.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
# Computed on the expanded sample with an (n - 1) denominator, where n is
# the total frequency.
var.multi.s <- function(x, freq) {
  total <- sum(freq)
  expanded <- sort(rep.int(x, freq))
  centre <- sum(expanded) / total
  squared_dev <- (expanded - centre)^2
  round(sum(squared_dev) / (total - 1), digits = 2)
}

standard_deviation

# Sample standard deviation of a value/frequency table, rounded to 2 decimal
# places.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
stdev.multi.s <- function(x, freq) {
  # The sample size is the total frequency, not the number of distinct
  # values: using length(x) here gives a wrong mean and a wrong denominator.
  n <- sum(freq)
  expanded <- sort(rep.int(x, freq))
  centre <- sum(expanded) / n
  round(sqrt(sum((expanded - centre)^2) / (n - 1)), digits = 2)
}

Outliers

# Outliers of a value/frequency table by the 1.5 * IQR rule.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
# Returns every element of the expanded, sorted sample lying strictly below
# Q1 - 1.5 * IQR or strictly above Q3 + 1.5 * IQR (repeated as often as it
# occurs); an empty vector when none qualify.
Outlier.multi <- function(x, freq) {
  expanded <- sort(rep.int(x, freq))
  # One quantile() call yields both quartiles as plain numbers.
  quartiles <- stats::quantile(expanded, probs = c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]  # inter-quartile range
  lower_bound <- quartiles[1] - spread * 1.5
  upper_bound <- quartiles[2] + spread * 1.5
  # Keep every flagged value: wrapping the index in head() would silently
  # drop all outliers after the sixth.
  expanded[which(expanded < lower_bound | expanded > upper_bound)]
}

summary

# Descriptive summary of a value/frequency table.
#   x:    distinct values.
#   freq: how many times each value in x occurs (same length as x).
# Returns a named numeric vector with elements average, median, mode, min,
# max, var.s, stdev.s and Outlier. average, var.s and stdev.s are rounded to
# 2 decimal places. When there are several modes or outliers, c() appends a
# numeric suffix to the name (mode1, mode2, ...); when there is no outlier
# the Outlier element is absent.
statistics.multi <- function(x, freq) {
  n <- sum(freq)
  expanded <- sort(rep.int(x, freq))
  weighted_mean <- sum(x * freq) / n

  # Median: average the two central values for even n, else take the centre.
  if (n %% 2 == 0) {
    pos <- n %/% 2
    median_x <- (expanded[pos] + expanded[pos + 1]) / 2
  } else {
    pos <- (n + 1) %/% 2
    median_x <- expanded[pos]
  }

  # Mode: every distinct value that reaches the highest count.
  distinct <- unique(expanded)
  counts <- tabulate(match(expanded, distinct))
  mode_x <- distinct[counts == max(counts)]

  # Sample variance on the expanded sample, (n - 1) denominator.
  variance_sample <- sum((expanded - sum(expanded) / n)^2) / (n - 1)

  # Outliers by the 1.5 * IQR rule; all of them are reported, not just the
  # first six.
  quartiles <- stats::quantile(expanded, probs = c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  lower_bound <- quartiles[1] - spread * 1.5
  upper_bound <- quartiles[2] + spread * 1.5
  outliers <- expanded[which(expanded < lower_bound | expanded > upper_bound)]

  c(
    average = round(weighted_mean, digits = 2),
    median = median_x,
    mode = mode_x,
    min = utils::head(expanded, n = 1),
    max = utils::tail(expanded, n = 1),
    var.s = round(variance_sample, digits = 2),
    stdev.s = round(sqrt(variance_sample), digits = 2),
    Outlier = outliers
  )
}

Testing Function

B        <- c(36,23,37,33,32,34,39,31,53)
Fi       <- c(3,1,2,6,3,2,5,7,1)
data.frame(B,Fi)

##    B Fi
## 1 36  3
## 2 23  1
## 3 37  2
## 4 33  6
## 5 32  3
## 6 34  2
## 7 39  5
## 8 31  7
## 9 53  1

# Report the frequency-weighted mean. The message is stored under its own
# name so that it does not overwrite the `average()` function defined earlier
# in this file.
average_msg <- paste("The average is", average.multi(B, Fi), sep = " ")
average_msg

## [1] "The average is 34.4"

# Report the median. The message is stored under its own name so that it
# does not mask `stats::median()`.
median_msg <- paste("The median is", median.multi(B, Fi), sep = " ")
median_msg

## [1] "The median is 33"

modus    <- paste("The mode is", sep =" ",mode.multi(B,Fi))
modus

## [1] "The mode is 31"

# Report the minimum. The message is stored under its own name so that it
# does not mask `base::min()`.
min_msg <- paste("The minimum value is", min.multi(B, Fi), sep = " ")
min_msg

## [1] "The minimum value is 23"

# Report the maximum. The message is stored under its own name so that it
# does not mask `base::max()`.
max_msg <- paste("The maximum value is", max.multi(B, Fi), sep = " ")
max_msg

## [1] "The maximum value is 53"

variance <- paste("The variance sample is", sep =" ",var.multi.s(B,Fi))
variance

## [1] "The variance sample is 24.59"

stdev    <- paste("The standard deviation sample is", sep=" ",stdev.multi.s(B,Fi))
stdev

## [1] "The standard deviation sample is 155.72"

# Report the outlier(s). The message is stored under its own name so that it
# does not overwrite the `Outlier()` function defined earlier in this file.
outlier_msg <- paste("The outlier is", Outlier.multi(B, Fi), sep = " ")
outlier_msg

## [1] "The outlier is 53"

statistics.multi(B,Fi)

## average  median    mode     min     max   var.s stdev.s Outlier 
##   34.40   33.00   31.00   23.00   53.00   24.59    4.96   53.00

Simple Case Example

# Simulated coffee-shop sales: 5000 transactions, one per consecutive day
# starting 2018-01-01. No seed is set, so every run draws different data.
Id <- 1:5000
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)

# Customer of each transaction, drawn with replacement.
Name <- sample(
  c("Angel", "Sherly", "Vanessa", "Irene", "Julian", "Jeffry", "Nikita",
    "Kefas", "Siana", "Lala", "Fallen", "Ardifo", "Kevin", "Michael",
    "Felisha", "Calisha", "Patricia", "Naomi", "Eric", "Jacob"),
  5000,
  replace = TRUE
)

# Exactly 1000 transactions per city, in shuffled order.
City <- sample(
  rep(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"), times = 1000)
)

Outlet <- sample(
  c("Outlet 1", "Outlet 2", "Outlet 3", "Outlet 4", "Outlet 5"),
  5000,
  replace = TRUE
)

Menu <- c("Cappucino", "Es Kopi Susu", "Hot Caramel Latte", "Hot Chocolate",
          "Hot Red Velvet Latte", "Ice Americano", "Ice Berry Coffee",
          "Ice Cafe Latte", "Ice Caramel Latte", "Ice Coffee Avocado",
          "Ice Coffee Lite", "Ice Matcha Espresso", "Ice Matcha Latte",
          "Ice Red Velvet Latte")
# Menu item bought in each transaction.
all_menu <- sample(Menu, 5000, replace = TRUE)
# One random price per menu item (14 items); DFPrice is the price list.
Price <- sample(18000:45000, 14, replace = TRUE)
DFPrice <- data.frame(Menu, Price)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Attach a price to every transaction by looking its menu item up in DFPrice.
# No `by` is given, so left_join() joins on the shared "Menu" column and
# prints a message naming that key.
Menu_Price   <- left_join(data.frame(Menu = all_menu),DFPrice)

## Joining, by = "Menu"

# Assemble the final data set: transaction details side by side with the
# menu item and its price, then preview the first five rows.
KopiKenangan <- cbind(
  data.frame(Id, Date, Name, City, Outlet),
  Menu_Price
)
head(KopiKenangan, n = 5)

##   Id       Date    Name      City   Outlet                 Menu Price
## 1  1 2018-01-01   Irene    Bekasi Outlet 3            Cappucino 31666
## 2  2 2018-01-02   Kefas     Bogor Outlet 5    Hot Caramel Latte 27773
## 3  3 2018-01-03   Angel    Bekasi Outlet 1 Ice Red Velvet Latte 28320
## 4  4 2018-01-04  Fallen     Depok Outlet 3  Ice Matcha Espresso 30622
## 5  5 2018-01-05 Calisha Tangerang Outlet 2     Ice Matcha Latte 26485

Let’s say you already have the data set shown above. Please create a function for each of the following tasks:

The percentage of sales for each city.

library(dplyr)

# Share of each value in the overall total, formatted as a percentage.
#   value: numeric vector (e.g. sales per category).
#   Data:  labels belonging to the entries of value.
# Returns a data frame with the labels in column `Data` and the share,
# rounded to 2 decimal places and suffixed with "%", in column `Percentage`.
Percentage <- function(value, Data) {
  share <- round(value * 100 / sum(value), digits = 2)
  labelled <- paste0(share, "%")
  data.frame(Data, Percentage = labelled)
}
# Total revenue per city (aggregate() puts the summed prices in column `x`),
# followed by each city's share of the overall revenue.
sales_city <- aggregate(
  KopiKenangan$Price,
  list(City = KopiKenangan$City),
  FUN = sum
)

Percentage(sales_city$x, sales_city$City)

##        Data Percentage
## 1    Bekasi     19.87%
## 2     Bogor     20.06%
## 3     Depok     20.07%
## 4   Jakarta     19.91%
## 5 Tangerang     20.09%

The frequency of Name and Menu.

# Two-way frequency table of x (rows) against y (columns).
#   x, y: vectors of equal length; each pair (x[i], y[i]) is one observation.
# The table dimensions are labelled "x" and "y".
frekuensi <- function(x, y) {
  table(x = x, y = y)
}
frekuensi(KopiKenangan$Menu,KopiKenangan$Name)

##                       y
## x                      Angel Ardifo Calisha Eric Fallen Felisha Irene Jacob
##   Cappucino               27     18      23   26     23      13    23    20
##   Es Kopi Susu            17     19      13   17     15      18    12    13
##   Hot Caramel Latte       21     22      16   20     20      17    10    13
##   Hot Chocolate           14     21      13   15     16      29    20    19
##   Hot Red Velvet Latte    15     15      15   14     16      17    23    13
##   Ice Americano           20     21      14   21     14      16    15    17
##   Ice Berry Coffee        20     17      21   11     17      11    15    21
##   Ice Cafe Latte          13     18       9   20     17      14    13    17
##   Ice Caramel Latte       20     19      23   21     17      21    20    15
##   Ice Coffee Avocado      19     19      18   19     13      20    14    21
##   Ice Coffee Lite         23     21      22   16     20      23    29    21
##   Ice Matcha Espresso     17     22      28   15     14      21    19    18
##   Ice Matcha Latte        26     19      21   13     24      18    15    20
##   Ice Red Velvet Latte    23      9      11   26     28      15    17    11
##                       y
## x                      Jeffry Julian Kefas Kevin Lala Michael Naomi Nikita
##   Cappucino                20     20    18    20   20      12    21     17
##   Es Kopi Susu             21     20    14    18   19      20    16     21
##   Hot Caramel Latte        13     21    19     9   21       9     9     15
##   Hot Chocolate            19     27    13    19   18      14    10     15
##   Hot Red Velvet Latte     18     22    21    16   21      14    17     17
##   Ice Americano            14     12    20    16   16      18    14     12
##   Ice Berry Coffee         23     20    12    17   11      16    27     22
##   Ice Cafe Latte           31     22    22    21   16      19    28     16
##   Ice Caramel Latte        14     17    21    16   16      12    19     20
##   Ice Coffee Avocado       11     20    18    24   16      29    18     13
##   Ice Coffee Lite          19     23    15    17   13      17    11     15
##   Ice Matcha Espresso      25      9    21    14   15      20    24     12
##   Ice Matcha Latte         21     13    22    12   22      12    17     15
##   Ice Red Velvet Latte     15     15    13    12   19      12    17     17
##                       y
## x                      Patricia Sherly Siana Vanessa
##   Cappucino                  20     20    17      20
##   Es Kopi Susu               25     19    16      15
##   Hot Caramel Latte          15     21    24      17
##   Hot Chocolate              25     18    24      19
##   Hot Red Velvet Latte       14     20    19      16
##   Ice Americano              22     12    18      11
##   Ice Berry Coffee           22     19    16      22
##   Ice Cafe Latte             16     21    12      14
##   Ice Caramel Latte          19     15    14      22
##   Ice Coffee Avocado         21     11    15      24
##   Ice Coffee Lite            23     20    17      22
##   Ice Matcha Espresso        27     16    16      17
##   Ice Matcha Latte           22     11    15      20
##   Ice Red Velvet Latte       15     16    17      22

The average monthly sales per menu item.

# Average number of sales per month for every item.
#   Item:  vector with one entry per transaction (e.g. the menu item sold).
#   dates: Date vector covering the sales period. Defaults to the global
#          `Date` vector, so existing one-argument calls keep working.
# Returns a data frame with columns `Item`, `Freq` (total count per item) and
# `monthly.average` (Freq divided by the number of distinct calendar months
# in `dates`, rounded to 2 decimal places).
average.monthly <- function(Item, dates = Date) {
  month_year <- format(dates, "%B,%Y")
  n_months <- length(table(month_year))  # distinct month/year combinations
  # table(Item) is kept verbatim: the first column name is taken from it.
  counts <- data.frame(table(Item))
  monthly.average <- round(counts$Freq / n_months, digits = 2)
  data.frame(counts, monthly.average)
}
average.monthly(KopiKenangan$Menu)

##                    Item Freq monthly.average
## 1             Cappucino  398            2.41
## 2          Es Kopi Susu  348            2.11
## 3     Hot Caramel Latte  332            2.01
## 4         Hot Chocolate  368            2.23
## 5  Hot Red Velvet Latte  343            2.08
## 6         Ice Americano  323            1.96
## 7      Ice Berry Coffee  360            2.18
## 8        Ice Cafe Latte  359            2.18
## 9     Ice Caramel Latte  361            2.19
## 10   Ice Coffee Avocado  363            2.20
## 11      Ice Coffee Lite  387            2.35
## 12  Ice Matcha Espresso  370            2.24
## 13     Ice Matcha Latte  358            2.17
## 14 Ice Red Velvet Latte  330            2.00

Lab5: R Programming

Putri Angelina Windjaya - 20194920010

September 30, 2020

1 Your Exercise