1 Your Exercise

In this section, you are expected to be more confident in creating your own functions. I advise you to create a function for each of the tasks below:

  • Univariate variable (one dimension)
    • average
# Arithmetic mean of a vector: the total of its elements divided by the
# number of elements.
average <- function(x) {
  total <- sum(x)
  count <- length(x)
  total / count
}
x <- c(1, 2, 3)
average(x)
## [1] 2
  • middle_value
# Median of a vector with an odd number of elements ("ganjil" = odd).
#
# x: vector with an odd length.
# Returns the middle element of the sorted data. When the length is even
# (or zero) there is no single middle element, so NA is returned with a
# warning.
middle_value_ganjil <- function(x) {
  n <- length(x)
  if (n %% 2 == 0) {
    warning("middle_value_ganjil() expects an odd number of values",
            call. = FALSE)
    return(NA_real_)
  }
  # (n + 1) / 2 is only the POSITION of the median; look the value up in the
  # sorted data instead of returning the position itself.
  sort(x)[(n + 1) / 2]
}
x <- c(1, 2, 3, 4, 5)
middle_value_ganjil(x)
## [1] 3
# Median of a vector with an even number of elements ("genap" = even).
#
# x: vector with an even, non-zero length.
# Returns the average of the two central elements of the sorted data. When
# the length is odd or zero, NA is returned with a warning.
middle_value_genap <- function(x) {
  n <- length(x)
  if (n == 0 || n %% 2 != 0) {
    warning("middle_value_genap() expects an even, non-zero number of values",
            call. = FALSE)
    return(NA_real_)
  }
  sorted <- sort(x)
  # n / 2 and n / 2 + 1 are the positions of the two central values; the
  # median is the mean of the values found there, not of the positions.
  (sorted[n / 2] + sorted[n / 2 + 1]) / 2
}
x <- c(1, 2, 3, 4, 5, 6)
middle_value_genap(x)
## [1] 3.5
  • most_frequent
# Mode(s) of x: tabulate the values and keep those whose count equals the
# highest count. The result is taken from the first column of the frequency
# table, so it is a factor; ties yield more than one element.
most_frequent <- function(x) {
  tally <- data.frame(table(x))
  is_top <- tally$Freq == max(tally$Freq)
  tally[is_top, 1]
}
x <- c(1, 4, 5, 2, 5, 3, 5, 6, 7, 5)
most_frequent(x)
## [1] 5
## Levels: 1 2 3 4 5 6 7
  • max_value
# Largest element of x (thin wrapper around max()).
max_value <- function(x) {
  largest <- max(x)
  largest
}
x <- c(1, 2, 3, 4, 5, 6, 7)
max_value(x)
## [1] 7
  • min_value
# Smallest element of x (thin wrapper around min()).
min_value <- function(x) {
  smallest <- min(x)
  smallest
}
x <- c(1, 2, 3, 4, 5, 6, 7)
min_value(x)
## [1] 1
  • variance
# Sample variance of x, using the computational formula
#   (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
#
# x: numeric vector.
# Returns NA when fewer than two values are supplied, because the sample
# variance is undefined there.
variance <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  # The whole product n * (n - 1) is the denominator. Writing
  # `/ length(x) * length(x - 1)` divides by n and then multiplies by n
  # again, because length(x - 1) is just length(x).
  (n * sum(x^2) - sum(x)^2) / (n * (n - 1))
}
x <- c(1, 2, 3, 4, 5, 6, 7)
variance(x)
## [1] 4.666667
  • standard_deviation
# Sample standard deviation of x: the square root of the sample variance
#   (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
#
# x: numeric vector.
# Returns NA when fewer than two values are supplied.
standard_deviation <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  # n * (n - 1) must be parenthesised as a single denominator;
  # length(x - 1) would only repeat length(x).
  sqrt((n * sum(x^2) - sum(x)^2) / (n * (n - 1)))
}
x <- c(1, 2, 3, 4, 5, 6, 7)
standard_deviation(x)
## [1] 2.160247
  • Outliers

  • summary

x <- c(1, 2, 3, 4, 5, 6, 7)
# Descriptive summary of a numeric vector.
#
# x: numeric vector.
# Returns a 1 x 7 matrix (row "Value") with the columns
#   Mean       - arithmetic mean
#   Med.Ganjil - median when length(x) is odd ("ganjil"), otherwise NA
#   Med.Genap  - median when length(x) is even ("genap"), otherwise NA
#   Max, Min   - largest and smallest value
#   Variance   - sample variance (denominator n * (n - 1) in the
#                computational formula); NA for fewer than two values
#   StDeviasi  - sample standard deviation (square root of Variance)
Summary <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  is_odd <- n %% 2 == 1

  average <- sum(x) / n
  # Only the median variant matching the parity of n is meaningful; the
  # other one is reported as NA instead of a number derived from positions.
  middle_value_ganjil <- if (is_odd) sorted[(n + 1) / 2] else NA_real_
  middle_value_genap <- if (!is_odd && n > 0) {
    (sorted[n / 2] + sorted[n / 2 + 1]) / 2
  } else {
    NA_real_
  }
  largest <- max(x)
  smallest <- min(x)
  variance <- if (n > 1) {
    (n * sum(x^2) - sum(x)^2) / (n * (n - 1))
  } else {
    NA_real_
  }
  stdev <- sqrt(variance)

  matrix(
    c(average, middle_value_ganjil, middle_value_genap,
      largest, smallest, variance, stdev),
    nrow = 1, ncol = 7,
    dimnames = list("Value", c("Mean",
                               "Med.Ganjil",
                               "Med.Genap",
                               "Max",
                               "Min",
                               "Variance",
                               "StDeviasi"))
  )
}
Summary(x)
##       Mean Med.Ganjil Med.Genap Max Min Variance StDeviasi
## Value    4          4        NA   7   1 4.666667  2.160247
  • Multivariate variable (more dimensions)

    • average
# Frequency-weighted mean.
#
# x:    numeric vector of distinct values.
# freq: how many times each value of x occurs (same length as x).
# Returns sum(x * freq) / sum(freq), i.e. the mean of the data obtained by
# repeating each x[i] freq[i] times.
average_freq <- function(x, freq) {
  stopifnot(length(x) == length(freq))
  # The number of observations is the total frequency, not the number of
  # distinct values, so divide by sum(freq) rather than length(x).
  sum(x * freq) / sum(freq)
}
x <- c(1, 2, 3, 4, 5)
freq <- c(2, 4, 3, 6, 4)
average_freq(x, freq)
## [1] 3.315789
  • middle_value
K <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(K, freq)
# Median of x.
#
# x: vector of observations (any order).
# Returns the middle element of the sorted data for an odd number of
# observations, the mean of the two central elements for an even number,
# and NA for empty input.
middle_value <- function(x) {
  sorted <- sort(x)
  n <- length(sorted)
  if (n == 0) {
    return(NA_real_)
  }
  if (n %% 2 == 0) {
    mid <- sorted[c(n / 2, n / 2 + 1)]
    sum(mid) / 2
  } else {
    # Odd n: a single central element exists.
    sorted[(n + 1) / 2]
  }
}
middle_value(list3)
## [1] 4
  • most_frequent
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(M, freq)
# Mode(s) of x.
#
# x: vector of observations.
# Returns the value(s) of x that occur most often, in ascending order and
# with the same type as x. Ties yield several values; empty input is
# returned unchanged.
most_frequent <- function(x) {
  if (length(x) == 0) {
    return(x)
  }
  candidates <- sort(unique(x))
  # Count the occurrences of every distinct value.
  counts <- tabulate(match(x, candidates), nbins = length(candidates))
  # Return only the most frequent value(s), not the whole frequency table,
  # and do so for any number of observations (odd or even).
  candidates[counts == max(counts)]
}
most_frequent(list3)
## [1] 4
  • max_value
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(M, freq)
# Largest observation in x.
#
# The maximum does not depend on whether the number of observations is odd
# or even, so it is computed unconditionally (a parity guard would leave the
# result undefined for odd-length input).
max_value <- function(x) {
  max(x)
}
max_value(list3)
## [1] 9
  • min_value
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(M, freq)
# Smallest observation in x.
#
# The minimum does not depend on whether the number of observations is odd
# or even, so it is computed unconditionally (a parity guard would leave the
# result undefined for odd-length input).
min_value <- function(x) {
  min(x)
}
min_value(list3)
## [1] 2
  • variance
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(M, freq)
# Sample variance of the observations in x, using the computational formula
#   (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
#
# Works for any number of observations (odd or even); returns NA when fewer
# than two observations are supplied.
variance <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  # n * (n - 1) is a single denominator; `/ length(x) * length(x - 1)` would
  # divide by n and multiply by n again, leaving only the numerator.
  (n * sum(x^2) - sum(x)^2) / (n * (n - 1))
}
variance(list3)
## [1] 6.595833
  • standard_deviation

  • summary

M <- c(2, 3, 4, 5, 9)
x <- c(1, 2, 3, 4, 5)
freq <- c(2, 3, 5, 2, 4)
# Descriptive summary of frequency data.
#
# x:    numeric vector of values.
# freq: how many times each value of x occurs (same length as x). It
#       defaults to one occurrence per value, so Summary(x) summarises x
#       itself; pass the frequencies explicitly instead of relying on a
#       global variable.
# Returns a 1 x 6 matrix (row "Value") with the columns
#   Average  - frequency-weighted mean
#   Median   - median of the expanded observations
#   Modus    - most frequent value (the smallest one when several tie)
#   Max, Min - largest and smallest observation
#   Variance - sample variance of the expanded observations (NA for fewer
#              than two observations)
Summary <- function(x, freq = rep(1, length(x))) {
  stopifnot(length(x) == length(freq), sum(freq) > 0)

  # Expand to one element per observation, sorted for the median.
  values <- sort(rep(x, freq))
  n <- length(values)

  average <- sum(x * freq) / sum(freq)

  median_value <- if (n %% 2 == 0) {
    (values[n / 2] + values[n / 2 + 1]) / 2
  } else {
    values[(n + 1) / 2]
  }

  # `values` is sorted, so `candidates` is ascending and which.max() picks
  # the smallest value among tied modes.
  candidates <- unique(values)
  counts <- tabulate(match(values, candidates), nbins = length(candidates))
  modus <- candidates[which.max(counts)]

  variance <- if (n > 1) {
    (n * sum(values^2) - sum(values)^2) / (n * (n - 1))
  } else {
    NA_real_
  }

  # Every statistic is collected first and returned once, with one column
  # name per value.
  matrix(
    c(average, median_value, modus, max(values), min(values), variance),
    nrow = 1, ncol = 6,
    dimnames = list("Value", c("Average",
                               "Median",
                               "Modus",
                               "Max",
                               "Min",
                               "Variance"))
  )
}
  • Simple Case Example
# Simulated transaction data: 5000 rows, one per consecutive day.
Id       <- seq_len(5000)
Date     <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)

# Customer names, drawn at random with replacement.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
Name     <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
               "Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
               5000, replace = TRUE)

# Each of the five cities appears exactly 1000 times, in shuffled order.
City     <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))

Outlet   <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = TRUE)

Menu     <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
              "Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
              "Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
# Item ordered in each transaction.
all_menu <- sample(Menu, 5000, replace = TRUE)
# One random price per menu item (14 items), between 18000 and 45000.
Price    <- sample(18000:45000,14, replace = TRUE)
# Price list: one row per menu item.
DFPrice  <- data.frame(Menu, Price)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Look up the price of every ordered item. The join key is stated
# explicitly, so the result does not depend on dplyr guessing the common
# column.
Menu_Price <- left_join(data.frame(Menu = all_menu), DFPrice, by = "Menu")
# Assemble the transaction table: one row per sale, with the menu item and
# its price appended as the last two columns.
KopiKenangan <- cbind(data.frame(Id,
                                 Date,
                                 Name,
                                 City,
                                 Outlet),
                                 Menu_Price)
head(KopiKenangan,5)
##   Id       Date    Name      City   Outlet                 Menu Price
## 1  1 2018-01-01    Lala Tangerang Outlet 2 Hot Red Velvet Latte 34412
## 2  2 2018-01-02   Angel    Bekasi Outlet 5    Ice Caramel Latte 26021
## 3  3 2018-01-03 Michael Tangerang Outlet 4        Hot Chocolate 19551
## 4  4 2018-01-04  Nikita    Bekasi Outlet 2    Hot Caramel Latte 32576
## 5  5 2018-01-05  Sherly     Depok Outlet 4       Ice Cafe Latte 25739

Let’s say you already have the data set shown above. Please create functions to perform the following tasks:

  • The percentage of sales for each city.
library(dplyr)
# Share of sales (rows) per city, as proportions that sum to 1.
# prop.table() normalises its input, so scaling the counts beforehand has no
# effect; the conversion to percent happens in addPercent().
City_percentage <- data.frame(prop.table(table(KopiKenangan$City)))
# Format proportions as percentage labels.
#
# x: numeric vector of proportions (e.g. 0.2 for 20%).
# Returns a character vector such as "20%", rounded to one decimal place.
addPercent <- function(x)
{
  percent <- round(x * 100, digits = 1)
  paste0(percent, "%")
}
# Pass the proportions explicitly instead of reading them from a global
# inside the function.
percentage <- addPercent(City_percentage$Freq)
City_percentage$Freq <- NULL
cbind(City_percentage,percentage)
##        Var1 percentage
## 1    Bekasi        20%
## 2     Bogor        20%
## 3     Depok        20%
## 4   Jakarta        20%
## 5 Tangerang        20%
  • The frequency of Name and Menu.
# Two-way contingency table: rows are customer names, columns are menu
# items, and each cell counts the matching transactions.
NameandMenu_Frequency <- table(
  KopiKenangan[["Name"]],
  KopiKenangan[["Menu"]]
)
NameandMenu_Frequency
##           
##            Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
##   Angel           16           18                13            19
##   Ardifo          18           18                20            18
##   Calisha         16           15                19            19
##   Eric            13           14                17            18
##   Fallen          13           18                11            19
##   Felisha         17           19                21            28
##   Irene           18           21                11            11
##   Jacob           21           14                13            17
##   Jeffry          18           21                22            17
##   Julian          11           15                17            20
##   Kefas           23           13                16            17
##   Kevin           15           19                21            14
##   Lala            10           21                10            21
##   Michael         19           18                18            25
##   Naomi           19           19                15            11
##   Nikita          15           26                23            20
##   Patricia        20           13                15            15
##   Sherly          31           14                15            19
##   Siana           18           19                20            21
##   Vanessa         21           16                22            26
##           
##            Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
##   Angel                      24            14               21             17
##   Ardifo                     15            18               12             10
##   Calisha                    14            21               17             16
##   Eric                       10            24               22             20
##   Fallen                     13            15               15             16
##   Felisha                    19            20               19             13
##   Irene                      20            16               17             18
##   Jacob                      21            17               22             21
##   Jeffry                     24            15               17             14
##   Julian                     16            18               17             23
##   Kefas                      17            15               27             23
##   Kevin                      26            14               15             15
##   Lala                       17            20               26             19
##   Michael                    16            19               16             19
##   Naomi                      13            24               13             14
##   Nikita                     23            27               16             16
##   Patricia                   19            18               13             23
##   Sherly                     14            22               20             20
##   Siana                      17            24               12             26
##   Vanessa                    20            25               17             16
##           
##            Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
##   Angel                   21                 16              17
##   Ardifo                  11                 18              17
##   Calisha                 18                 24              21
##   Eric                    12                 17              20
##   Fallen                  22                 19              24
##   Felisha                 19                 17              12
##   Irene                   17                 16              27
##   Jacob                   19                 18               9
##   Jeffry                  18                 11              15
##   Julian                  17                 25              20
##   Kefas                   16                 15              11
##   Kevin                   20                 22              15
##   Lala                    15                 20              13
##   Michael                 21                 17              21
##   Naomi                   16                 18              24
##   Nikita                  18                 17              21
##   Patricia                16                 21              19
##   Sherly                  20                 23              12
##   Siana                   17                 25              16
##   Vanessa                 21                 13              11
##           
##            Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
##   Angel                     21               16                   18
##   Ardifo                    31               18                   21
##   Calisha                   29               16                   20
##   Eric                      13               25                   21
##   Fallen                    17               17                   16
##   Felisha                   18               13                   20
##   Irene                     15               16                   23
##   Jacob                     14               14                   12
##   Jeffry                    20               15                   27
##   Julian                     9               21                   16
##   Kefas                     12               10                   16
##   Kevin                     16               11                   11
##   Lala                      18               21                   17
##   Michael                   19               24                   14
##   Naomi                     15               15                   26
##   Nikita                    22               22                   21
##   Patricia                  20               14                   14
##   Sherly                    15               17                   21
##   Siana                     14               16                   24
##   Vanessa                   10               15                   13
  • The Average of monthly sales per-menu item.
# Label every transaction date with its "Month,Year" (e.g. "April,2018"),
# then count the transactions that fall in each month.
# NOTE(review): %B presumably yields locale-dependent month names - confirm
# the locale if the labels are compared across machines.
monthly.sales <- format(Date, format = "%B,%Y")
table(monthly.sales)
## monthly.sales
##     April,2018     April,2019     April,2020     April,2021     April,2022 
##             30             30             30             30             30 
##     April,2023     April,2024     April,2025     April,2026     April,2027 
##             30             30             30             30             30 
##     April,2028     April,2029     April,2030     April,2031    August,2018 
##             30             30             30             30             31 
##    August,2019    August,2020    August,2021    August,2022    August,2023 
##             31             31             31             31             31 
##    August,2024    August,2025    August,2026    August,2027    August,2028 
##             31             31             31             31             31 
##    August,2029    August,2030    August,2031  December,2018  December,2019 
##             31             31             31             31             31 
##  December,2020  December,2021  December,2022  December,2023  December,2024 
##             31             31             31             31             31 
##  December,2025  December,2026  December,2027  December,2028  December,2029 
##             31             31             31             31             31 
##  December,2030  February,2018  February,2019  February,2020  February,2021 
##             31             28             28             29             28 
##  February,2022  February,2023  February,2024  February,2025  February,2026 
##             28             28             29             28             28 
##  February,2027  February,2028  February,2029  February,2030  February,2031 
##             28             29             28             28             28 
##   January,2018   January,2019   January,2020   January,2021   January,2022 
##             31             31             31             31             31 
##   January,2023   January,2024   January,2025   January,2026   January,2027 
##             31             31             31             31             31 
##   January,2028   January,2029   January,2030   January,2031      July,2018 
##             31             31             31             31             31 
##      July,2019      July,2020      July,2021      July,2022      July,2023 
##             31             31             31             31             31 
##      July,2024      July,2025      July,2026      July,2027      July,2028 
##             31             31             31             31             31 
##      July,2029      July,2030      July,2031      June,2018      June,2019 
##             31             31             31             30             30 
##      June,2020      June,2021      June,2022      June,2023      June,2024 
##             30             30             30             30             30 
##      June,2025      June,2026      June,2027      June,2028      June,2029 
##             30             30             30             30             30 
##      June,2030      June,2031     March,2018     March,2019     March,2020 
##             30             30             31             31             31 
##     March,2021     March,2022     March,2023     March,2024     March,2025 
##             31             31             31             31             31 
##     March,2026     March,2027     March,2028     March,2029     March,2030 
##             31             31             31             31             31 
##     March,2031       May,2018       May,2019       May,2020       May,2021 
##             31             31             31             31             31 
##       May,2022       May,2023       May,2024       May,2025       May,2026 
##             31             31             31             31             31 
##       May,2027       May,2028       May,2029       May,2030       May,2031 
##             31             31             31             31             31 
##  November,2018  November,2019  November,2020  November,2021  November,2022 
##             30             30             30             30             30 
##  November,2023  November,2024  November,2025  November,2026  November,2027 
##             30             30             30             30             30 
##  November,2028  November,2029  November,2030   October,2018   October,2019 
##             30             30             30             31             31 
##   October,2020   October,2021   October,2022   October,2023   October,2024 
##             31             31             31             31             31 
##   October,2025   October,2026   October,2027   October,2028   October,2029 
##             31             31             31             31             31 
##   October,2030 September,2018 September,2019 September,2020 September,2021 
##             31             30             30             30             30 
## September,2022 September,2023 September,2024 September,2025 September,2026 
##             30             30             30             30             30 
## September,2027 September,2028 September,2029 September,2030 September,2031 
##             30             30             30             30              9
# Number of sales per menu item over the whole period.
data.cafe <- data.frame(table(KopiKenangan$Menu))
# Average sales per month = total sales / number of distinct months.
# length(monthly.sales) would be the number of transactions (one label per
# row), not the number of months, so count the unique month labels instead.
data.cafe$monthly.average <- data.cafe$Freq/length(unique(monthly.sales))
data.cafe
##                    Var1 Freq monthly.average
## 1             Cappucino  352        2.133333
## 2          Es Kopi Susu  351        2.127273
## 3     Hot Caramel Latte  339        2.054545
## 4         Hot Chocolate  375        2.272727
## 5  Hot Red Velvet Latte  358        2.169697
## 6         Ice Americano  386        2.339394
## 7      Ice Berry Coffee  354        2.145455
## 8        Ice Cafe Latte  359        2.175758
## 9     Ice Caramel Latte  354        2.145455
## 10   Ice Coffee Avocado  372        2.254545
## 11      Ice Coffee Lite  345        2.090909
## 12  Ice Matcha Espresso  348        2.109091
## 13     Ice Matcha Latte  336        2.036364
## 14 Ice Red Velvet Latte  371        2.248485