1 Your Exercise

In this section, you are expected to become more confident in creating your own functions. I advise you to create a function for each of the tasks below:

  • Univariate variable (one dimension)
    • average
# Arithmetic mean of a numeric vector: the total of the elements divided by
# how many elements there are.
#
# x: numeric vector.
# Returns a single number.
average <- function(x) {
  total <- sum(x)
  count <- length(x)
  total / count
}
x <- c(1, 2, 3)
average(x)
## [1] 2
  • middle_value
# Median of a vector that has an odd number of elements ("ganjil" = odd).
#
# The data are sorted first and the element at the middle position
# (n + 1) / 2 is returned, so the result is a value taken from the data
# rather than the position itself.
#
# x: vector with an odd number of elements.
# Returns the middle element of the sorted data.
# Stops with an error when the number of elements is even.
middle_value_ganjil <- function(x) {
  n <- length(x)
  if (n %% 2 != 1) {
    stop("middle_value_ganjil() needs an odd number of elements",
         call. = FALSE)
  }
  sort(x)[(n + 1) / 2]
}
x <- c(1, 2, 3, 4, 5)
middle_value_ganjil(x)
## [1] 3
# Median of a vector that has an even number of elements ("genap" = even).
#
# The data are sorted and the two central elements (positions n / 2 and
# n / 2 + 1) are averaged.
#
# x: numeric vector with an even, non-zero number of elements.
# Returns the mean of the two middle elements of the sorted data.
# Stops with an error when the number of elements is zero or odd.
middle_value_genap <- function(x) {
  n <- length(x)
  if (n == 0 || n %% 2 != 0) {
    stop("middle_value_genap() needs an even, non-zero number of elements",
         call. = FALSE)
  }
  sorted <- sort(x)
  (sorted[n / 2] + sorted[n / 2 + 1]) / 2
}
x <- c(1, 2, 3, 4, 5, 6)
middle_value_genap(x)
## [1] 3.5
  • most_frequent
# Most frequent value(s) of x.
#
# The values are counted with table(); every value whose count equals the
# highest count is returned, so ties yield more than one element. Because the
# result is taken from the frequency table, it is a factor whose levels are
# all distinct values of x.
most_frequent <- function(x) {
  freq_table <- data.frame(table(x))
  is_top <- freq_table$Freq == max(freq_table$Freq)
  freq_table[[1]][is_top]
}
x <- c(1, 4, 5, 2, 5, 3, 5, 6, 7, 5)
most_frequent(x)
## [1] 5
## Levels: 1 2 3 4 5 6 7
  • max_value
# Largest element of x (thin wrapper around max()).
max_value <- function(x) max(x)

x <- c(1, 2, 3, 4, 5, 6, 7)
max_value(x)
## [1] 7
  • min_value
# Smallest element of x (thin wrapper around min()).
min_value <- function(x) min(x)

x <- c(1, 2, 3, 4, 5, 6, 7)
min_value(x)
## [1] 1
  • variance
# Sample variance of a numeric vector: the sum of squared deviations from
# the mean divided by n - 1.
#
# x: numeric vector.
# Returns a single number, or NA when x has fewer than two elements
# (the sample variance is undefined there).
variance <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  centre <- sum(x) / n
  # The denominator is n - 1 (elements minus one), not length(x - 1), which
  # is just n again.
  sum((x - centre)^2) / (n - 1)
}
x <- c(1, 2, 3, 4, 5, 6, 7)
variance(x)
## [1] 4.666667
  • standard_deviation
# Sample standard deviation of a numeric vector: the square root of the
# sample variance (sum of squared deviations from the mean over n - 1).
#
# x: numeric vector.
# Returns a single number, or NA when x has fewer than two elements.
standard_deviation <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  centre <- sum(x) / n
  # The denominator is n - 1 (elements minus one), not length(x - 1), which
  # is just n again.
  sqrt(sum((x - centre)^2) / (n - 1))
}
x <- c(1, 2, 3, 4, 5, 6, 7)
standard_deviation(x)
## [1] 2.160247
  • Outliers

  • summary

x <- c(1, 2, 3, 4, 5, 6, 7)
# One-row summary of a numeric vector.
#
# x: numeric vector.
# Returns a 1 x 7 matrix (row name "Value") with the columns
#   Mean        arithmetic mean
#   Med.Ganjil  median when the number of elements is odd, otherwise NA
#   Med.Genap   median when the number of elements is even, otherwise NA
#   Max, Min    largest and smallest element
#   Variance    sample variance (denominator n - 1)
#   StDeviasi   sample standard deviation
# Exactly one of the two median columns is filled in for non-empty input.
Summary <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  mean_x <- sum(x) / n

  # The median is a value of the sorted data, not a position in it.
  med_odd <- if (n %% 2 == 1) {
    sorted[(n + 1) / 2]
  } else {
    NA_real_
  }
  med_even <- if (n > 0 && n %% 2 == 0) {
    (sorted[n / 2] + sorted[n / 2 + 1]) / 2
  } else {
    NA_real_
  }

  # Sample variance is undefined for fewer than two elements.
  var_x <- if (n < 2) NA_real_ else sum((x - mean_x)^2) / (n - 1)

  matrix(
    c(mean_x, med_odd, med_even, max(x), min(x), var_x, sqrt(var_x)),
    nrow = 1, ncol = 7,
    dimnames = list("Value", c("Mean",
                               "Med.Ganjil",
                               "Med.Genap",
                               "Max",
                               "Min",
                               "Variance",
                               "StDeviasi"))
  )
}
Summary(x)
##       Mean Med.Ganjil Med.Genap Max Min Variance StDeviasi
## Value    4          4        NA   7   1 4.666667  2.160247
  • Multivariate variable (more than one dimension)

    • average
# Mean of frequency-weighted data: each value in x counts freq times, so the
# weighted total is divided by the total number of observations, sum(freq).
#
# x:    numeric vector of distinct values.
# freq: numeric vector of the same length giving how often each value occurs.
# Returns a single number.
average_freq <- function(x, freq) {
  # Guard against silent recycling when the two vectors do not line up.
  stopifnot(length(x) == length(freq))
  sum(x * freq) / sum(freq)
}
x <- c(1, 2, 3, 4, 5)
freq <- c(2, 4, 3, 6, 4)
average_freq(x, freq)
## [1] 3.315789
  • middle_value
K <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(K, freq)
# Median of a numeric vector.
#
# The data are sorted; for an even number of elements the two central values
# are averaged, for an odd number the single central value is returned.
#
# x: numeric vector (already expanded, one element per observation).
# Returns a single number, or NA for empty input.
middle_value <- function(x) {
  sorted <- sort(x)
  n <- length(sorted)
  if (n == 0) {
    return(NA_real_)
  }
  if (n %% 2 == 0) {
    (sorted[n / 2] + sorted[n / 2 + 1]) / 2
  } else {
    sorted[(n + 1) / 2]
  }
}
middle_value(list3)
## [1] 4
  • most_frequent
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(M, freq)
# Most frequent value(s) of x.
#
# Counts how often each distinct value occurs and returns every value whose
# count equals the highest count (ties give several values, in ascending
# order). The result has the same type as x.
#
# x: vector of observations.
# Returns the mode(s) of x; empty input is returned unchanged.
most_frequent <- function(x) {
  if (length(x) == 0) {
    return(x)
  }
  values <- sort(unique(x))
  # match() maps each observation to the index of its distinct value and
  # tabulate() counts the occurrences of each index.
  counts <- tabulate(match(x, values), nbins = length(values))
  values[counts == max(counts)]
}
most_frequent(list3)
## [1] 4
  • max_value
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(M, freq)
# Largest observation in x.
#
# The maximum does not depend on whether the number of observations is even
# or odd, so no parity check is made.
#
# x: vector of observations.
# Returns the largest element.
max_value <- function(x) {
  max(x)
}
max_value(list3)
## [1] 9
  • min_value
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(M, freq)
# Smallest observation in x.
#
# The minimum does not depend on whether the number of observations is even
# or odd, so no parity check is made.
#
# x: vector of observations.
# Returns the smallest element.
min_value <- function(x) {
  min(x)
}
min_value(list3)
## [1] 2
  • variance
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the (value, frequency) pairs into the raw observations.
list3 <- rep(M, freq)
# Sample variance of the observations in x: the sum of squared deviations
# from the mean divided by n - 1. Works for any number of observations of
# at least two, even or odd.
#
# x: numeric vector of observations.
# Returns a single number, or NA when x has fewer than two elements.
variance <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  centre <- sum(x) / n
  # The denominator is n - 1 (elements minus one), not length(x - 1), which
  # is just n again.
  sum((x - centre)^2) / (n - 1)
}
variance(list3)
## [1] 6.595833
  • standard_deviation

  • summary

M <- c(2, 3, 4, 5, 9)
x <- c(1, 2, 3, 4, 5)
freq <- c(2, 3, 5, 2, 4)
# One-row summary of frequency data.
#
# x:    numeric vector of distinct values.
# freq: non-negative whole-number counts, one per value of x. Defaults to a
#       count of 1 for every value, i.e. x is treated as raw observations.
#       The counts are passed explicitly rather than read from a global
#       variable.
# Returns a 1 x 6 matrix (row name "Value") with the columns
#   Average   frequency-weighted mean
#   Median    median of the expanded observations
#   Modus     most frequent value (the smallest one when several tie)
#   Max, Min  largest and smallest observed value
#   Variance  sample variance of the expanded observations (n - 1)
# Stops with an error when x and freq differ in length or when there are no
# observations at all.
Summary <- function(x, freq = rep(1, length(x))) {
  stopifnot(length(x) == length(freq))
  observations <- sort(rep(x, freq))
  n <- length(observations)
  if (n == 0) {
    stop("Summary() needs at least one observation", call. = FALSE)
  }

  avg <- sum(observations) / n

  med <- if (n %% 2 == 0) {
    (observations[n / 2] + observations[n / 2 + 1]) / 2
  } else {
    observations[(n + 1) / 2]
  }

  # Count each distinct value; which.max() picks the first (smallest) value
  # among those with the highest count.
  values <- unique(observations)
  counts <- tabulate(match(observations, values), nbins = length(values))
  modus <- values[which.max(counts)]

  # Sample variance is undefined for a single observation.
  var_x <- if (n < 2) NA_real_ else sum((observations - avg)^2) / (n - 1)

  matrix(
    c(avg, med, modus, max(observations), min(observations), var_x),
    nrow = 1, ncol = 6,
    dimnames = list("Value", c("Average",
                               "Median",
                               "Modus",
                               "Max",
                               "Min",
                               "Variance"))
  )
}
  • Simple Case Example
# Simulated sales log: 5000 rows, one per consecutive day from 2018-01-01.
# No seed is set, so the sampled columns differ from run to run. The order of
# the sample() calls is kept as-is because it determines what each one draws.
Id       <- (1:5000)
Date     <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)

# Customer of each sale, drawn with replacement from 20 names.
Name     <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
               "Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
               5000, replace = TRUE)

# Exactly 1000 sales per city, in shuffled order.
City     <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))

Outlet   <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = TRUE)

Menu     <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
              "Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
              "Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
# Item bought in each sale, and one randomly drawn price per menu item.
all_menu <- sample(Menu, 5000, replace = TRUE)
Price    <- sample(18000:45000,14, replace = TRUE)
DFPrice  <- data.frame(Menu, Price)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Look up the price of every sold item. The join key is named explicitly so
# the join does not depend on which column names the two tables happen to
# share (and no "Joining, by = ..." message is printed).
Menu_Price <- left_join(data.frame(Menu = all_menu), DFPrice, by = "Menu")
# Assemble the final sales table: one row per sale.
KopiKenangan <- cbind(data.frame(Id,
                                 Date,
                                 Name,
                                 City,
                                 Outlet),
                                 Menu_Price)
head(KopiKenangan, 5)
##   Id       Date     Name      City   Outlet                 Menu Price
## 1  1 2018-01-01  Michael     Depok Outlet 4         Es Kopi Susu 21984
## 2  2 2018-01-02   Ardifo Tangerang Outlet 3        Ice Americano 33053
## 3  3 2018-01-03   Fallen   Jakarta Outlet 1 Ice Red Velvet Latte 18154
## 4  4 2018-01-04   Nikita Tangerang Outlet 1 Ice Red Velvet Latte 18154
## 5  5 2018-01-05 Patricia     Bogor Outlet 4  Ice Matcha Espresso 19723

Suppose you already have the data set shown above. Please create functions that carry out the following tasks:

  • The percentage of sales for each city.
library(dplyr)
# Share of sales rows per city, as proportions that sum to 1.
City_percentage <- data.frame(prop.table(table(KopiKenangan$City)))
# Format proportions as percentage labels.
#
# x: numeric vector of proportions (for example 0.2 for one fifth).
# Returns a character vector such as "20%", rounded to one decimal place.
addPercent <- function(x) {
  paste0(round(x * 100, digits = 1), "%")
}
percentage <- addPercent(City_percentage$Freq)
# Drop the raw proportions so only the city and its label are shown.
City_percentage$Freq <- NULL
cbind(City_percentage, percentage)
##        Var1 percentage
## 1    Bekasi        20%
## 2     Bogor        20%
## 3     Depok        20%
## 4   Jakarta        20%
## 5 Tangerang        20%
  • The frequency of Name and Menu.
# Cross-tabulate customers (rows) against menu items (columns): each cell is
# the number of sales rows with that name and that menu item.
NameandMenu_Frequency <- table(KopiKenangan[["Name"]], KopiKenangan[["Menu"]])
NameandMenu_Frequency
##           
##            Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
##   Angel           16           16                14            19
##   Ardifo          18           23                15            17
##   Calisha         23           19                11            17
##   Eric            22           14                18            18
##   Fallen          19           13                14            19
##   Felisha         11           16                16            23
##   Irene           17           17                23            15
##   Jacob           24           19                17            17
##   Jeffry          22           19                13            21
##   Julian          18           13                23            19
##   Kefas           17           19                12            11
##   Kevin           28           24                15            23
##   Lala            13           16                18            20
##   Michael         20           16                21            23
##   Naomi           21           14                12            18
##   Nikita          14           16                15            25
##   Patricia        21           19                14            13
##   Sherly          13           19                15            20
##   Siana           22           12                 8            20
##   Vanessa         17           20                11            19
##           
##            Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
##   Angel                      14            17               13             20
##   Ardifo                     14            14               17             13
##   Calisha                    23            20               18             19
##   Eric                       13            19               16             21
##   Fallen                     17            12               19             17
##   Felisha                    17            19               14             15
##   Irene                      16            22               14             26
##   Jacob                      16            14               16             14
##   Jeffry                     17            18               20             21
##   Julian                     20            26               14             18
##   Kefas                      19            15               15             10
##   Kevin                      14            28               15             14
##   Lala                       25            16               17             18
##   Michael                    22            14               15             18
##   Naomi                      15            20               23             23
##   Nikita                     19            17               20             18
##   Patricia                   21            10               20             12
##   Sherly                     18            19               26             18
##   Siana                      12            11               20             19
##   Vanessa                    14            17               16             10
##           
##            Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
##   Angel                   11                 19              21
##   Ardifo                  14                 22              21
##   Calisha                 18                 17              17
##   Eric                    24                 21              12
##   Fallen                  21                 14              17
##   Felisha                  9                 22              26
##   Irene                   24                 11              18
##   Jacob                   29                 21              27
##   Jeffry                  15                 17              21
##   Julian                  12                 15              10
##   Kefas                   14                 18              21
##   Kevin                   19                 24              18
##   Lala                    21                 12              27
##   Michael                 23                 18              19
##   Naomi                   18                 19              21
##   Nikita                  17                 20              17
##   Patricia                20                 13              18
##   Sherly                  14                 18              10
##   Siana                   17                 21              14
##   Vanessa                 26                 22              15
##           
##            Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
##   Angel                     20               18                   16
##   Ardifo                    17               24                   22
##   Calisha                   17               22                   16
##   Eric                      27               15                   13
##   Fallen                    19               17                   30
##   Felisha                   16                8                   20
##   Irene                     21               22                   30
##   Jacob                     15               17                   21
##   Jeffry                    16               16                   18
##   Julian                    19               14                   18
##   Kefas                     16               19                   23
##   Kevin                     19               22                   17
##   Lala                      18               15                   16
##   Michael                   16               27                   23
##   Naomi                     18               15                   19
##   Nikita                    17               17                   18
##   Patricia                  23               14                   12
##   Sherly                    24               14                   20
##   Siana                     16               24                   15
##   Vanessa                   22               12                   17
  • The average monthly sales per menu item.
# Label every sale date with its month name and year (for example
# "April,2018"), then count how many dates fall under each label.
monthly.sales <- format(x = Date, format = "%B,%Y")
table(monthly.sales)
## monthly.sales
##     April,2018     April,2019     April,2020     April,2021     April,2022 
##             30             30             30             30             30 
##     April,2023     April,2024     April,2025     April,2026     April,2027 
##             30             30             30             30             30 
##     April,2028     April,2029     April,2030     April,2031    August,2018 
##             30             30             30             30             31 
##    August,2019    August,2020    August,2021    August,2022    August,2023 
##             31             31             31             31             31 
##    August,2024    August,2025    August,2026    August,2027    August,2028 
##             31             31             31             31             31 
##    August,2029    August,2030    August,2031  December,2018  December,2019 
##             31             31             31             31             31 
##  December,2020  December,2021  December,2022  December,2023  December,2024 
##             31             31             31             31             31 
##  December,2025  December,2026  December,2027  December,2028  December,2029 
##             31             31             31             31             31 
##  December,2030  February,2018  February,2019  February,2020  February,2021 
##             31             28             28             29             28 
##  February,2022  February,2023  February,2024  February,2025  February,2026 
##             28             28             29             28             28 
##  February,2027  February,2028  February,2029  February,2030  February,2031 
##             28             29             28             28             28 
##   January,2018   January,2019   January,2020   January,2021   January,2022 
##             31             31             31             31             31 
##   January,2023   January,2024   January,2025   January,2026   January,2027 
##             31             31             31             31             31 
##   January,2028   January,2029   January,2030   January,2031      July,2018 
##             31             31             31             31             31 
##      July,2019      July,2020      July,2021      July,2022      July,2023 
##             31             31             31             31             31 
##      July,2024      July,2025      July,2026      July,2027      July,2028 
##             31             31             31             31             31 
##      July,2029      July,2030      July,2031      June,2018      June,2019 
##             31             31             31             30             30 
##      June,2020      June,2021      June,2022      June,2023      June,2024 
##             30             30             30             30             30 
##      June,2025      June,2026      June,2027      June,2028      June,2029 
##             30             30             30             30             30 
##      June,2030      June,2031     March,2018     March,2019     March,2020 
##             30             30             31             31             31 
##     March,2021     March,2022     March,2023     March,2024     March,2025 
##             31             31             31             31             31 
##     March,2026     March,2027     March,2028     March,2029     March,2030 
##             31             31             31             31             31 
##     March,2031       May,2018       May,2019       May,2020       May,2021 
##             31             31             31             31             31 
##       May,2022       May,2023       May,2024       May,2025       May,2026 
##             31             31             31             31             31 
##       May,2027       May,2028       May,2029       May,2030       May,2031 
##             31             31             31             31             31 
##  November,2018  November,2019  November,2020  November,2021  November,2022 
##             30             30             30             30             30 
##  November,2023  November,2024  November,2025  November,2026  November,2027 
##             30             30             30             30             30 
##  November,2028  November,2029  November,2030   October,2018   October,2019 
##             30             30             30             31             31 
##   October,2020   October,2021   October,2022   October,2023   October,2024 
##             31             31             31             31             31 
##   October,2025   October,2026   October,2027   October,2028   October,2029 
##             31             31             31             31             31 
##   October,2030 September,2018 September,2019 September,2020 September,2021 
##             31             30             30             30             30 
## September,2022 September,2023 September,2024 September,2025 September,2026 
##             30             30             30             30             30 
## September,2027 September,2028 September,2029 September,2030 September,2031 
##             30             30             30             30              9
# Total number of sales per menu item.
data.cafe <- data.frame(table(KopiKenangan$Menu))
# Average sales per month: the total divided by the number of distinct
# calendar months in the data. monthly.sales holds one month label per sale
# date, so its length is the number of days, not the number of months.
data.cafe$monthly.average <- data.cafe$Freq / length(unique(monthly.sales))
data.cafe
##                    Var1 Freq monthly.average
## 1             Cappucino  376        2.278788
## 2          Es Kopi Susu  344        2.084848
## 3     Hot Caramel Latte  305        1.848485
## 4         Hot Chocolate  377        2.284848
## 5  Hot Red Velvet Latte  346        2.096970
## 6         Ice Americano  348        2.109091
## 7      Ice Berry Coffee  348        2.109091
## 8        Ice Cafe Latte  344        2.084848
## 9     Ice Caramel Latte  366        2.218182
## 10   Ice Coffee Avocado  364        2.206061
## 11      Ice Coffee Lite  370        2.242424
## 12  Ice Matcha Espresso  376        2.278788
## 13     Ice Matcha Latte  352        2.133333
## 14 Ice Red Velvet Latte  384        2.327273