1 Your Exercise

In this section, you are expected to be more confident in creating your own functions. I advise you to create a function for each of the tasks below:

  • Univariate variable (one dimension)
    • average
#' Arithmetic mean of a numeric vector.
#'
#' @param x Numeric vector.
#' @return The sum of `x` divided by its length.
average <- function(x) {
  total <- sum(x)
  count <- length(x)
  total / count
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
average(x)
## [1] 10.5
  • middle_value
x<- c(2,3,4,5,6,3,2,3,4,5,9,80)
#' Median of a numeric vector.
#'
#' @param x Numeric vector.
#' @return The central element of the sorted data for an odd length, or the
#'   mean of the two central elements for an even length.
middle_value <- function(x) {
  ordered <- sort(x)
  count <- length(ordered)
  half <- count %/% 2
  if (count %% 2 == 1) {
    # Odd length: exactly one element sits in the middle.
    return(ordered[half + 1])
  }
  # Even length: average the two elements that straddle the middle.
  sum(ordered[c(half, half + 1)]) / 2
}
middle_value(x)
## [1] 4
  • most_frequent
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
#' Most frequent value(s) of a vector.
#'
#' @param x Vector of observations.
#' @return A factor holding every value whose count equals the highest count
#'   (more than one element when there is a tie).
Most_frequent <- function(x) {
  counts <- data.frame(table(x))
  top_count <- max(counts$Freq)
  is_top <- counts$Freq == top_count
  # Column 1 holds the distinct values as a factor; column 2 is the count.
  counts[is_top, 1]
}
Most_frequent(x)
## [1] 3
## Levels: 2 3 4 5 6 9 80
  • max_value
#' Largest value of a vector.
#'
#' @param x Vector of observations.
#' @return The last element of `x` after sorting in ascending order.
max_value <- function(x) {
  ascending <- sort(x)
  # The maximum is the final element of the ascending order.
  ascending[length(ascending)]
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
max_value(x)
## [1] 80
  • min_value
#' Smallest value of a vector.
#'
#' @param x Vector of observations.
#' @return The first element of `x` after sorting in ascending order.
min_value <- function(x) {
  ascending <- sort(x)
  # Index 1 for non-empty input; index 0 (an empty result) otherwise.
  ascending[min(1L, length(ascending))]
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
min_value(x)
## [1] 2
  • variance: sample variance
#' Sample variance (divisor n - 1).
#'
#' @param x Numeric vector.
#' @return Sum of squared deviations from the mean divided by `length(x) - 1`.
variance_sample <- function(x) {
  deviations <- x - mean(x)
  degrees_of_freedom <- length(x) - 1
  sum(deviations^2) / degrees_of_freedom
}

x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
variance_sample(x)
## [1] 482.8182

population variance

#' Population variance (divisor n).
#'
#' @param x Numeric vector.
#' @return Sum of squared deviations from the mean divided by `length(x)`.
variance_population <- function(x) {
  deviations <- x - mean(x)
  count <- length(x)
  sum(deviations^2) / count
}

x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
variance_population(x)
## [1] 442.5833
  • standard_deviation: sample standard deviation
#' Sample standard deviation (divisor n - 1).
#'
#' @param x Numeric vector.
#' @return Square root of the sum of squared deviations from the mean divided
#'   by `length(x) - 1`.
standard_deviation_sample <- function(x) {
  deviations <- x - mean(x)
  sum_of_squares <- sum(deviations^2)
  sqrt(sum_of_squares / (length(x) - 1))
}

x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
standard_deviation_sample(x)
## [1] 21.97312

population standard deviation

#' Population standard deviation (divisor n).
#'
#' @param x Numeric vector.
#' @return Square root of the sum of squared deviations from the mean divided
#'   by `length(x)`.
standard_deviation_population <- function(x) {
  deviations <- x - mean(x)
  sum_of_squares <- sum(deviations^2)
  sqrt(sum_of_squares / length(x))
}

x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
standard_deviation_population(x)
## [1] 21.03766
  • Outliers
z<-c(2,3,4,5,6,3,2,3,4,5,80)
#' Flag extreme values using quartile-based fences.
#'
#' A value is reported when it lies more than `k` interquartile ranges below
#' the first quartile or above the third quartile.
#'
#' @param x Numeric vector.
#' @param k Fence multiplier. The default of 3 reproduces the original gate.
#' @return Character vector with one `"Outlier = <value>"` entry per flagged
#'   value, or `character(0)` when nothing is flagged.
Outliers <- function(x, k = 3) {
  quartiles <- quantile(x, c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  lower_gate <- quartiles[1] - k * spread
  upper_gate <- quartiles[2] + k * spread
  flagged <- x[x < lower_gate | x > upper_gate]
  # paste() treats a zero-length argument as "", which would produce the
  # misleading label "Outlier = " for clean data; return nothing instead.
  if (length(flagged) == 0) {
    return(character(0))
  }
  paste("Outlier", flagged, sep = " = ")
}
Outliers(z)
## [1] "Outlier = 80"
  • summary (all functions) - optional
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 80)

#' Descriptive statistics of a numeric vector in one call.
#'
#' Note that this definition masks `base::summary()` for the rest of the
#' session.
#'
#' @param x Numeric vector.
#' @return Named numeric vector with `mean`, `med`, `mode`, `max`, `min`,
#'   `var.s`, `var.p`, `stdev.s`, `stdev.p`, followed by one `Outlier` entry
#'   per value lying more than 3 interquartile ranges outside the quartiles
#'   (no `Outlier` entry when there is none).
summary <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  average <- sum(x) / n

  # Median: index by the middle position (n %/% 2), not by the remainder.
  half <- n %/% 2
  middle_value <- if (n %% 2 == 0) {
    (sorted[half] + sorted[half + 1]) / 2
  } else {
    sorted[half + 1]
  }

  # Mode: the first distinct value with the highest count.
  distinct <- unique(x)
  most_frequent <- distinct[which.max(tabulate(match(x, distinct)))]

  highest <- sorted[n]
  lowest <- sorted[1]

  sum_of_squares <- sum((x - mean(x))^2)
  variance_s <- sum_of_squares / (n - 1)
  variance_p <- sum_of_squares / n
  standar.dev_s <- sqrt(variance_s)
  standar.dev_p <- sqrt(variance_p)

  # Every value outside the 3 * IQR gates is reported, not only the first six.
  quartiles <- quantile(x, c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  outliers <- x[x < quartiles[1] - spread * 3 | x > quartiles[2] + spread * 3]

  c(
    mean = average,
    med = middle_value,
    mode = most_frequent,
    max = highest,
    min = lowest,
    var.s = variance_s,
    var.p = variance_p,
    stdev.s = standar.dev_s,
    stdev.p = standar.dev_p,
    Outlier = outliers
  )
}
summary(x)
##      mean       med      mode       max       min     var.s     var.p   stdev.s 
##  10.63636   4.00000   3.00000  80.00000   2.00000 530.85455 482.59504  23.04028 
##   stdev.p   Outlier 
##  21.96805  80.00000
  • Multivariate variable (more than one dimension)

    • average
#' Mean of values given with their frequencies.
#'
#' @param x Numeric vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return The frequency-weighted mean, `sum(x * y) / sum(y)`.
avarage_multi <- function(x, y) {
  # Divide by the number of observations (sum of the frequencies), not by the
  # number of distinct values.
  sum(x * y) / sum(y)
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
avarage_multi(x,y)
## [1] 8.764706
  • middle_value
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
#' Median of values given with their frequencies.
#'
#' @param x Numeric vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return The median of `x` with each value repeated `y` times.
middle_value <- function(x, y) {
  # Expand from the arguments rather than reading a global vector, so the
  # result always matches the inputs passed in.
  sorted <- sort(rep(x, y))
  n <- length(sorted)
  half <- n %/% 2
  if (n %% 2 == 0) {
    med <- sum(sorted[c(half, half + 1)]) / 2
  } else {
    med <- sorted[half + 1]
  }
  med
}
middle_value(x,y)
## [1] 4
  • most_frequent
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
#' Most frequent value(s) of data given as values with frequencies.
#'
#' @param x Vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return A factor holding every value whose count equals the highest count.
Most_frequent <- function(x, y) {
  # Build the table from the arguments; the original read a global vector and
  # overwrote the `y` argument with the frequency table.
  counts <- data.frame(table(rep(x, y)))
  counts[counts$Freq == max(counts$Freq), 1]
}
Most_frequent(x,y)
## [1] 4
## Levels: 2 3 4 5 9 70
  • max_value
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
#' Largest observed value of data given as values with frequencies.
#'
#' @param x Vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return The largest value of `x` repeated `y` times, so values with a
#'   frequency of zero are not considered.
max_value <- function(x, y) {
  # Expand from the arguments instead of relying on a global vector.
  sorted <- sort(rep(x, y))
  tail(sorted, 1)
}
max_value(x,y)
## [1] 70
  • min_value
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
#' Smallest observed value of data given as values with frequencies.
#'
#' @param x Vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return The smallest value of `x` repeated `y` times, so values with a
#'   frequency of zero are not considered.
min_value <- function(x, y) {
  # Expand from the arguments instead of relying on a global vector.
  sorted <- sort(rep(x, y))
  head(sorted, 1)
}
min_value(x,y)
## [1] 2
  • variance: sample variance
#' Sample variance (divisor n - 1) of values given with frequencies.
#'
#' @param x Numeric vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return Sample variance of `x` with each value repeated `y` times.
variance_sample <- function(x, y) {
  # Expand from the arguments instead of relying on a global vector.
  values <- rep(x, y)
  n <- length(values)
  sum((values - mean(values))^2) / (n - 1)
}

x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
variance_sample(x,y)
## [1] 255.1912

population variance

x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
#' Population variance (divisor n) of values given with frequencies.
#'
#' @param x Numeric vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return Population variance of `x` with each value repeated `y` times.
variance_population <- function(x, y) {
  # Expand from the arguments instead of relying on a global vector.
  values <- rep(x, y)
  n <- length(values)
  sum((values - mean(values))^2) / n
}

variance_population(x,y)
## [1] 240.1799
  • standard_deviation: sample standard deviation
#' Sample standard deviation (divisor n - 1) of values with frequencies.
#'
#' @param x Numeric vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return Standard deviation of `x` repeated `y` times, using `sum(y) - 1`
#'   as the divisor.
standar_deviation_sample <- function(x, y) {
  expanded <- rep(x, y)
  deviations <- expanded - mean(expanded)
  degrees_of_freedom <- sum(y) - 1
  sqrt(sum(deviations^2) / degrees_of_freedom)
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
standar_deviation_sample(x,y)
## [1] 15.9747

population standard deviation

#' Population standard deviation (divisor n) of values with frequencies.
#'
#' @param x Numeric vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return Standard deviation of `x` repeated `y` times, using `sum(y)` as the
#'   divisor.
standar_deviation_population <- function(x, y) {
  expanded <- rep(x, y)
  deviations <- expanded - mean(expanded)
  total_count <- sum(y)
  sqrt(sum(deviations^2) / total_count)
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
standar_deviation_population(x,y)
## [1] 15.49774
  • Outliers
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
#' Flag extreme values of data given as values with frequencies.
#'
#' Quartiles are taken from `x` repeated `y` times; a distinct value of `x`
#' is reported when it lies more than `k` interquartile ranges outside them.
#'
#' @param x Numeric vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @param k Fence multiplier. The default of 3 reproduces the original gate.
#' @return Character vector with one `"Outlier = <value>"` entry per flagged
#'   value of `x`, or `character(0)` when nothing is flagged.
Outliers <- function(x, y, k = 3) {
  quartiles <- quantile(rep(x, y), c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  lower_gate <- quartiles[1] - k * spread
  upper_gate <- quartiles[2] + k * spread
  flagged <- x[x < lower_gate | x > upper_gate]
  # paste() treats a zero-length argument as "", which would produce the
  # misleading label "Outlier = " for clean data; return nothing instead.
  if (length(flagged) == 0) {
    return(character(0))
  }
  paste("Outlier", flagged, sep = " = ")
}
Outliers(x,y)
## [1] "Outlier = 70"
  • summary (all functions) - optional
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)

#' Descriptive statistics of data given as values with frequencies.
#'
#' Note that this definition masks `base::summary()` for the rest of the
#' session.
#'
#' @param x Numeric vector of distinct values.
#' @param y Frequency of each value in `x` (same length as `x`).
#' @return Named numeric vector with `mean`, `med`, `mode`, `max`, `min`,
#'   `var.s`, `var.p`, `stdev.s`, `stdev.p`, followed by one `Outlier` entry
#'   per value of `x` lying more than 3 interquartile ranges outside the
#'   quartiles (no `Outlier` entry when there is none).
summary <- function(x, y) {
  # Expand from the arguments instead of relying on a global vector.
  values <- rep(x, y)
  n <- length(values)
  sorted <- sort(values)
  average <- sum(x * y) / n

  # Median: index by the middle position (n %/% 2), not by the remainder.
  half <- n %/% 2
  middle_value <- if (n %% 2 == 0) {
    (sorted[half] + sorted[half + 1]) / 2
  } else {
    sorted[half + 1]
  }

  # Mode: count the expanded observations; counting `x` itself gives every
  # distinct value a frequency of one.
  distinct <- unique(values)
  most_frequent <- distinct[which.max(tabulate(match(values, distinct)))]

  highest <- sorted[n]
  lowest <- sorted[1]

  sum_of_squares <- sum((values - mean(values))^2)
  variance_s <- sum_of_squares / (n - 1)
  variance_p <- sum_of_squares / n
  standar.dev_s <- sqrt(variance_s)
  standar.dev_p <- sqrt(variance_p)

  # Every value outside the 3 * IQR gates is reported, not only the first six.
  quartiles <- quantile(values, c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  outliers <- x[x < quartiles[1] - spread * 3 | x > quartiles[2] + spread * 3]

  c(
    mean = average,
    med = middle_value,
    mode = most_frequent,
    max = highest,
    min = lowest,
    var.s = variance_s,
    var.p = variance_p,
    stdev.s = standar.dev_s,
    stdev.p = standar.dev_p,
    Outlier = outliers
  )
}
summary(x,y)
##       mean        med       mode        max        min      var.s      var.p 
##   8.764706   4.000000   4.000000  70.000000   2.000000 255.191176 240.179931 
##    stdev.s    stdev.p    Outlier 
##  15.974704  15.497740  70.000000
  • Simple Case Example
# Simulated inputs for 5000 transactions, one per consecutive day.
Id       <- (1:5000)
Date     <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)

# Customer of each transaction, drawn with replacement from 20 names.
Name     <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
               "Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
               5000, replace = TRUE)

# Each of the five cities appears exactly 1000 times, in shuffled order.
City     <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))

Outlet   <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = TRUE)

Menu     <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
              "Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
              "Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
all_menu <- sample(Menu, 5000, replace = TRUE)
# One random price per menu item; DFPrice is the menu-to-price lookup table.
Price    <- sample(18000:45000,14, replace = TRUE)
DFPrice  <- data.frame(Menu, Price)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Look up the price of every ordered item. The join key is spelled out so
# dplyr does not have to guess it from the shared column names.
Menu_Price <- left_join(data.frame(Menu = all_menu), DFPrice, by = "Menu")
# Assemble the transaction table: one row per order, with its menu and price.
KopiKenangan <- cbind(
  data.frame(Id, Date, Name, City, Outlet),
  Menu_Price
)
head(KopiKenangan, 5)
##   Id       Date    Name      City   Outlet                 Menu Price
## 1  1 2018-01-01 Vanessa     Bogor Outlet 4 Hot Red Velvet Latte 20218
## 2  2 2018-01-02  Nikita     Depok Outlet 4        Hot Chocolate 29327
## 3  3 2018-01-03   Kefas     Depok Outlet 3    Ice Caramel Latte 34588
## 4  4 2018-01-04  Julian Tangerang Outlet 2        Ice Americano 21643
## 5  5 2018-01-05    Eric Tangerang Outlet 2        Hot Chocolate 29327

Suppose you already have a data set in hand, as shown above. Please create functions to perform the following tasks:

  • The percentage of sales for each city.
#' Format proportions as percentage labels.
#'
#' @param x Numeric vector of proportions (e.g. 0.2 for 20%).
#' @return Character vector: `x * 100` rounded to one decimal place with a
#'   trailing "%".
Percentage <- function(x) {
  scaled <- round(x * 100, 1)
  paste0(scaled, "%")
}
# Total sales per city, then each city's share of the grand total as a label.
City.Sales <- aggregate(Price ~ City, data = KopiKenangan, FUN = sum)
Total.Sales <- sum(City.Sales[["Price"]])
City.Sales[["Percentage.City.Sales"]] <- Percentage(City.Sales[["Price"]] / Total.Sales)
City.Sales
##        City    Price Percentage.City.Sales
## 1    Bekasi 28287234                 19.9%
## 2     Bogor 28410512                 19.9%
## 3     Depok 28622952                 20.1%
## 4   Jakarta 28476626                   20%
## 5 Tangerang 28624363                 20.1%
  • The frequency of Name and Menu. Frequency of Name
# Number of transactions per customer name, as a two-column data frame.
Name_freq <- as.data.frame(table(KopiKenangan[["Name"]]))
Name_freq
##        Var1 Freq
## 1     Angel  271
## 2    Ardifo  232
## 3   Calisha  261
## 4      Eric  257
## 5    Fallen  248
## 6   Felisha  250
## 7     Irene  255
## 8     Jacob  234
## 9    Jeffry  227
## 10   Julian  256
## 11    Kefas  253
## 12    Kevin  291
## 13     Lala  226
## 14  Michael  250
## 15    Naomi  264
## 16   Nikita  280
## 17 Patricia  249
## 18   Sherly  225
## 19    Siana  230
## 20  Vanessa  241

Frequency of Menu

# Number of transactions per menu item, as a two-column data frame.
Menu_freq <- as.data.frame(table(KopiKenangan[["Menu"]]))
Menu_freq
##                    Var1 Freq
## 1             Cappucino  379
## 2          Es Kopi Susu  375
## 3     Hot Caramel Latte  372
## 4         Hot Chocolate  393
## 5  Hot Red Velvet Latte  346
## 6         Ice Americano  323
## 7      Ice Berry Coffee  328
## 8        Ice Cafe Latte  383
## 9     Ice Caramel Latte  336
## 10   Ice Coffee Avocado  355
## 11      Ice Coffee Lite  385
## 12  Ice Matcha Espresso  342
## 13     Ice Matcha Latte  345
## 14 Ice Red Velvet Latte  338
# Cross-tabulation: how often each customer ordered each menu item.
table(KopiKenangan[["Name"]], KopiKenangan[["Menu"]])
##           
##            Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
##   Angel           20           24                27            17
##   Ardifo          10           21                20            18
##   Calisha         18           18                20            23
##   Eric            13           11                17            24
##   Fallen          13           21                17            29
##   Felisha         18           17                19            18
##   Irene           24           22                27            14
##   Jacob           14           18                16            19
##   Jeffry          20           16                18            19
##   Julian          18           21                22            29
##   Kefas           18           22                12            18
##   Kevin           30           20                17            27
##   Lala            18           16                15            21
##   Michael         18           16                16            14
##   Naomi           15           14                16            18
##   Nikita          24           23                20            23
##   Patricia        29           16                24            18
##   Sherly          25           20                 9            11
##   Siana           18           20                18            18
##   Vanessa         16           19                22            15
##           
##            Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
##   Angel                      17            19               19             22
##   Ardifo                     10            17               19             16
##   Calisha                    11            13               20             25
##   Eric                       24            20               16             16
##   Fallen                      9            18               13             23
##   Felisha                    22            14               19             12
##   Irene                      19            14               10             20
##   Jacob                      22            20               15             19
##   Jeffry                     18            16               14             11
##   Julian                     15            12               19             20
##   Kefas                      24            10               20             19
##   Kevin                      18            20               20             22
##   Lala                       14            17               14             14
##   Michael                    18            15               14             21
##   Naomi                      23            14               15             30
##   Nikita                     15            23               19             27
##   Patricia                   13            13               15             19
##   Sherly                     13            16               14             18
##   Siana                      22            15               13             14
##   Vanessa                    19            17               20             15
##           
##            Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
##   Angel                   17                 14              18
##   Ardifo                  17                 14              16
##   Calisha                 18                 19              24
##   Eric                    15                 17              16
##   Fallen                  16                 14              16
##   Felisha                 16                 22              20
##   Irene                   18                 21              18
##   Jacob                   13                 15              18
##   Jeffry                  16                 18              18
##   Julian                  18                  8              22
##   Kefas                   14                 17              28
##   Kevin                   23                 21              28
##   Lala                    15                 18              20
##   Michael                 17                 24              22
##   Naomi                   21                 23              17
##   Nikita                  13                 13              23
##   Patricia                19                 26              17
##   Sherly                  14                 21              16
##   Siana                   18                 22              13
##   Vanessa                 18                  8              15
##           
##            Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
##   Angel                     19               17                   21
##   Ardifo                    24               15                   15
##   Calisha                   14               17                   21
##   Eric                      24               26                   18
##   Fallen                    29               17                   13
##   Felisha                   21               15                   17
##   Irene                     18               14                   16
##   Jacob                     15               17                   13
##   Jeffry                    13               13                   17
##   Julian                    12               20                   20
##   Kefas                     14               20                   17
##   Kevin                     13               17                   15
##   Lala                      18               12                   14
##   Michael                   17               18                   20
##   Naomi                     22               18                   18
##   Nikita                    11               20                   26
##   Patricia                  14               15                   11
##   Sherly                    13               21                   14
##   Siana                     10               16                   13
##   Vanessa                   21               17                   19
  • The average of monthly sales per menu item.
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.0
## v readr   1.3.1
## -- Conflicts -------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
# Split each date into its parts and keep the year and month of every order.
Month_year <- select(
  separate(KopiKenangan, Date, c("Year", "Month", "Day"), sep = "-"),
  Year, Month
)
# Number of distinct year-month combinations covered by the data.
Month_freq <- length(table(paste(Month_year$Year, Month_year$Month, sep = "-")))
# Orders per menu item divided by the number of months.
Menu_freq <- as.data.frame(table(KopiKenangan$Menu))
Menu_freq$Monthly.sales <- Menu_freq$Freq / Month_freq
Menu_freq
##                    Var1 Freq Monthly.sales
## 1             Cappucino  379      2.296970
## 2          Es Kopi Susu  375      2.272727
## 3     Hot Caramel Latte  372      2.254545
## 4         Hot Chocolate  393      2.381818
## 5  Hot Red Velvet Latte  346      2.096970
## 6         Ice Americano  323      1.957576
## 7      Ice Berry Coffee  328      1.987879
## 8        Ice Cafe Latte  383      2.321212
## 9     Ice Caramel Latte  336      2.036364
## 10   Ice Coffee Avocado  355      2.151515
## 11      Ice Coffee Lite  385      2.333333
## 12  Ice Matcha Espresso  342      2.072727
## 13     Ice Matcha Latte  345      2.090909
## 14 Ice Red Velvet Latte  338      2.048485