1 Your Exercise

In this section, you are expected to become more confident in creating your own functions. I advise you to create a function for each of the tasks below:

  • Univariate variable (one dimension)
    • average
# Arithmetic mean of a numeric vector: the sum of its elements divided by
# the number of elements.
#
# x: numeric vector.
# Returns a single number.
average <- function(x) {
  total <- sum(x)
  count <- length(x)
  total / count
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
average(x)
## [1] 10.5
  • middle_value
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)

# Median of a numeric vector.
#
# The values are sorted; for an odd count the centre element is returned,
# for an even count the mean of the two centre elements.
#
# x: numeric vector.
# Returns a single number.
middle_value <- function(x) {
  ordered <- sort(x)
  count <- length(ordered)
  half <- count %/% 2
  if (count %% 2 == 1) {
    return(ordered[half + 1])
  }
  sum(ordered[c(half, half + 1)]) / 2
}
middle_value(x)
## [1] 4
  • most_frequent
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)

# Mode(s) of a vector: every value that reaches the highest count.
#
# x: vector of observations.
# Returns a factor holding the most frequent value(s); its levels are all
# distinct values of x.
Most_frequent <- function(x) {
  counts <- data.frame(table(x))
  is_top <- counts$Freq == max(counts$Freq)
  counts[is_top, 1]
}
Most_frequent(x)
## [1] 3
## Levels: 2 3 4 5 6 9 80
  • max_value
# Largest element of a vector, taken as the last element after sorting in
# increasing order.
#
# x: numeric vector.
# Returns the maximum (a zero-length vector when x is empty).
max_value <- function(x) {
  ordered <- sort(x)
  ordered[length(ordered)]
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
max_value(x)
## [1] 80
  • min_value
# Smallest element of a vector, taken as the first element after sorting in
# increasing order.
#
# x: numeric vector.
# Returns the minimum (a zero-length vector when x is empty).
min_value <- function(x) {
  ordered <- sort(x)
  # seq_len(min(1, n)) selects nothing for an empty vector instead of NA.
  ordered[seq_len(min(1L, length(ordered)))]
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
min_value(x)
## [1] 2
  • variance: sample variance
# Sample variance: sum of squared deviations from the mean divided by
# (n - 1).
#
# x: numeric vector.
# Returns a single number.
variance_sample <- function(x) {
  deviations <- x - mean(x)
  sum(deviations^2) / (length(x) - 1)
}

x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
variance_sample(x)
## [1] 482.8182

population variance

# Population variance: sum of squared deviations from the mean divided by n.
#
# x: numeric vector.
# Returns a single number.
variance_population <- function(x) {
  deviations <- x - mean(x)
  sum(deviations^2) / length(x)
}

x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
variance_population(x)
## [1] 442.5833
  • standard_deviation: sample standard deviation
# Sample standard deviation: square root of the sum of squared deviations
# from the mean divided by (n - 1).
#
# x: numeric vector.
# Returns a single number.
standard_deviation_sample <- function(x) {
  squared_dev <- (x - mean(x))^2
  sqrt(sum(squared_dev) / (length(x) - 1))
}

x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
standard_deviation_sample(x)
## [1] 21.97312

standard_deviation population

# Population standard deviation: square root of the sum of squared
# deviations from the mean divided by n.
#
# x: numeric vector.
# Returns a single number.
standard_deviation_population <- function(x) {
  squared_dev <- (x - mean(x))^2
  sqrt(sum(squared_dev) / length(x))
}

x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
standard_deviation_population(x)
## [1] 21.03766
  • Outliers
z <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 80)

# Report the values of x that fall outside the fences
# [Q1 - k * IQR, Q3 + k * IQR].
#
# x: numeric vector.
# k: fence multiplier applied to the interquartile range (default 3).
# Returns a character vector with one "Outlier = <value>" string per
# outlier, or character(0) when there is none.
Outliers <- function(x, k = 3) {
  quartiles <- quantile(x, c(0.25, 0.75), names = FALSE)
  # Named `spread` rather than `IQR` so stats::IQR() is not shadowed.
  spread <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - k * spread
  upper <- quartiles[2] + k * spread
  flagged <- x[x < lower | x > upper]
  # paste() would turn an empty selection into the misleading "Outlier = ".
  if (length(flagged) == 0) {
    return(character(0))
  }
  paste("Outlier", flagged, sep = " = ")
}
Outliers(z)
## [1] "Outlier = 80"
  • summary (all functions) - optional
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 80)

# Descriptive statistics of a numeric vector, collected in one named vector.
#
# NOTE: defining `summary` at top level masks base::summary() for the rest
# of the session.
#
# x: numeric vector.
# Returns a named numeric vector with the elements
#   mean, med, mode, max, min,
#   var.s / var.p     (sample / population variance),
#   stdev.s / stdev.p (sample / population standard deviation),
#   Outlier           (values outside Q1 - 3 * IQR .. Q3 + 3 * IQR; at most
#                      the first six, named Outlier1, Outlier2, ... when
#                      there are several, and absent when there is none).
summary <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  average <- sum(x) / n

  # Median: index by the position n %/% 2, not by the remainder n %% 2
  # (which is only ever 0 or 1 and would pick the smallest element).
  half <- n %/% 2
  middle_value <- if (n %% 2 == 0) {
    sum(sorted[c(half, half + 1)]) / 2
  } else {
    sorted[half + 1]
  }

  # Mode: the first distinct value with the highest count.
  distinct <- unique(x)
  most_frequent <- distinct[which.max(tabulate(match(x, distinct)))]

  largest <- tail(sorted, 1)
  smallest <- head(sorted, 1)

  sum_sq <- sum((x - mean(x))^2)
  variance_s <- sum_sq / (n - 1)
  variance_p <- sum_sq / n

  quartiles <- quantile(x, c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - 3 * spread
  upper <- quartiles[2] + 3 * spread
  outliers <- x[head(which(x < lower | x > upper))]

  c(
    mean = average,
    med = middle_value,
    mode = most_frequent,
    max = largest,
    min = smallest,
    var.s = variance_s,
    var.p = variance_p,
    stdev.s = sqrt(variance_s),
    stdev.p = sqrt(variance_p),
    Outlier = outliers
  )
}
summary(x)
##      mean       med      mode       max       min     var.s     var.p   stdev.s 
##  10.63636   4.00000   3.00000  80.00000   2.00000 530.85455 482.59504  23.04028 
##   stdev.p   Outlier 
##  21.96805  80.00000
  • Multivariate variable (more dimensions)

    • average
# Mean of grouped data: each value x[i] occurs y[i] times.
#
# x: numeric vector of distinct values.
# y: numeric vector of frequencies, same length as x.
# Returns the frequency-weighted mean, i.e. the mean of rep(x, y).
avarage_multi <- function(x, y) {
  # The divisor is the total number of observations, sum(y), not the number
  # of distinct values, length(x).
  sum(x * y) / sum(y)
}
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)
avarage_multi(x, y)
## [1] 8.764706
  • middle_value
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Median of grouped data: each value x[i] occurs y[i] times.
#
# x: numeric vector of values.
# y: non-negative integer frequencies, same length as x.
# Returns the median of the expanded data rep(x, y).
middle_value <- function(x, y) {
  # Expand from the arguments so the result does not depend on a global.
  sorted <- sort(rep(x, y))
  n <- length(sorted)
  half <- n %/% 2
  if (n %% 2 == 0) {
    sum(sorted[c(half, half + 1)]) / 2
  } else {
    sorted[half + 1]
  }
}
middle_value(x, y)
## [1] 4
  • most_frequent
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Mode(s) of grouped data: each value x[i] occurs y[i] times.
#
# x: vector of values.
# y: non-negative integer frequencies, same length as x.
# Returns a factor holding the most frequent value(s) of rep(x, y); its
# levels are all distinct values that occur at least once.
Most_frequent <- function(x, y) {
  # Expand from the arguments (no global needed) and keep `y` intact.
  expanded <- rep(x, y)
  counts <- data.frame(table(expanded))
  counts[counts$Freq == max(counts$Freq), 1]
}
Most_frequent(x, y)
## [1] 4
## Levels: 2 3 4 5 9 70
  • max_value
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Largest value of grouped data: each value x[i] occurs y[i] times.
#
# x: numeric vector of values.
# y: non-negative integer frequencies, same length as x.
# Returns the maximum of rep(x, y); values with frequency 0 are ignored.
max_value <- function(x, y) {
  # Expand from the arguments so the result does not depend on a global.
  sorted <- sort(rep(x, y))
  tail(sorted, 1)
}
max_value(x, y)
## [1] 70
  • min_value
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Smallest value of grouped data: each value x[i] occurs y[i] times.
#
# x: numeric vector of values.
# y: non-negative integer frequencies, same length as x.
# Returns the minimum of rep(x, y); values with frequency 0 are ignored.
min_value <- function(x, y) {
  # Expand from the arguments so the result does not depend on a global.
  sorted <- sort(rep(x, y))
  head(sorted, 1)
}
min_value(x, y)
## [1] 2
  • variance: sample variance
# Sample variance of grouped data: each value x[i] occurs y[i] times.
#
# x: numeric vector of values.
# y: non-negative integer frequencies, same length as x.
# Returns the sum of squared deviations of rep(x, y) from its mean,
# divided by (n - 1).
variance_sample <- function(x, y) {
  # Expand from the arguments so the result does not depend on a global.
  expanded <- rep(x, y)
  n <- length(expanded)
  sum((expanded - mean(expanded))^2) / (n - 1)
}

x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)
variance_sample(x, y)
## [1] 255.1912

variance population

x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Population variance of grouped data: each value x[i] occurs y[i] times.
#
# x: numeric vector of values.
# y: non-negative integer frequencies, same length as x.
# Returns the sum of squared deviations of rep(x, y) from its mean,
# divided by n.
variance_population <- function(x, y) {
  # Expand from the arguments so the result does not depend on a global.
  expanded <- rep(x, y)
  n <- length(expanded)
  sum((expanded - mean(expanded))^2) / n
}

variance_population(x, y)
## [1] 240.1799
  • standard_deviation: sample standard deviation
# Sample standard deviation of grouped data: each value x[i] occurs y[i]
# times.
#
# x: numeric vector of values.
# y: frequencies, same length as x; their sum is used as the sample size.
# Returns a single number.
standar_deviation_sample <- function(x, y) {
  expanded <- rep(x, y)
  total <- sum(y)
  squared_dev <- (expanded - mean(expanded))^2
  sqrt(sum(squared_dev) / (total - 1))
}
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)
standar_deviation_sample(x, y)
## [1] 15.9747

standard_deviation population

# Population standard deviation of grouped data: each value x[i] occurs
# y[i] times.
#
# x: numeric vector of values.
# y: frequencies, same length as x; their sum is used as the population size.
# Returns a single number.
standar_deviation_population <- function(x, y) {
  expanded <- rep(x, y)
  total <- sum(y)
  squared_dev <- (expanded - mean(expanded))^2
  sqrt(sum(squared_dev) / total)
}
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)
standar_deviation_population(x, y)
## [1] 15.49774
  • Outliers
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Report the distinct values of grouped data (x[i] occurring y[i] times)
# that fall outside the fences [Q1 - k * IQR, Q3 + k * IQR], where the
# quartiles are computed on the expanded data rep(x, y).
#
# x: numeric vector of values.
# y: non-negative integer frequencies, same length as x.
# k: fence multiplier applied to the interquartile range (default 3).
# Returns a character vector with one "Outlier = <value>" string per
# distinct outlying value, or character(0) when there is none.
Outliers <- function(x, y, k = 3) {
  expanded <- rep(x, y)
  quartiles <- quantile(expanded, c(0.25, 0.75), names = FALSE)
  # Named `spread` rather than `IQR` so stats::IQR() is not shadowed.
  spread <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - k * spread
  upper <- quartiles[2] + k * spread
  # Only values that actually occur (frequency > 0) can be outliers.
  observed <- unique(expanded)
  flagged <- observed[observed < lower | observed > upper]
  # paste() would turn an empty selection into the misleading "Outlier = ".
  if (length(flagged) == 0) {
    return(character(0))
  }
  paste("Outlier", flagged, sep = " = ")
}
Outliers(x, y)
## [1] "Outlier = 70"
  • summary (all functions) - optional
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Descriptive statistics of grouped data (each value x[i] occurs y[i]
# times), collected in one named vector.
#
# NOTE: defining `summary` at top level masks base::summary() for the rest
# of the session.
#
# x: numeric vector of values.
# y: non-negative integer frequencies, same length as x.
# Returns a named numeric vector with the elements
#   mean, med, mode, max, min,
#   var.s / var.p     (sample / population variance),
#   stdev.s / stdev.p (sample / population standard deviation),
#   Outlier           (distinct values outside Q1 - 3 * IQR .. Q3 + 3 * IQR;
#                      at most the first six, named Outlier1, Outlier2, ...
#                      when there are several, absent when there is none).
summary <- function(x, y) {
  # Expand from the arguments so the result does not depend on a global.
  expanded <- rep(x, y)
  n <- length(expanded)
  sorted <- sort(expanded)
  average <- sum(x * y) / n

  # Median: index by the position n %/% 2, not by the remainder n %% 2
  # (which is only ever 0 or 1 and would pick the smallest element).
  half <- n %/% 2
  middle_value <- if (n %% 2 == 0) {
    sum(sorted[c(half, half + 1)]) / 2
  } else {
    sorted[half + 1]
  }

  # Mode: count occurrences in the expanded data, not in the distinct values.
  distinct <- unique(expanded)
  most_frequent <- distinct[which.max(tabulate(match(expanded, distinct)))]

  largest <- tail(sorted, 1)
  smallest <- head(sorted, 1)

  sum_sq <- sum((expanded - mean(expanded))^2)
  variance_s <- sum_sq / (n - 1)
  variance_p <- sum_sq / n

  quartiles <- quantile(expanded, c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  lower <- quartiles[1] - 3 * spread
  upper <- quartiles[2] + 3 * spread
  outliers <- distinct[head(which(distinct < lower | distinct > upper))]

  c(
    mean = average,
    med = middle_value,
    mode = most_frequent,
    max = largest,
    min = smallest,
    var.s = variance_s,
    var.p = variance_p,
    stdev.s = sqrt(variance_s),
    stdev.p = sqrt(variance_p),
    Outlier = outliers
  )
}
summary(x, y)
##       mean        med       mode        max        min      var.s      var.p 
##   8.764706   4.000000   4.000000  70.000000   2.000000 255.191176 240.179931 
##    stdev.s    stdev.p    Outlier 
##  15.974704  15.497740  70.000000
  • Simple Case Example
# Simulated coffee-shop transactions: one row per day starting 2018-01-01.
# No random seed is set here, so every run draws different names, cities,
# outlets, menu items and prices.
n_obs <- 5000

Id <- seq_len(n_obs)
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = n_obs)

Name <- sample(
  c(
    "Angel", "Sherly", "Vanessa", "Irene", "Julian", "Jeffry", "Nikita",
    "Kefas", "Siana", "Lala", "Fallen", "Ardifo", "Kevin", "Michael",
    "Felisha", "Calisha", "Patricia", "Naomi", "Eric", "Jacob"
  ),
  n_obs,
  replace = TRUE
)

# Every city appears equally often (n_obs / 5 times), in shuffled order.
City <- sample(rep(
  c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"),
  times = n_obs / 5
))

Outlet <- sample(
  c("Outlet 1", "Outlet 2", "Outlet 3", "Outlet 4", "Outlet 5"),
  n_obs,
  replace = TRUE
)

Menu <- c(
  "Cappucino", "Es Kopi Susu", "Hot Caramel Latte", "Hot Chocolate",
  "Hot Red Velvet Latte", "Ice Americano", "Ice Berry Coffee",
  "Ice Cafe Latte", "Ice Caramel Latte", "Ice Coffee Avocado",
  "Ice Coffee Lite", "Ice Matcha Espresso", "Ice Matcha Latte",
  "Ice Red Velvet Latte"
)
all_menu <- sample(Menu, n_obs, replace = TRUE)

# One random price per menu item; DFPrice is the menu -> price lookup table.
Price <- sample(18000:45000, length(Menu), replace = TRUE)
DFPrice <- data.frame(Menu, Price)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Attach the price of each ordered menu item. The join key is given
# explicitly, so dplyr does not have to guess it (and prints no
# "Joining, by" message).
Menu_Price <- left_join(data.frame(Menu = all_menu), DFPrice, by = "Menu")

# Assemble the transaction table: the identifying columns followed by the
# Menu and Price columns.
KopiKenangan <- cbind(
  data.frame(Id, Date, Name, City, Outlet),
  Menu_Price
)
head(KopiKenangan, 5)
##   Id       Date   Name      City   Outlet             Menu Price
## 1  1 2018-01-01  Kefas Tangerang Outlet 2        Cappucino 27199
## 2  2 2018-01-02   Eric Tangerang Outlet 4 Ice Matcha Latte 34515
## 3  3 2018-01-03  Kevin     Depok Outlet 1        Cappucino 27199
## 4  4 2018-01-04 Fallen   Jakarta Outlet 4     Es Kopi Susu 25315
## 5  5 2018-01-05  Kevin    Bekasi Outlet 5  Ice Coffee Lite 38897

Let’s say you already have a data set in hand, as shown above. Please create functions to carry out the following tasks:

  • The percentage of sales for each city.
# Format proportions as percentage strings with one decimal place.
#
# x: numeric vector of proportions (for example 0.198).
# Returns a character vector such as "19.8%".
Percentage <- function(x) {
  paste0(round(x * 100, 1), "%")
}
# Sum of Price per city, the overall total, and each city's share of that
# total as a formatted percentage.
City.Sales <- aggregate(Price ~ City, data = KopiKenangan, FUN = sum)
Total.Sales <- sum(City.Sales[["Price"]])
City.Sales[["Percentage.City.Sales"]] <- Percentage(
  City.Sales[["Price"]] / Total.Sales
)
City.Sales
##        City    Price Percentage.City.Sales
## 1    Bekasi 32889066                   20%
## 2     Bogor 32812928                   20%
## 3     Depok 32588191                 19.8%
## 4   Jakarta 33149242                 20.2%
## 5 Tangerang 32749937                 19.9%
  • The frequency of Name and Menu. Frequency of Name
# Number of rows (transactions) per customer name.
Name_freq <- as.data.frame(table(KopiKenangan[["Name"]]))
Name_freq
##        Var1 Freq
## 1     Angel  267
## 2    Ardifo  264
## 3   Calisha  259
## 4      Eric  269
## 5    Fallen  245
## 6   Felisha  244
## 7     Irene  226
## 8     Jacob  240
## 9    Jeffry  240
## 10   Julian  266
## 11    Kefas  279
## 12    Kevin  238
## 13     Lala  243
## 14  Michael  245
## 15    Naomi  257
## 16   Nikita  244
## 17 Patricia  255
## 18   Sherly  238
## 19    Siana  257
## 20  Vanessa  224

Frequency of Menu

# Number of rows (transactions) per menu item.
Menu_freq <- as.data.frame(table(KopiKenangan[["Menu"]]))
Menu_freq
##                    Var1 Freq
## 1             Cappucino  359
## 2          Es Kopi Susu  370
## 3     Hot Caramel Latte  352
## 4         Hot Chocolate  382
## 5  Hot Red Velvet Latte  353
## 6         Ice Americano  392
## 7      Ice Berry Coffee  325
## 8        Ice Cafe Latte  323
## 9     Ice Caramel Latte  343
## 10   Ice Coffee Avocado  342
## 11      Ice Coffee Lite  343
## 12  Ice Matcha Espresso  371
## 13     Ice Matcha Latte  392
## 14 Ice Red Velvet Latte  353
# Cross-tabulation: rows are customer names, columns are menu items, cells
# are the number of transactions for that pair.
table(KopiKenangan[["Name"]], KopiKenangan[["Menu"]])
##           
##            Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
##   Angel           34           14                17            11
##   Ardifo          18           22                19            14
##   Calisha         16           18                16            19
##   Eric            19           20                17            14
##   Fallen          17           12                19            22
##   Felisha         20           25                20            24
##   Irene           10           14                17            20
##   Jacob           19           11                21            14
##   Jeffry          18           15                15            13
##   Julian          14           21                14            21
##   Kefas           14           23                24            20
##   Kevin           18           18                18            29
##   Lala            15           17                22            20
##   Michael         14           17                12            24
##   Naomi           16           25                15            16
##   Nikita          23           24                10            18
##   Patricia        21           18                22            20
##   Sherly          12           20                18            15
##   Siana           22           18                23            24
##   Vanessa         19           18                13            24
##           
##            Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
##   Angel                      18            23               14             21
##   Ardifo                     16            20               15             23
##   Calisha                    24            26               17             13
##   Eric                       19            17               13             21
##   Fallen                     15            27               11             14
##   Felisha                    13            19               13             17
##   Irene                      19            18               17             17
##   Jacob                      21            16               17             12
##   Jeffry                     18            20               17             20
##   Julian                     24            19               20             12
##   Kefas                      22            26               13             13
##   Kevin                      24            20               20             13
##   Lala                       19            22               18             14
##   Michael                    14            19               23             18
##   Naomi                      19            19               17             17
##   Nikita                     15            16               18             14
##   Patricia                   14            15               17             14
##   Sherly                     14            20               15             17
##   Siana                      14            19               17             19
##   Vanessa                    11            11               13             14
##           
##            Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
##   Angel                   16                 14              15
##   Ardifo                  17                 21              15
##   Calisha                 17                 14              22
##   Eric                    22                 18              22
##   Fallen                  18                 16              16
##   Felisha                 16                 15              13
##   Irene                   17                 19              14
##   Jacob                   17                 16              23
##   Jeffry                  14                 15              16
##   Julian                  18                 18              26
##   Kefas                   21                 25              18
##   Kevin                   13                  8              13
##   Lala                    17                 21              15
##   Michael                 18                 10              21
##   Naomi                   16                 22              13
##   Nikita                  11                 20              14
##   Patricia                16                 32              17
##   Sherly                  24                 17              13
##   Siana                   15                 10              18
##   Vanessa                 20                 11              19
##           
##            Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
##   Angel                     23               29                   18
##   Ardifo                    21               19                   24
##   Calisha                   21               24                   12
##   Eric                      19               29                   19
##   Fallen                    20               17                   21
##   Felisha                   16               16                   17
##   Irene                     17               19                    8
##   Jacob                     20               19                   14
##   Jeffry                    21               18                   20
##   Julian                    21               22                   16
##   Kefas                     22               17                   21
##   Kevin                     13               11                   20
##   Lala                      14               15                   14
##   Michael                   24               20                   11
##   Naomi                     14               24                   24
##   Nikita                    17               25                   19
##   Patricia                  14               20                   15
##   Sherly                    21               10                   22
##   Siana                     18               21                   19
##   Vanessa                   15               17                   19
  • The average of monthly sales per menu item.
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.0
## v readr   1.3.1
## -- Conflicts -------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
# Average number of orders per month for each menu item.
# Step 1: split every date into its year and month parts.
Month_year <- KopiKenangan %>%
  separate(Date, c("Year", "Month", "Day"), sep = "-") %>%
  select(Year, Month)
# Step 2: count the distinct year-month combinations covered by the data.
year_month <- paste(Month_year$Year, Month_year$Month, sep = "-")
Month_freq <- length(table(year_month))
# Step 3: divide each item's total order count by that number of months.
Menu_freq <- as.data.frame(table(KopiKenangan$Menu))
Menu_freq$Monthly.sales <- round(Menu_freq$Freq / Month_freq, 2)
Menu_freq
##                    Var1 Freq Monthly.sales
## 1             Cappucino  359          2.18
## 2          Es Kopi Susu  370          2.24
## 3     Hot Caramel Latte  352          2.13
## 4         Hot Chocolate  382          2.32
## 5  Hot Red Velvet Latte  353          2.14
## 6         Ice Americano  392          2.38
## 7      Ice Berry Coffee  325          1.97
## 8        Ice Cafe Latte  323          1.96
## 9     Ice Caramel Latte  343          2.08
## 10   Ice Coffee Avocado  342          2.07
## 11      Ice Coffee Lite  343          2.08
## 12  Ice Matcha Espresso  371          2.25
## 13     Ice Matcha Latte  392          2.38
## 14 Ice Red Velvet Latte  353          2.14