1 Your Exercise

In this section, you are expected to become more confident in creating your own functions. I advise you to create a function for each of the tasks below:

  • Univariate variable (one dimension)
    • average
# Arithmetic mean of a numeric vector: the total of its elements divided by
# the number of elements.
average <- function(x) {
  total <- sum(x)
  count <- length(x)
  total / count
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
average(x)
## [1] 10.5
  • middle_value
x<- c(2,3,4,5,6,3,2,3,4,5,9,80)
# Median of a vector.
#
# The values are sorted first (sort() drops missing values). With an odd
# count the single middle element is returned; with an even count the two
# middle elements are averaged.
middle_value <- function(x) {
  ordered <- sort(x)
  count <- length(ordered)
  half <- count %/% 2
  if (count %% 2 == 1) {
    return(ordered[half + 1])
  }
  sum(ordered[c(half, half + 1)]) / 2
}
middle_value(x)
## [1] 4
  • most_frequent
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
# Mode(s) of a vector.
#
# Tabulates the values and returns every value whose count equals the
# highest count. The result is the first column of the frequency table,
# i.e. a factor whose levels are all distinct values of x.
Most_frequent <- function(x) {
  counts <- data.frame(table(x))
  is_top <- counts$Freq == max(counts$Freq)
  counts[is_top, 1]
}
Most_frequent(x)
## [1] 3
## Levels: 2 3 4 5 6 9 80
  • max_value
# Largest element of a vector: sort ascending (sort() drops missing values)
# and take the last position. An empty input yields an empty result.
max_value <- function(x) {
  ascending <- sort(x)
  ascending[length(ascending)]
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
max_value(x)
## [1] 80
  • min_value
# Smallest element of a vector: the first value after an ascending sort
# (sort() drops missing values). An empty input yields an empty result.
min_value <- function(x) {
  head(sort(x), n = 1L)
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
min_value(x)
## [1] 2
  • variance sample variance
# Sample variance: sum of squared deviations from the mean divided by
# (number of observations - 1).
variance_sample <- function(x) {
  deviations <- x - mean(x)
  sum(deviations^2) / (length(x) - 1)
}

x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
variance_sample(x)
## [1] 482.8182

population variance

# Population variance: sum of squared deviations from the mean divided by
# the number of observations.
variance_population <- function(x) {
  deviations <- x - mean(x)
  sum(deviations^2) / length(x)
}

x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
variance_population(x)
## [1] 442.5833
  • standard_deviation standard_deviation sample
# Sample standard deviation: square root of the sum of squared deviations
# from the mean divided by (number of observations - 1).
standard_deviation_sample <- function(x) {
  squared_dev <- (x - mean(x))^2
  sqrt(sum(squared_dev) / (length(x) - 1))
}

x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
standard_deviation_sample(x)
## [1] 21.97312

standard_deviation population

# Population standard deviation: square root of the sum of squared
# deviations from the mean divided by the number of observations.
standard_deviation_population <- function(x) {
  squared_dev <- (x - mean(x))^2
  sqrt(sum(squared_dev) / length(x))
}

x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
standard_deviation_population(x)
## [1] 21.03766
  • Outliers
z<-c(2,3,4,5,6,3,2,3,4,5,80)
# Report extreme outliers of a numeric vector using quartile fences.
#
# A value is flagged when it lies more than `k` interquartile ranges below
# the first quartile or above the third quartile.
#
# x: numeric vector without missing values (quantile() stops on NA).
# k: fence multiplier; the default of 3 is the multiplier this function has
#    always used.
#
# Returns a character vector with one "Outlier = <value>" label per flagged
# value, or character(0) when nothing is flagged.
Outliers <- function(x, k = 3) {
  q1 <- quantile(x, 0.25)
  q3 <- quantile(x, 0.75)
  spread <- q3 - q1 # not called IQR, so stats::IQR is not shadowed
  flagged <- x[x < q1 - k * spread | x > q3 + k * spread]
  if (length(flagged) == 0) {
    # paste() recycles a zero-length argument to "", which would produce the
    # misleading label "Outlier = " even though nothing was flagged.
    return(character(0))
  }
  paste("Outlier", flagged, sep = " = ")
}
Outliers(z)
## [1] "Outlier = 80"
  • summary (all functions) - optional
x<-c(2,3,4,5,6,3,2,3,4,5,80)
# Descriptive summary of a numeric vector.
#
# x: numeric vector without missing values (quantile() stops on NA).
#
# Returns a named numeric vector with the elements mean, med, mode, max, min,
# var.s / var.p (sample / population variance), stdev.s / stdev.p (sample /
# population standard deviation) and, if any exist, the extreme outliers
# (named Outlier, or Outlier1, Outlier2, ... when there are several).
#
# Note: defining this function masks base::summary() in the environment
# where it is defined.
summary <- function(x) {
  n <- length(x)
  ordered <- sort(x)
  squared_dev <- sum((x - mean(x))^2)

  average <- sum(x) / n

  # Median: the middle element for odd n, the mean of the two middle elements
  # for even n. Indexing the sorted data with n %% 2 (0 or 1) never reaches
  # the middle, so the positions are derived from n %/% 2 instead.
  half <- n %/% 2
  middle_value <- if (n %% 2 == 0) {
    sum(ordered[c(half, half + 1)]) / 2
  } else {
    ordered[half + 1]
  }

  # Mode: the first value, in order of appearance, with the highest count.
  distinct <- unique(x)
  most_frequent <- distinct[which.max(tabulate(match(x, distinct)))]

  variance_s <- squared_dev / (n - 1)
  variance_p <- squared_dev / n

  # Extreme outliers: more than 3 interquartile ranges outside the quartiles.
  q1 <- quantile(x, 0.25)
  q3 <- quantile(x, 0.75)
  spread <- q3 - q1
  is_outlier <- x < q1 - 3 * spread | x > q3 + 3 * spread
  # head() caps the report at the first six outliers.
  outliers <- x[head(which(is_outlier))]

  c(
    mean = average,
    med = middle_value,
    mode = most_frequent,
    max = tail(ordered, 1),
    min = head(ordered, 1),
    var.s = variance_s,
    var.p = variance_p,
    stdev.s = sqrt(variance_s),
    stdev.p = sqrt(variance_p),
    Outlier = outliers
  )
}
summary(x)
##      mean       med      mode       max       min     var.s     var.p   stdev.s 
##  10.63636   2.00000   3.00000  80.00000   2.00000 530.85455 482.59504  23.04028 
##   stdev.p   Outlier 
##  21.96805  80.00000
  • Multivariate variable (more dimension)

    • average
# Mean of frequency-weighted data.
#
# x: the distinct values.
# y: how many times each value of x occurs (same length as x).
#
# Returns sum(x * y) divided by the total number of observations, sum(y).
# Dividing by length(x) instead would count each distinct value once and
# overstate the mean whenever any frequency exceeds 1.
avarage_multi <- function(x, y) {
  sum(x * y) / sum(y)
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
avarage_multi(x,y)
## [1] 24.83333
  • middle_value
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
# Median of frequency-weighted data.
#
# x: the distinct values.
# y: how many times each value of x occurs.
#
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on a global `list3`. With an odd
# number of observations the middle element is returned; with an even number
# the two middle elements are averaged.
middle_value <- function(x, y) {
  ordered <- sort(rep(x, y))
  count <- length(ordered)
  half <- count %/% 2
  if (count %% 2 == 0) {
    sum(ordered[c(half, half + 1)]) / 2
  } else {
    ordered[half + 1]
  }
}
middle_value(x,y)
## [1] 4
  • most_frequent
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
# Mode(s) of frequency-weighted data.
#
# x: the distinct values.
# y: how many times each value of x occurs.
#
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on a global `list3`; the parameter
# `y` is no longer overwritten. Returns every value whose count equals the
# highest count, as a factor whose levels are all observed values.
Most_frequent <- function(x, y) {
  counts <- data.frame(table(rep(x, y)))
  counts[counts$Freq == max(counts$Freq), 1]
}
Most_frequent(x,y)
## [1] 4
## Levels: 2 3 4 5 9 70
  • max_value
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
# Largest observed value of frequency-weighted data.
#
# x: the distinct values.
# y: how many times each value of x occurs.
#
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on a global `list3`. Values whose
# frequency is 0 do not appear in the expanded data and are ignored.
max_value <- function(x, y) {
  tail(sort(rep(x, y)), 1)
}
max_value(x,y)
## [1] 70
  • min_value
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
# Smallest observed value of frequency-weighted data.
#
# x: the distinct values.
# y: how many times each value of x occurs.
#
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on a global `list3`. Values whose
# frequency is 0 do not appear in the expanded data and are ignored.
min_value <- function(x, y) {
  head(sort(rep(x, y)), 1)
}
min_value(x,y)
## [1] 2
  • variance variance sample
# Sample variance of frequency-weighted data.
#
# x: the distinct values.
# y: how many times each value of x occurs.
#
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on a global `list3`. Returns the sum
# of squared deviations from the mean divided by (observations - 1).
variance_sample <- function(x, y) {
  expanded <- rep(x, y)
  sum((expanded - mean(expanded))^2) / (length(expanded) - 1)
}

x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
variance_sample(x,y)
## [1] 255.1912

variance population

x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
# Population variance of frequency-weighted data.
#
# x: the distinct values.
# y: how many times each value of x occurs.
#
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on a global `list3`. Returns the sum
# of squared deviations from the mean divided by the number of observations.
variance_population <- function(x, y) {
  expanded <- rep(x, y)
  sum((expanded - mean(expanded))^2) / length(expanded)
}

variance_population(x,y)
## [1] 240.1799
  • standard_deviation standard_deviation sample
# Sample standard deviation of frequency-weighted data.
#
# x holds the values and y the number of times each value is repeated. The
# data are expanded with rep(x, y); the squared deviations from the mean are
# summed, divided by (sum(y) - 1), and the square root is returned.
standar_deviation_sample <- function(x, y) {
  expanded <- rep(x, y)
  squared_dev <- (expanded - mean(expanded))^2
  sqrt(sum(squared_dev) / (sum(y) - 1))
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
standar_deviation_sample(x,y)
## [1] 15.9747

standard_deviation population

# Population standard deviation of frequency-weighted data.
#
# x holds the values and y the number of times each value is repeated. The
# data are expanded with rep(x, y); the squared deviations from the mean are
# summed, divided by sum(y), and the square root is returned.
standar_deviation_population <- function(x, y) {
  expanded <- rep(x, y)
  squared_dev <- (expanded - mean(expanded))^2
  sqrt(sum(squared_dev) / sum(y))
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
standar_deviation_population(x,y)
## [1] 15.49774
  • Outliers
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
# Report extreme outliers of frequency-weighted data using quartile fences.
#
# x: the distinct values.
# y: how many times each value of x occurs.
# k: fence multiplier; the default of 3 is the multiplier this function has
#    always used.
#
# The quartiles are computed on the expanded data rep(x, y). A value is
# flagged when it lies more than k interquartile ranges below the first
# quartile or above the third quartile. Each flagged value is reported once,
# and only values that actually occur (frequency > 0) are considered.
#
# Returns a character vector with one "Outlier = <value>" label per flagged
# value, or character(0) when nothing is flagged.
Outliers <- function(x, y, k = 3) {
  expanded <- rep(x, y)
  q1 <- quantile(expanded, 0.25)
  q3 <- quantile(expanded, 0.75)
  spread <- q3 - q1 # not called IQR, so stats::IQR is not shadowed
  observed <- unique(expanded)
  flagged <- observed[observed < q1 - k * spread | observed > q3 + k * spread]
  if (length(flagged) == 0) {
    # paste() recycles a zero-length argument to "", which would produce the
    # misleading label "Outlier = " even though nothing was flagged.
    return(character(0))
  }
  paste("Outlier", flagged, sep = " = ")
}
Outliers(x,y)
## [1] "Outlier = 70"
  • summary (all functions) - optional
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
# Descriptive summary of frequency-weighted data.
#
# x: the distinct values.
# y: how many times each value of x occurs.
#
# All statistics are computed on the expanded data rep(x, y), built inside
# the function so the result depends only on the arguments and not on a
# global `list3`.
#
# Returns a named numeric vector with the elements mean, med, mode, max, min,
# var.s / var.p (sample / population variance), stdev.s / stdev.p (sample /
# population standard deviation) and, if any exist, the extreme outliers
# (named Outlier, or Outlier1, Outlier2, ... when there are several).
#
# Note: defining this function masks base::summary() in the environment
# where it is defined.
summary <- function(x, y) {
  expanded <- rep(x, y)
  n <- length(expanded)
  ordered <- sort(expanded)
  squared_dev <- sum((expanded - mean(expanded))^2)

  average <- sum(expanded) / n

  # Median: the middle element for odd n, the mean of the two middle elements
  # for even n. Indexing the sorted data with n %% 2 (0 or 1) never reaches
  # the middle, so the positions are derived from n %/% 2 instead.
  half <- n %/% 2
  middle_value <- if (n %% 2 == 0) {
    sum(ordered[c(half, half + 1)]) / 2
  } else {
    ordered[half + 1]
  }

  # Mode: counts are taken over the expanded data, so the frequencies in y
  # are honoured (matching the bare x values would count each value once).
  distinct <- unique(expanded)
  most_frequent <- distinct[which.max(tabulate(match(expanded, distinct)))]

  variance_s <- squared_dev / (n - 1)
  variance_p <- squared_dev / n

  # Extreme outliers: more than 3 interquartile ranges outside the quartiles.
  q1 <- quantile(expanded, 0.25)
  q3 <- quantile(expanded, 0.75)
  spread <- q3 - q1
  is_outlier <- distinct < q1 - 3 * spread | distinct > q3 + 3 * spread
  # Each outlying value is listed once; head() caps the report at six.
  outliers <- distinct[head(which(is_outlier))]

  c(
    mean = average,
    med = middle_value,
    mode = most_frequent,
    max = tail(ordered, 1),
    min = head(ordered, 1),
    var.s = variance_s,
    var.p = variance_p,
    stdev.s = sqrt(variance_s),
    stdev.p = sqrt(variance_p),
    Outlier = outliers
  )
}
summary(x,y)
##       mean        med       mode        max        min      var.s      var.p 
##   8.764706   2.000000   2.000000  70.000000   2.000000 255.191176 240.179931 
##    stdev.s    stdev.p    Outlier 
##  15.974704  15.497740  70.000000
  • Simple Case Example
# Simulated transaction data: 5000 daily orders starting on 2018-01-01.
# No seed is set, so every run draws a different sample. The statements are
# kept in this order because each sample() call advances the random stream.
Id       <- 1:5000
Date     <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)

# Customer for each order, drawn with replacement from 20 names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Name     <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
               "Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
               5000, replace = TRUE)

# Each of the five cities appears exactly 1000 times, in shuffled order.
City     <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))

Outlet   <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = TRUE)

Menu     <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
              "Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
              "Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
all_menu <- sample(Menu, 5000, replace = TRUE)
# One price per menu item (14 items), drawn from 18000..45000.
Price    <- sample(18000:45000,14, replace = TRUE)
DFPrice  <- data.frame(Menu, Price)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Attach the price of each ordered item. No `by` is supplied, so dplyr joins
# on the column the two tables share and reports the key it picked.
Menu_Price <- left_join(
  x = data.frame(Menu = all_menu),
  y = DFPrice
)
## Joining, by = "Menu"
# Assemble the transaction table: the order attributes side by side with the
# ordered menu item and its price, then preview the first five rows.
KopiKenangan <- cbind(
  data.frame(Id, Date, Name, City, Outlet),
  Menu_Price
)
head(KopiKenangan, n = 5)
##   Id       Date   Name      City   Outlet                 Menu Price
## 1  1 2018-01-01 Julian     Depok Outlet 2     Ice Matcha Latte 26084
## 2  2 2018-01-02  Naomi Tangerang Outlet 1 Ice Red Velvet Latte 23818
## 3  3 2018-01-03 Sherly     Bogor Outlet 5         Es Kopi Susu 38275
## 4  4 2018-01-04  Kefas   Jakarta Outlet 1      Ice Coffee Lite 21213
## 5  5 2018-01-05  Irene Tangerang Outlet 2 Ice Red Velvet Latte 23818

Let’s say you already have the data set shown above. Please create functions to carry out the following tasks:

  • The percentage of sales for each city.
# Format proportions as percentage labels: multiply by 100, round to one
# decimal place and append "%".
Percentage <- function(x) {
  paste0(round(x * 100, 1), "%")
}
# Total sales per city, plus each city's share of the overall total
# formatted as a percentage label.
City.Sales <- aggregate(Price ~ City, data = KopiKenangan, FUN = sum)
Total.Sales <- sum(City.Sales[["Price"]])
City.Sales[["Percentage.City.Sales"]] <- Percentage(City.Sales[["Price"]] / Total.Sales)
City.Sales
##        City    Price Percentage.City.Sales
## 1    Bekasi 30822771                 20.2%
## 2     Bogor 30044515                 19.7%
## 3     Depok 30540690                 20.1%
## 4   Jakarta 30593100                 20.1%
## 5 Tangerang 30256179                 19.9%
  • The frequency of Name and Menu. Frequency of Name
# Number of transactions per customer name, as a two-column data frame.
Name_freq <- data.frame(table(KopiKenangan[["Name"]]))
Name_freq
##        Var1 Freq
## 1     Angel  248
## 2    Ardifo  241
## 3   Calisha  243
## 4      Eric  241
## 5    Fallen  225
## 6   Felisha  249
## 7     Irene  257
## 8     Jacob  254
## 9    Jeffry  266
## 10   Julian  255
## 11    Kefas  255
## 12    Kevin  248
## 13     Lala  257
## 14  Michael  252
## 15    Naomi  250
## 16   Nikita  251
## 17 Patricia  248
## 18   Sherly  250
## 19    Siana  246
## 20  Vanessa  264

Frequency of Menu

# Number of transactions per menu item, as a two-column data frame.
Menu_freq <- data.frame(table(KopiKenangan[["Menu"]]))
Menu_freq
##                    Var1 Freq
## 1             Cappucino  339
## 2          Es Kopi Susu  364
## 3     Hot Caramel Latte  353
## 4         Hot Chocolate  361
## 5  Hot Red Velvet Latte  385
## 6         Ice Americano  357
## 7      Ice Berry Coffee  346
## 8        Ice Cafe Latte  370
## 9     Ice Caramel Latte  351
## 10   Ice Coffee Avocado  339
## 11      Ice Coffee Lite  371
## 12  Ice Matcha Espresso  337
## 13     Ice Matcha Latte  360
## 14 Ice Red Velvet Latte  367
# Cross-tabulation: how often each customer name ordered each menu item.
table(KopiKenangan[["Name"]], KopiKenangan[["Menu"]])
##           
##            Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
##   Angel           16           13                18            24
##   Ardifo          24           11                20            21
##   Calisha         19           19                22            17
##   Eric            13           15                10            17
##   Fallen          13           14                17            17
##   Felisha         17           20                20            23
##   Irene           16           17                14            15
##   Jacob           14           18                21            18
##   Jeffry          19           18                19            20
##   Julian          25           29                20            16
##   Kefas           17           21                20            16
##   Kevin           16           16                21            20
##   Lala            22           15                19            12
##   Michael         20           20                11            18
##   Naomi           13           22                16            15
##   Nikita          19           21                21            17
##   Patricia        12           13                13             8
##   Sherly          13           17                20            20
##   Siana           11           23                12            17
##   Vanessa         20           22                19            30
##           
##            Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
##   Angel                      19            20               20             19
##   Ardifo                     20            18               22             20
##   Calisha                    17            12               17             12
##   Eric                       26            18               11             19
##   Fallen                     17            17               14              9
##   Felisha                    17            18               14             16
##   Irene                      19            24               12             20
##   Jacob                      21            17               21             20
##   Jeffry                     14            22               20             26
##   Julian                     15            18               17             15
##   Kefas                      25            23               16             12
##   Kevin                      22            13               14             22
##   Lala                       21            19               17             12
##   Michael                    18            21               10             17
##   Naomi                      24            16               22             23
##   Nikita                     18            16               20             18
##   Patricia                   16            26               28             26
##   Sherly                     19            11               17             24
##   Siana                      19            16               20             24
##   Vanessa                    18            12               14             16
##           
##            Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
##   Angel                   19                 17              15
##   Ardifo                  16                 16              12
##   Calisha                 15                 17              20
##   Eric                    18                 20              11
##   Fallen                  18                 13              20
##   Felisha                 23                  9              26
##   Irene                   23                 18              21
##   Jacob                   23                 22              17
##   Jeffry                  18                 17              22
##   Julian                  13                 13              21
##   Kefas                   20                 17              19
##   Kevin                   16                 27              15
##   Lala                    12                 23              23
##   Michael                 20                 20              14
##   Naomi                   16                 16              23
##   Nikita                  15                 11              17
##   Patricia                22                 14              23
##   Sherly                  17                 15              17
##   Siana                   15                 16              13
##   Vanessa                 12                 18              22
##           
##            Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
##   Angel                     16               15                   17
##   Ardifo                    13               15                   13
##   Calisha                   19               16                   21
##   Eric                      25               20                   18
##   Fallen                    16               17                   23
##   Felisha                   13               17                   16
##   Irene                     14               20                   24
##   Jacob                     20               13                    9
##   Jeffry                     9               17                   25
##   Julian                    17               21                   15
##   Kefas                     15               18                   16
##   Kevin                     15               19                   12
##   Lala                      23               16                   23
##   Michael                   15               22                   26
##   Naomi                     14               17                   13
##   Nikita                    24               14                   20
##   Patricia                  15               17                   15
##   Sherly                    17               18                   25
##   Siana                     15               26                   19
##   Vanessa                   22               22                   17
  • The average monthly sales per menu item.
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.0
## v readr   1.3.1
## -- Conflicts -------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
# Average number of orders per calendar month for each menu item.
# Split the date into year / month / day and keep only year and month.
Month_year <- KopiKenangan %>%
  separate(Date, c("Year", "Month", "Day"), sep = "-") %>%
  select(Year, Month)
# Number of distinct year-month combinations covered by the data.
Month_freq <- length(table(paste(Month_year$Year, Month_year$Month, sep = "-")))
# Orders per menu item, divided by the number of months.
Menu_freq <- as.data.frame(table(KopiKenangan$Menu))
Menu_freq$Monthly.sales <- Menu_freq$Freq / Month_freq
Menu_freq
##                    Var1 Freq Monthly.sales
## 1             Cappucino  339      2.054545
## 2          Es Kopi Susu  364      2.206061
## 3     Hot Caramel Latte  353      2.139394
## 4         Hot Chocolate  361      2.187879
## 5  Hot Red Velvet Latte  385      2.333333
## 6         Ice Americano  357      2.163636
## 7      Ice Berry Coffee  346      2.096970
## 8        Ice Cafe Latte  370      2.242424
## 9     Ice Caramel Latte  351      2.127273
## 10   Ice Coffee Avocado  339      2.054545
## 11      Ice Coffee Lite  371      2.248485
## 12  Ice Matcha Espresso  337      2.042424
## 13     Ice Matcha Latte  360      2.181818
## 14 Ice Red Velvet Latte  367      2.224242