1 Your Exercise

In this section, you are expected to become more confident in creating your own functions. I advise you to create a function for each of the tasks below:

  • Univariate variable (one dimension)
    • average
# Arithmetic mean of a numeric vector.
#
# a: numeric vector of observations.
# Returns the sum of the elements divided by the number of elements.
ratarata <- function(a) {
  total <- sum(a)
  banyak <- length(a)
  total / banyak
}

a <- c(1, 2, 3, 4, 5, 6, 7)
ratarata(a)
## [1] 4
  • middle_value
# Median (middle value) of a numeric vector.
#
# b: numeric vector of observations.
# Returns the middle element of the sorted data when the count is odd, and
# the mean of the two middle elements when the count is even.
# sort() drops NA values, so the count is taken from the sorted vector to
# keep the index in step with the data that is actually indexed.
# Returns NA when there is nothing left to take a median of.
titiktengah <- function(b) {
  sortedb <- sort(b)
  n <- length(sortedb)
  if (n == 0) {
    return(NA_real_)
  }
  # For even n this is the lower of the two middle positions,
  # for odd n it is the single middle position.
  bentuk <- (n + 1) %/% 2
  if (n %% 2 == 0) {
    (sortedb[bentuk] + sortedb[bentuk + 1]) / 2
  } else {
    sortedb[bentuk]
  }
}
b <- c(1, 2, 3, 4, 5, 6)
titiktengah(b)
## [1] 3.5
  • most_frequent
# Most frequent value (mode) of a vector.
#
# c: vector of observations.
# Returns a single value: the distinct value with the highest count. When
# several values tie, the one that appears first in the input wins, because
# which.max() reports the first maximum.
modus <- function(c) {
  nilai_unik <- unique(c)
  frekuensi <- tabulate(match(c, nilai_unik))
  nilai_unik[which.max(frekuensi)]
}
c <- c(1, 2, 3, 4, 5, 5, 6, 7, 7, 7, 8, 9)
modus(c)
## [1] 7
  • max_value
# Largest value of a vector.
#
# d: vector of observations.
# Returns the last element of the ascending sort of d.
maksimum <- function(d) {
  terurut <- sort(d)
  terurut[length(terurut)]
}
d <- c(10, 3, 6, 8, 2, 1, 6)
maksimum(d)
## [1] 10
  • min_value
# Smallest value of a vector.
#
# e: vector of observations.
# Returns the first element of the ascending sort of e.
minimum <- function(e) {
  terurut <- sort(e)
  head(terurut, n = 1)
}
e <- c(3, 5, 9, 8, 12, 14, 16)
minimum(e)
## [1] 3
  • variance
# Population variance of a numeric vector.
#
# fs: numeric vector of observations.
# Returns the mean squared deviation from the mean. The divisor is n
# (not n - 1), so this is the population variance, not the sample variance.
varians <- function(fs) {
  # Work on the argument itself; never fall back on a global vector.
  rata <- sum(fs) / length(fs)
  sum((fs - rata)^2) / length(fs)
}

f <- c(1, 2, 3, 4, 5, 6)
varians(f)
## [1] 2.916667
  • standard_deviation
# Population standard deviation of a numeric vector.
#
# g: numeric vector of observations.
# Returns the square root of the mean squared deviation from the mean.
# The divisor is n (not n - 1), i.e. the population form.
standardeviasi <- function(g) {
  banyak <- length(g)
  rata <- sum(g) / banyak
  kuadrat_simpangan <- (g - rata)^2
  sqrt(sum(kuadrat_simpangan) / banyak)
}

g <- c(1, 2, 3, 4, 5, 6)
standardeviasi(g)
## [1] 1.707825
  • Outliers
# Outliers of a numeric vector by the 1.5 * IQR rule.
#
# h: numeric vector of observations.
# Returns the elements of h, in their original order, that lie below
# Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR. Q1 and Q3 are the second and
# fourth entries of quantile(h), i.e. the 25% and 75% quantiles.
pencilan <- function(h) {
  kuartil <- quantile(h)
  q1 <- kuartil[2]
  q3 <- kuartil[4]
  pagar <- 1.5 * (q3 - q1)
  batas_atas <- pagar + q3
  batas_bawah <- q1 - pagar
  posisi <- which(h < batas_bawah | h > batas_atas)
  h[posisi]
}
h <- c(1, 2, 3, 4, 9, 7, 5, 3, 30, 50, 1, 6)
pencilan(h)
## [1] 30 50
  • summary
# Descriptive summary of a numeric vector.
#
# i: numeric vector of observations.
# Returns a named numeric vector with, in this order:
#   ratarata    mean, rounded to 2 digits
#   titiktengah median
#   modus       most frequent value (first one wins on ties)
#   maksimum    largest value
#   minimum     smallest value
#   variansiS   sample variance (divisor n - 1), rounded to 2 digits
#   standarS    sample standard deviation, rounded to 2 digits
#   pencilan    outliers by the 1.5 * IQR rule; the entry is absent when
#               there are none and becomes pencilan1, pencilan2, ... when
#               there are several
summary1 <- function(i) {
  n <- length(i)
  terurut <- sort(i)
  rata <- sum(i) / n
  jumlah_kuadrat <- sum((i - rata)^2)

  ratarata <- round(rata, digits = 2)

  # Plain if/else: the condition is a scalar. For odd n the middle position
  # is (n + 1) %/% 2; n %/% 2 would land one element too low.
  titiktengah <- if (n %% 2 == 0) {
    (terurut[n %/% 2] + terurut[n %/% 2 + 1]) / 2
  } else {
    terurut[(n + 1) %/% 2]
  }

  nilai_unik <- unique(i)
  modus <- nilai_unik[which.max(tabulate(match(i, nilai_unik)))]

  maksimum <- tail(terurut, n = 1)
  minimum <- head(terurut, n = 1)

  # Only the sample (n - 1) versions are reported.
  variansiS <- round(jumlah_kuadrat / (n - 1), digits = 2)
  standarS <- round(sqrt(jumlah_kuadrat / (n - 1)), digits = 2)

  kuartil <- quantile(i)
  q1 <- kuartil[2]
  q3 <- kuartil[4]
  jarak <- q3 - q1
  atas <- (jarak * 1.5) + q3
  bawah <- q1 - (jarak * 1.5)
  result <- which(i < bawah | i > atas)
  # head() keeps at most its default of six positions, so the summary
  # lists no more than the first six outliers.
  pencilan <- i[head(result)]

  c(ratarata = ratarata,
    titiktengah = titiktengah,
    modus = modus,
    maksimum = maksimum,
    minimum = minimum,
    variansiS = variansiS,
    standarS = standarS,
    pencilan = pencilan)
}
i <- c(1, 1, 2, 5, 3, 5, 4, 70, 6, 7)
summary1(i)
##    ratarata titiktengah       modus    maksimum     minimum   variansiS 
##       10.40        4.50        1.00       70.00        1.00      442.71 
##    standarS    pencilan 
##       21.04       70.00
  • Multivariate variable (more than one dimension) — suppose x1 = value and x2 = frequency
    • average
# Mean of values given with their frequencies.
#
# j: numeric vector of values.
# k: numeric vector of frequencies, one per value.
# Returns sum(value * frequency) divided by the total frequency.
RataRata <- function(j, k) {
  jumlah_terbobot <- sum(j * k)
  total_frekuensi <- sum(k)
  jumlah_terbobot / total_frekuensi
}
j <- c(1, 2, 3, 4, 5)
k <- c(5, 5, 5, 5, 5)
RataRata(j, k)
## [1] 3
  • middle_value
# Median of values given with their frequencies.
#
# l: numeric vector of values.
# m: vector of non-negative integer frequencies, one per value.
# Each value is repeated according to its frequency, the expanded data are
# sorted once, and the middle element (odd count) or the mean of the two
# middle elements (even count) is returned. The count is taken from the
# expanded data so the index always matches what is being indexed.
# Returns NA when the expanded data are empty.
TitikTengah <- function(l, m) {
  terurut <- sort(rep.int(l, m))
  n <- length(terurut)
  if (n == 0) {
    return(NA_real_)
  }
  # Lower middle position for even n, the middle position for odd n.
  bentuk <- (n + 1) %/% 2
  if (n %% 2 == 0) {
    (terurut[bentuk] + terurut[bentuk + 1]) / 2
  } else {
    terurut[bentuk]
  }
}
l <- c(1, 3, 2, 4)
m <- c(4, 4, 4, 4)
TitikTengah(l, m)
## [1] 2.5
  • most_frequent
# Mode(s) of values given with their frequencies.
#
# o: vector of values.
# p: vector of non-negative integer frequencies, one per value.
# Returns every distinct value whose count in the expanded data equals the
# highest count, in ascending order (so ties yield several values).
Modus <- function(o, p) {
  diperluas <- sort(rep.int(o, p))
  kandidat <- unique(diperluas)
  frekuensi <- tabulate(match(diperluas, kandidat))
  paling_sering <- frekuensi == max(frekuensi)
  kandidat[paling_sering]
}
o <- c(1, 2, 5, 3, 4)
p <- c(10, 3, 12, 14, 2)
Modus(o, p)
## [1] 3
  • max_value
# Largest value among values given with their frequencies.
#
# q: vector of values.
# r: vector of non-negative integer frequencies, one per value.
# Returns the last element of the sorted expanded data; values whose
# frequency is zero do not take part.
Maksimum <- function(q, r) {
  diperluas <- rep.int(q, r)
  terurut <- sort(diperluas)
  terurut[length(terurut)]
}
q <- c(2, 2, 3, 40, 10)
r <- c(3, 3, 3, 3, 3)
Maksimum(q, r)
## [1] 40
  • min_value
# Smallest value among values given with their frequencies.
#
# s: vector of values.
# t: vector of non-negative integer frequencies, one per value.
# Returns the first element of the sorted expanded data; values whose
# frequency is zero do not take part.
Minimum <- function(s, t) {
  sortst <- sort(rep.int(s, t))
  # Take the head of the sorted data, not of the raw input: the input is
  # not guaranteed to arrive in ascending order.
  head(sortst, 1)
}
s <- c(1, 2, 3, 4, 10)
t <- c(3, 3, 3, 3, 3)
Minimum(s, t)
## [1] 1
  • variance
# Sample variance of values given with their frequencies.
#
# u: numeric vector of values.
# v: vector of non-negative integer frequencies, one per value.
# Returns the sum of squared deviations of the expanded data from the
# frequency-weighted mean, divided by (total frequency - 1).
Varians <- function(u, v) {
  total_frekuensi <- sum(v)
  rata <- sum(u * v) / total_frekuensi
  simpangan <- sort(rep.int(u, v)) - rata
  sum(simpangan^2) / (total_frekuensi - 1)
}
u <- c(1, 2, 3, 4, 5)
v <- c(2, 2, 2, 2, 2)
Varians(u, v)
## [1] 2.222222
  • standard_deviation
# Sample standard deviation of values given with their frequencies.
#
# w: numeric vector of values.
# x: vector of non-negative integer frequencies, one per value.
# Returns the square root of the sum of squared deviations of the expanded
# data from the frequency-weighted mean, divided by (total frequency - 1).
StandarDeviasi <- function(w, x) {
  total_frekuensi <- sum(x)
  rata <- sum(w * x) / total_frekuensi
  simpangan <- sort(rep.int(w, x)) - rata
  sqrt(sum(simpangan^2) / (total_frekuensi - 1))
}
w <- c(1, 2, 3, 4, 5)
x <- c(2, 2, 2, 2, 2)
StandarDeviasi(w, x)
## [1] 1.490712
  • Outliers
# Outliers, by the 1.5 * IQR rule, of values given with their frequencies.
#
# y: numeric vector of values.
# z: vector of non-negative integer frequencies, one per value.
# The values are expanded by frequency and sorted once; both fences are
# then tested against that same expanded data. Returns the elements below
# Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR, in ascending order.
Pencilan <- function(y, z) {
  terurut <- sort(rep.int(y, z))
  kuartil <- quantile(terurut)
  q1 <- kuartil[2]
  q3 <- kuartil[4]
  jarak <- q3 - q1
  atas <- (jarak * 1.5) + q3
  bawah <- q1 - (jarak * 1.5)
  # Both comparisons use this function's own data, never global vectors.
  result <- which(terurut < bawah | terurut > atas)
  # head() keeps at most its default of six positions, so no more than the
  # first six outliers are returned.
  terurut[head(result)]
}
y <- c(1, 2, 3, 4, 100, 5)
z <- c(1, 2, 1, 5, 1, 3)
Pencilan(y, z)
## [1] 100
  • summary
# Descriptive summary of values given with their frequencies.
#
# x1: numeric vector of values.
# x2: vector of non-negative integer frequencies, one per value.
# Returns a named numeric vector with, in this order:
#   RataRata       frequency-weighted mean
#   TitikTengah    median of the expanded data
#   Modus          most frequent value(s); Modus1, Modus2, ... on ties
#   Maksimum       largest value of the expanded data
#   Minimum        smallest value of the expanded data
#   Varians        sample variance (divisor total frequency - 1)
#   StandarDeviasi sample standard deviation
#   Pencilan       outliers by the 1.5 * IQR rule; absent when there are
#                  none, Pencilan1, Pencilan2, ... when there are several
summary2 <- function(x1, x2) {
  # Expand and sort once; every statistic below reuses this vector.
  terurut <- sort(rep.int(x1, x2))
  banyak <- length(terurut)
  total_frekuensi <- sum(x2)

  RataRata <- sum(x1 * x2) / total_frekuensi

  # A plain if/else expression: calling return() here would leave the whole
  # function and discard every other statistic.
  TitikTengah <- if (banyak %% 2 == 0) {
    (terurut[banyak %/% 2] + terurut[banyak %/% 2 + 1]) / 2
  } else {
    terurut[(banyak + 1) %/% 2]
  }

  nilai <- unique(terurut)
  tab <- tabulate(match(terurut, nilai))
  Modus <- nilai[tab == max(tab)]

  Maksimum <- tail(terurut, 1)
  # Head of the sorted data, not of the raw input, which may be unordered.
  Minimum <- head(terurut, 1)

  Varians <- sum((terurut - RataRata)^2) / (total_frekuensi - 1)
  StandarDeviasi <- sqrt(Varians)

  kuartil <- quantile(terurut)
  q1 <- kuartil[2]
  q3 <- kuartil[4]
  jarak <- q3 - q1
  atas <- (jarak * 1.5) + q3
  bawah <- q1 - (jarak * 1.5)
  result <- which(terurut < bawah | terurut > atas)
  # head() keeps at most its default of six positions.
  Pencilan <- terurut[head(result)]

  c(RataRata = RataRata,
    TitikTengah = TitikTengah,
    Modus = Modus,
    Maksimum = Maksimum,
    Minimum = Minimum,
    Varians = Varians,
    StandarDeviasi = StandarDeviasi,
    Pencilan = Pencilan)
}
x1 <- c(1, 2, 3, 4, 5, 6)
x2 <- c(2, 2, 2, 2, 2, 2)

summary2(x1, x2)
##       RataRata    TitikTengah         Modus1         Modus2         Modus3
##       3.500000       3.500000       1.000000       2.000000       3.000000
##         Modus4         Modus5         Modus6       Maksimum        Minimum
##       4.000000       5.000000       6.000000       6.000000       1.000000
##        Varians StandarDeviasi
##       3.181818       1.783765
  • Simple Case Example
# Simulated sales records: 5000 transactions, one per consecutive day.
Id       <- (1:5000)
Date     <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)

# Customer name drawn at random, with replacement, for every transaction.
Name     <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
               "Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
               5000, replace = TRUE)

# Each of the five cities appears exactly 1000 times, in shuffled order.
City     <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))

Outlet   <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = TRUE)

Menu     <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
              "Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
              "Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
all_menu <- sample(Menu, 5000, replace = TRUE)
# One random price per menu item; the count follows the menu itself so the
# price table stays aligned if items are added or removed.
Price    <- sample(18000:45000, length(Menu), replace = TRUE)
DFPrice  <- data.frame(Menu, Price)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Attach the price of each ordered menu item. The join key is named
# explicitly so the join does not depend on which column names the two
# tables happen to share (and no "Joining, by" message is printed).
Menu_Price <- left_join(data.frame(Menu = all_menu), DFPrice, by = "Menu")
# Combine the transaction columns with the menu/price columns, row by row.
KopiKenangan <- cbind(data.frame(Id,
                                 Date,
                                 Name,
                                 City,
                                 Outlet),
                                 Menu_Price)
head(KopiKenangan, 5)
##   Id       Date    Name      City   Outlet                Menu Price
## 1  1 2018-01-01 Michael     Depok Outlet 4   Hot Caramel Latte 41614
## 2  2 2018-01-02   Naomi    Bekasi Outlet 5 Ice Matcha Espresso 38289
## 3  3 2018-01-03   Irene Tangerang Outlet 2      Ice Cafe Latte 33846
## 4  4 2018-01-04    Eric     Bogor Outlet 1    Ice Matcha Latte 19944
## 5  5 2018-01-05    Lala     Bogor Outlet 4       Hot Chocolate 33679

Let’s say you already have a data set in hand, as shown above. Please create functions to calculate the following:

  • The percentage of sales for each city.
  • The frequency of Name and Menu.
  • The Average of daily sales per-menu item.