1 Your Exercise

In this section, you are expected to be more confident in creating your own functions. I advise you to create a function for each of the tasks below:

  • Univariate variable (one dimension)
    • average
#' Arithmetic mean of a numeric vector.
#'
#' @param x Numeric vector of observations.
#' @return The sum of `x` divided by the number of elements in `x`.
average <- function(x) {
  total <- sum(x)
  count <- length(x)
  total / count
}
x <- c(2, 4, 6, 8, 10)
average(x)
## [1] 6
  • middle_value
#' Median (middle value) of a numeric vector.
#'
#' @param b Numeric vector of observations; missing values are dropped by
#'   `sort()` before the middle position is located.
#' @return The middle element of the sorted data when the count is odd, the
#'   mean of the two middle elements when it is even, and `NA_real_` when
#'   there is nothing to summarise.
middle_value <- function(b) {
  # Work on the argument itself, never on a same-named object that happens
  # to exist in the calling workspace.
  sorted <- sort(b)
  n <- length(sorted)
  if (n == 0) {
    return(NA_real_)
  }
  half <- n %/% 2
  if (n %% 2 == 0) {
    (sorted[half] + sorted[half + 1]) / 2
  } else {
    # For odd n, half + 1 equals (n + 1) / 2, the central position.
    sorted[half + 1]
  }
}
b <- c(1, 2, 3, 4, 5, 6)
middle_value(b)
## [1] 3.5
  • most_frequent
#' Most frequent value (mode) of a vector.
#'
#' @param x Vector of observations.
#' @return The value that occurs most often in `x`; when several values tie,
#'   the one whose first occurrence comes earliest.
most_frequent <- function(x) {
  candidates <- unique(x)
  positions <- match(x, candidates)
  counts <- tabulate(positions)
  winner <- which.max(counts)
  candidates[winner]
}
x <- c(2, 3, 4, 5, 3, 2, 3, 4, 5, 5, 5, 1)
result <- most_frequent(x)
print(result)
## [1] 5
  • max_value
#' Largest element of a vector.
#'
#' @param x Vector of observations.
#' @return The result of `max(x)`.
max_value <- function(x) {
  largest <- max(x)
  largest
}
x <- c(2, 3, 4, 5, 3, 6)
result <- max_value(x)
print(result)
## [1] 6
  • min_value

  • variance

#' Population variance of a numeric vector.
#'
#' @param x Numeric vector of observations.
#' @return The sum of the squared deviations from `mean(x)`, each divided by
#'   `length(x)`.
variance <- function(x) {
  centered <- x - mean(x)
  contributions <- centered^2 / length(x)
  sum(contributions)
}

x <- c(2, 3, 5, 4, 5)
result <- variance(x)
print(result)
## [1] 1.36

  • standard_deviation

#' Standard deviation of a numeric vector.
#'
#' @param g Numeric vector of observations.
#' @param population If `TRUE`, divide the sum of squared deviations by
#'   `length(g)` (population formula). The default `FALSE` divides by
#'   `length(g) - 1` (sample formula).
#' @return A single number: the square root of the chosen variance.
standardeviasi <- function(g, population = FALSE) {
  n <- length(g)
  squared_dev <- (g - sum(g) / n)^2
  # n - 1 is the sample (Bessel-corrected) denominator; n is the population one.
  denominator <- if (population) n else n - 1
  sqrt(sum(squared_dev) / denominator)
}

g <- c(1, 2, 3, 4, 5, 6)
standardeviasi(g)
## [1] 1.870829
  • Outliers
#' Outliers of a numeric vector using the 1.5 * IQR rule.
#'
#' @param h Numeric vector of observations.
#' @return The elements of `h`, in their original order, that lie below
#'   `Q1 - 1.5 * IQR` or above `Q3 + 1.5 * IQR`.
pencilan <- function(h) {
  quartiles <- quantile(h)
  lower_quartile <- quartiles[2]
  upper_quartile <- quartiles[4]
  spread <- upper_quartile - lower_quartile
  upper_fence <- (spread * 1.5) + upper_quartile
  lower_fence <- lower_quartile - (spread * 1.5)
  flagged <- which(h < lower_fence | h > upper_fence)
  h[flagged]
}
h <- c(1, 2, 4, 5, 2, 3, 4, 1, 3, 5)
pencilan(h)
## numeric(0)
  • Multivariate variable (more than one dimension)

    • average
#' Mean of a numeric vector, optionally weighted by frequencies.
#'
#' @param x Numeric vector of values.
#' @param freq Optional numeric vector of frequencies, one per element of
#'   `x`. When `NULL` (the default) every value counts once.
#' @return `sum(x * freq) / sum(freq)`, or `sum(x) / length(x)` when `freq`
#'   is `NULL`.
average <- function(x, freq = NULL) {
  if (is.null(freq)) {
    return(sum(x) / length(x))
  }
  # Mismatched lengths would otherwise be recycled silently.
  stopifnot(length(x) == length(freq))
  sum(x * freq) / sum(freq)
}
j <- c(2, 4, 6, 8, 10)
k <- c(2, 3, 2, 4, 1)
average(j, k)
## [1] 5.833333
  • middle_value
#' Median of frequency-weighted data.
#'
#' @param y Numeric vector of distinct values.
#' @param z Non-negative integer frequencies, one per element of `y`.
#' @return The middle observation of the expanded, sorted data when the
#'   number of observations is odd, the mean of the two middle observations
#'   when it is even, and `NA_real_` when there are no observations.
TitikTengah <- function(y, z) {
  # Expand and sort once; every later step reuses this vector.
  expanded <- sort(rep.int(y, z))
  n <- length(expanded)
  if (n == 0) {
    return(NA_real_)
  }
  half <- n %/% 2
  if (n %% 2 == 0) {
    (expanded[half] + expanded[half + 1]) / 2
  } else {
    # For odd n, half + 1 equals (n + 1) / 2, the central position.
    expanded[half + 1]
  }
}
y <- c(1, 3, 2, 4)
z <- c(3, 2, 1, 6)
TitikTengah(y, z)
## [1] 3.5
  • most_frequent
#' Mode(s) of frequency-weighted data.
#'
#' @param x Vector of values.
#' @param y Non-negative integer frequencies, one per element of `x`.
#' @return Every value whose count in the expanded data equals the largest
#'   count, in ascending order.
Modus <- function(x, y) {
  expanded <- sort(rep.int(x, y))
  distinct <- unique(expanded)
  counts <- tabulate(match(expanded, distinct))
  is_mode <- counts == max(counts)
  distinct[is_mode]
}
x <- c(1, 2, 5, 3, 4)
y <- c(10, 3, 12, 14, 2)
Modus(x, y)
## [1] 3
  • max_value
#' Largest observation of frequency-weighted data.
#'
#' @param x Vector of values.
#' @param y Non-negative integer frequencies, one per element of `x`.
#' @return The last element of the expanded data after sorting it in
#'   ascending order.
max_value <- function(x, y) {
  expanded <- rep.int(x, y)
  ascending <- sort(expanded)
  tail(ascending, n = 1)
}

x <- c(2, 5, 4, 3, 6)
y <- c(2, 4, 3, 5, 1)
result <- max_value(x, y)
print(result)
## [1] 6
  • min_value
#' Smallest observation of frequency-weighted data.
#'
#' @param x Vector of values.
#' @param y Non-negative integer frequencies, one per element of `x`.
#' @return The first element of the expanded data after sorting it in
#'   ascending order.
min_value <- function(x, y) {
  expanded <- rep.int(x, y)
  ascending <- sort(expanded)
  head(ascending, n = 1)
}
x <- c(2, 3, 4, 5, 1)
y <- c(1, 2, 3, 4, 5)
result <- min_value(x, y)
print(result)
## [1] 1
  • variance
#' Sample variance of frequency-weighted data.
#'
#' @param s Numeric vector of values.
#' @param t Non-negative integer frequencies, one per element of `s`.
#' @return The sum of squared deviations of the expanded data from the
#'   weighted mean, divided by `sum(t) - 1`.
Variansi <- function(s, t) {
  total_freq <- sum(t)
  weighted_mean <- sum(s * t) / total_freq
  observations <- sort(rep.int(s, t))
  squared_dev <- (observations - weighted_mean)^2
  sum(squared_dev) / (total_freq - 1)
}
s <- c(1, 2, 3, 4, 5)
t <- c(2, 2, 2, 2, 2)
Variansi(s, t)
## [1] 2.222222
  • standard_deviation
#' Sample standard deviation of frequency-weighted data.
#'
#' @param u Numeric vector of values.
#' @param v Non-negative integer frequencies, one per element of `u`.
#' @return The square root of the sum of squared deviations of the expanded
#'   data from the weighted mean, divided by `sum(v) - 1`.
StandarDeviasi <- function(u, v) {
  total_freq <- sum(v)
  weighted_mean <- sum(u * v) / total_freq
  observations <- sort(rep.int(u, v))
  sum_sq <- sum((observations - weighted_mean)^2)
  sqrt(sum_sq / (total_freq - 1))
}
u <- c(1, 2, 3, 4, 5)
v <- c(2, 2, 2, 2, 2)
StandarDeviasi(u, v)
## [1] 1.490712
  • Outliers
#' Outliers of frequency-weighted data using the 1.5 * IQR rule.
#'
#' @param w Numeric vector of distinct values.
#' @param x Non-negative integer frequencies, one per element of `w`.
#' @return Every expanded observation lying below `Q1 - 1.5 * IQR` or above
#'   `Q3 + 1.5 * IQR`, in ascending order (an empty vector when there are
#'   none).
Pencilan <- function(w, x) {
  # Expand and sort once instead of on every use.
  expanded <- sort(rep.int(w, x))
  quartiles <- quantile(expanded)
  q1 <- quartiles[[2]]
  q3 <- quartiles[[4]]
  jarak <- q3 - q1
  atas <- q3 + 1.5 * jarak
  bawah <- q1 - 1.5 * jarak
  # Keep every flagged observation rather than capping the result at six.
  expanded[which(expanded < bawah | expanded > atas)]
}
w <- c(1, 2, 3, 4, 100, 5)
x <- c(1, 2, 1, 5, 1, 3)
Pencilan(w, x)
## [1] 100
  • summary (all functions) - optional

  • Simple Case Example

# Number of simulated transactions; every per-transaction vector below has
# this length.
n_obs    <- 5000
Id       <- seq_len(n_obs)
Date     <- seq(as.Date("2018/01/01"), by = "day", length.out = n_obs)

Name     <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
               "Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
               n_obs, replace = TRUE)

# Cycle through the cities to get equal counts, then shuffle the order.
City     <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), length.out = n_obs))

Outlet   <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"), n_obs, replace = TRUE)

Menu     <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
              "Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
              "Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
all_menu <- sample(Menu, n_obs, replace = TRUE)
# One price per menu item, so the lookup table always matches `Menu`.
Price    <- sample(18000:45000, length(Menu), replace = TRUE)
DFPrice  <- data.frame(Menu, Price)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Attach a price to every ordered menu item by looking it up in DFPrice.
Menu_Price <- left_join(
  x = data.frame(Menu = all_menu),
  y = DFPrice
)
## Joining, by = "Menu"
# Combine the transaction columns with the menu/price columns into one table.
KopiKenangan <- cbind(
  data.frame(Id, Date, Name, City, Outlet),
  Menu_Price
)
head(KopiKenangan, n = 5)
##   Id       Date    Name    City   Outlet                 Menu Price
## 1  1 2018-01-01   Angel   Bogor Outlet 2       Ice Cafe Latte 30361
## 2  2 2018-01-02   Angel   Depok Outlet 5   Ice Coffee Avocado 18905
## 3  3 2018-01-03   Kevin  Bekasi Outlet 4 Ice Red Velvet Latte 42205
## 4  4 2018-01-04   Kefas Jakarta Outlet 4        Hot Chocolate 42258
## 5  5 2018-01-05 Vanessa Jakarta Outlet 1 Ice Red Velvet Latte 42205

Suppose you already have the data set shown above. Please create a function for each of the following tasks:

  • The Average of monthly sales per-menu item.

  • The frequency of Name and Menu.

  • The Average of daily sales per-menu item.