In this section, you are expected to be more confident to create your own function. Here I advise you to create a function for each tasks bellow:
## [1] 17.11765
middle_value <- function(x){
n <- length(x)
urutan <- sort(x)
if(n%%2==0){
bentuk <- n%/%2
middle_value <- (urutan[bentuk]+urutan[bentuk+1])/2}
else if(n%%2==1){
bentuk <- (n+1)%/%2
middle_value <- (urutan[bentuk])}
return(middle_value)}
middle_value(x)## [1] 6
most_frequent <- function(x){
nilai<- unique(x)
tab <- (tabulate(match(x,nilai)))
nilai[tab==max(tab)]
}
most_frequent(x)## [1] 8
## [1] 112
## [1] 1
variance_sample <- function(x) {
data_x <- (x-(sum(x)/length(x)))^2
sum(data_x)/(length(x)-1)
}
variance_sample(x) #kalau pake var biasa, hasilnya sama dengan ini## [1] 1119.485
#or
variance_populasi <- function(x) {
data_x <- (x-(sum(x)/length(x)))^2
sum(data_x)/length(x)
}
variance_populasi(x)## [1] 1053.633
standard_deviation_sample <- function(x) {
data_x <- (x-(sum(x)/length(x)))^2
sqrt(sum(data_x)/(length(x)-1))
}
standard_deviation_sample(x) ## [1] 33.45871
#or
standard_deviation_population <- function(x) {
data_x <- (x-(sum(x)/length(x)))^2
sqrt(sum(data_x)/length(x))
}
standard_deviation_population(x)## [1] 32.45972
outlier<- function(x){
Q1 <- quantile(x)[2]
Q3 <- quantile(x)[4]
JK <- Q3 - Q1
batas_atas <- (JK * 1.5)+ Q3
batas_bawah <- Q1 - (JK * 1.5)
result <- (which(x < batas_bawah | x > batas_atas))
x[head(result)]
}
outlier(x)## [1] 99 112
summary1<- function(x){
average <- round((sum(x)/length(x)),digits=2)
middle_value <- ifelse(length(x)%%2==0, middle_value <- (((sort(x))[length(x)%/%2]+(sort(x))[(length(x)%/%2)+1])/2),
ifelse(length(x)%%2==1, middle_value <- ((sort(x))[length(x)%/%2])))
most_frequent<- {nilai <- unique(x)
tab<- (tabulate(match(x,unique(x))))
nilai[tab==max(tab)]}
max_value <- tail(sort(x),n = 1)
min_value <- head(sort(x),n = 1)
variance_sample <- round((sum((x-(sum(x)/length(x)))^2)/(length(x)-1)),digits=2)
variance_population <- round((sum((x-(sum(x)/length(x)))^2)/length(x)),digits=2)
standard_deviation_sample <- round(sqrt((sum((x-(sum(x)/length(x)))^2)/(length(x)-1))),digits=2)
standard_deviation_population <- round(sqrt((sum((x-(sum(x)/length(x)))^2)/length(x))),digits=2)
outlier <- {
Q1 <- quantile(x)[2]
Q3 <- quantile(x)[4]
JK <- Q3 - Q1
batas_atas <- (JK * 1.5)+ Q3
batas_bawah <- Q1 - (JK * 1.5)
result <- (which(x < batas_bawah | x > batas_atas))
x[head(result)]}
return(c(average=average,
med= middle_value,
modus=most_frequent,
max=max_value,
min=min_value,
var.s=variance_sample,
var.p=variance_population,
std.s=standard_deviation_sample,
std.p=standard_deviation_population,
outlier=outlier))
}
summary1(x)## average med modus max min var.s var.p std.s
## 17.12 5.00 8.00 112.00 1.00 1119.49 1053.63 33.46
## std.p outlier1 outlier2
## 32.46 99.00 112.00
## [1] 5.117647
middle_value_m <- function(y,z) {
ttlfreq <- sum(z)
datautama <- sort(rep.int(y,z))
ifelse(ttlfreq%%2==0, middle_value_m <- ((datautama[ttlfreq%/%2]/2) + (datautama[ttlfreq%/%2]+1)/2),
ifelse(ttlfreq%%2==1, middle_value_m <- ((datautama[ttlfreq%/%2]))))
return(middle_value_m)
}
middle_value_m(y,freq)## [1] 4
most_frequent_m <- function(y,z){
jabar <- sort(rep.int(y,freq))
nilai <- unique(jabar)
tab <- (tabulate(match(jabar,nilai)))
nilai[tab==max(tab)]
}
most_frequent_m(y,freq)## [1] 4
## [1] 30
## [1] 1
#Variansi Sample
variance_sample_s <- function(y,z) {
rata <- ((sum(y*z))/(sum(z)))
yjabar <- sort(rep.int(y,z))
data_y <- ((yjabar-rata)^2)
(sum(data_y))/((sum(z))-1)
}
variance_sample_s(y,freq) ## [1] 43.61029
#Variansi Populasi
variance_sample_p <- function(y,z) {
rata <- ((sum(y*z))/(sum(z)))
yjabar <- sort(rep.int(y,z))
data_y <- ((yjabar-rata)^2)
(sum(data_y))/(sum(z))
}
variance_sample_p(y,freq)## [1] 41.04498
#Standar Deviasi Sample
std_s <- function(y,z) {
rata <- ((sum(y*z))/(sum(z)))
yjabar <- sort(rep.int(y,z))
data_y <- ((yjabar-rata)^2)
sqrt((sum(data_y))/((sum(z))-1))
}
std_s(y,freq) ## [1] 6.603809
#Standar Deviasi Populasi
std_p <- function(y,z) {
rata <- ((sum(y*z))/(sum(z)))
yjabar <- sort(rep.int(y,z))
data_y <- ((yjabar-rata)^2)
sqrt((sum(data_y))/(sum(z)))
}
std_p(y,freq) ## [1] 6.406636
outlier2<- function(y,z){
jabar <- sort(rep.int(y,z))
Q1 <- quantile(jabar)[2]
Q3 <- quantile(jabar)[4]
JK <- Q3 - Q1
batas_atas <- (JK * 1.5)+ Q3
batas_bawah <- Q1 - (JK * 1.5)
result <- (which(jabar < batas_bawah | jabar > batas_atas))
jabar[head(result)]
}
outlier2(y,freq)## [1] 30
summary2 <- function(y,z){
average_m <- (sum(y*z))/(sum(z))
middle_value_m <- { ttlfreq <- sum(z)
datautama <- sort(rep.int(y,z))
ifelse(ttlfreq%%2==0,
middle_value_m <- ((datautama[ttlfreq%/%2]/2)
+ (datautama[ttlfreq%/%2]+1)/2),
ifelse(ttlfreq%%2==1,
middle_value_m <- ((datautama[ttlfreq%/%2]))))}
most_frequent_m <- { jabar <- sort(rep.int(y,z))
nilai <- unique(jabar)
tab <-tabulate(match(jabar,nilai))
nilai[tab==max(tab)]}
max_value_m <- { urut<-(sort(y,decreasing=F))
tail(urut,n=1)}
min_value_m <- {urut<-(sort(y,decreasing=F))
head(urut, n=1)
}
variance_sample_s <- {rata <- ((sum(y*z))/(sum(z)))
yjabar <- sort(rep.int(y,z))
data_y <- ((yjabar-rata)^2)
(sum(data_y))/((sum(z))-1) }
variance_sample_p <- {rata <- ((sum(y*z))/(sum(z)))
yjabar <- sort(rep.int(y,z))
data_y <- ((yjabar-rata)^2)
(sum(data_y))/(sum(z)) }
std_s <- {rata <- ((sum(y*z))/(sum(z)))
yjabar <- sort(rep.int(y,z))
data_y <- ((yjabar-rata)^2)
sqrt(sum(data_y))/((sum(z))-1) }
std_p <- {rata <- ((sum(y*z))/(sum(z)))
yjabar <- sort(rep.int(y,z))
data_y <- ((yjabar-rata)^2)
sqrt(sum(data_y))/(sum(z)) }
outlier2 <- { jabar <- sort(rep.int(y,z))
Q1 <- quantile(jabar)[2]
Q3 <- quantile(jabar)[4]
JK <- Q3 - Q1
batas_atas <- (JK * 1.5)+ Q3
batas_bawah <- Q1 - (JK * 1.5)
result <- (which(jabar < batas_bawah | jabar > batas_atas))
jabar[head(result)] }
return(c(rata2= average_m,
mid= middle_value_m,
modus= most_frequent_m,
max= max_value_m,
min= min_value_m,
var.s= variance_sample_s,
var.p= variance_sample_p,
std.s= std_s,
std.p= std_p,
outlier= outlier2))
}
summary2(y,freq)## rata2 mid modus max min var.s var.p std.s
## 5.117647 4.000000 4.000000 30.000000 1.000000 43.610294 41.044983 1.650952
## std.p outlier
## 1.553837 30.000000
Id <- (1:5000)
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)
Name <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
"Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
5000, replace = T)
City <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))
Outlet <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = T)
Menu <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
"Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
"Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
all_menu <- sample(Menu, 5000, replace = T)
Price <- sample(18000:45000,14, replace = T)
DFPrice <- data.frame(Menu, Price)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Joining, by = "Menu"
## Id Date Name City Outlet Menu Price
## 1 1 2018-01-01 Angel Bekasi Outlet 5 Hot Chocolate 22097
## 2 2 2018-01-02 Eric Depok Outlet 2 Hot Chocolate 22097
## 3 3 2018-01-03 Irene Depok Outlet 5 Ice Coffee Lite 44170
## 4 4 2018-01-04 Nikita Tangerang Outlet 1 Ice Cafe Latte 37304
## 5 5 2018-01-05 Angel Bogor Outlet 3 Ice Berry Coffee 39088
Let’s say, you have a data set already in your hand as you can see above. Please create a function to calculate the following tasks:
income_kota <- data.frame(aggregate(KopiKenangan$Price, by=list(city = KopiKenangan$City), FUN=sum))
x <- income_kota$x
y <- income_kota$city
percent_sales <- function(x,y){
percentage <- round((x*100)/(sum(x)), digits = 2)
result <- paste(percentage, sep = "", "%")
hasilakhir <- data.frame(y,x,result)
return(hasilakhir)
}
percent_sales(x,y)## y x result
## 1 Bekasi 34059579 19.94%
## 2 Bogor 34463969 20.18%
## 3 Depok 33932569 19.87%
## 4 Jakarta 34054383 19.94%
## 5 Tangerang 34301298 20.08%
x<- KopiKenangan$Menu
y<- KopiKenangan$Name
freq_name_menu <- function(x,y){
table(x,y)
}
freq_name_menu(x,y)## y
## x Angel Ardifo Calisha Eric Fallen Felisha Irene Jacob
## Cappucino 15 19 14 23 16 17 19 17
## Es Kopi Susu 12 15 22 23 23 19 18 17
## Hot Caramel Latte 29 25 11 22 16 16 19 10
## Hot Chocolate 16 16 21 16 17 14 15 17
## Hot Red Velvet Latte 13 27 23 23 18 21 14 16
## Ice Americano 15 18 15 15 15 21 15 11
## Ice Berry Coffee 20 11 13 21 19 18 21 12
## Ice Cafe Latte 19 20 14 13 24 17 13 28
## Ice Caramel Latte 26 20 19 23 22 28 14 22
## Ice Coffee Avocado 19 23 16 13 16 15 17 18
## Ice Coffee Lite 21 10 18 14 15 24 17 13
## Ice Matcha Espresso 22 11 21 17 20 20 18 25
## Ice Matcha Latte 19 16 23 20 25 12 13 14
## Ice Red Velvet Latte 25 15 20 15 21 16 12 21
## y
## x Jeffry Julian Kefas Kevin Lala Michael Naomi Nikita
## Cappucino 20 23 22 24 17 19 20 22
## Es Kopi Susu 18 25 22 14 22 17 16 21
## Hot Caramel Latte 23 16 19 17 14 23 14 17
## Hot Chocolate 23 14 12 14 16 18 20 20
## Hot Red Velvet Latte 27 15 17 17 18 16 13 14
## Ice Americano 22 11 16 15 13 17 14 12
## Ice Berry Coffee 11 11 11 20 19 19 17 14
## Ice Cafe Latte 28 15 14 14 9 10 14 17
## Ice Caramel Latte 15 13 15 18 17 18 25 27
## Ice Coffee Avocado 14 18 23 19 17 18 21 24
## Ice Coffee Lite 11 17 16 17 21 12 12 19
## Ice Matcha Espresso 27 20 17 14 22 18 17 20
## Ice Matcha Latte 23 15 12 15 18 12 19 14
## Ice Red Velvet Latte 15 17 13 25 19 18 15 16
## y
## x Patricia Sherly Siana Vanessa
## Cappucino 20 20 24 29
## Es Kopi Susu 20 25 28 16
## Hot Caramel Latte 21 18 22 17
## Hot Chocolate 16 14 11 21
## Hot Red Velvet Latte 14 21 19 16
## Ice Americano 9 19 15 21
## Ice Berry Coffee 16 17 20 24
## Ice Cafe Latte 21 13 21 11
## Ice Caramel Latte 22 13 14 18
## Ice Coffee Avocado 19 18 14 18
## Ice Coffee Lite 20 17 20 21
## Ice Matcha Espresso 13 19 24 21
## Ice Matcha Latte 22 14 21 12
## Ice Red Velvet Latte 20 15 18 22
average_monthly <- function(x){
library(sparklyr)
library(dplyr)
library(tidyverse)
sepdate <- x%>%
separate(Date, c("year","month","day"), sep="-")
totalbulan<-length(table(sepdate$month,sepdate$year))
sepdate1<- within(sepdate,Date <- sprintf("%s-%02s", year, month))
data1<- data.frame(table(sepdate1$Date,sepdate1$Menu))
data_2 <- data.frame(table(sepdate1$Menu))
data_3 <- round( data_2$Freq/totalbulan , digits = 2)
data_2$Monthly_average <- data_3
data_2%>%rename("Menu" = "Var1",
"Terjual"="Freq",
"Monthly_Average"="Monthly_average")
data_2
}
x <- KopiKenangan
average_monthly(x)## -- Attaching packages --------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.3.1
## -- Conflicts ------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::invoke() masks sparklyr::invoke()
## x dplyr::lag() masks stats::lag()
## Var1 Freq Monthly_average
## 1 Cappucino 400 2.38
## 2 Es Kopi Susu 393 2.34
## 3 Hot Caramel Latte 369 2.20
## 4 Hot Chocolate 331 1.97
## 5 Hot Red Velvet Latte 362 2.15
## 6 Ice Americano 309 1.84
## 7 Ice Berry Coffee 334 1.99
## 8 Ice Cafe Latte 335 1.99
## 9 Ice Caramel Latte 389 2.32
## 10 Ice Coffee Avocado 360 2.14
## 11 Ice Coffee Lite 335 1.99
## 12 Ice Matcha Espresso 386 2.30
## 13 Ice Matcha Latte 339 2.02
## 14 Ice Red Velvet Latte 358 2.13