In this section, you are expected to be more confident in creating your own functions. Here, I advise you to create a function for each of the tasks below:
## [1] 2
# Middle value (median) of a vector that has an odd number of elements
# ("ganjil" = odd).
#
# x: vector with an odd number of elements.
# Returns the element found at position (n + 1) / 2 after sorting x.
# Stops with an error when the length of x is not odd, because there is
# no single middle element in that case.
middle_value_ganjil <- function(x) {
  n <- length(x)
  if (n %% 2 != 1) {
    stop("middle_value_ganjil() needs an odd number of values", call. = FALSE)
  }
  # (n + 1) / 2 is only a position; the median is the sorted value there.
  sort(x)[(n + 1) / 2]
}
x <- c(1, 2, 3, 4, 5)
middle_value_ganjil(x)
## [1] 3
# Middle value (median) of a vector that has an even number of elements
# ("genap" = even).
#
# x: numeric vector with an even, non-zero number of elements.
# Returns the mean of the two central values of the sorted vector.
# Stops with an error when the length of x is zero or odd.
middle_value_genap <- function(x) {
  n <- length(x)
  if (n == 0 || n %% 2 != 0) {
    stop("middle_value_genap() needs an even, non-zero number of values",
         call. = FALSE)
  }
  sorted <- sort(x)
  # Average the values at positions n/2 and n/2 + 1, not the positions.
  (sorted[n / 2] + sorted[n / 2 + 1]) / 2
}
x <- c(1, 2, 3, 4, 5, 6)
middle_value_genap(x)
## [1] 3.5
# Most frequent value(s) of x (the mode).
#
# x: vector of observations.
# Tabulates x and keeps every entry whose count equals the highest count,
# so ties yield several values. The result is taken from the first column
# of the frequency table (printed below as a factor with all distinct
# values of x as levels).
most_frequent <- function(x) {
  counts <- data.frame(table(x))
  highest <- max(counts$Freq)
  is_mode <- counts$Freq == highest
  counts[is_mode, 1]
}
x <- c(1, 4, 5, 2, 5, 3, 5, 6, 7, 5)
most_frequent(x)
## [1] 5
## Levels: 1 2 3 4 5 6 7
## [1] 7
## [1] 1
# Sample variance of x (denominator n - 1).
#
# x: numeric vector.
# Returns the sum of squared deviations from the mean divided by n - 1,
# or NA when x has fewer than two elements (the variance is undefined).
variance <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  centre <- sum(x) / n
  # Deviations from the mean are numerically safer than
  # (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
  sum((x - centre)^2) / (n - 1)
}
x <- c(1, 2, 3, 4, 5, 6, 7)
variance(x)
## [1] 4.666667
# Sample standard deviation of x (square root of the n - 1 variance).
#
# x: numeric vector.
# Returns NA when x has fewer than two elements.
standard_deviation <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  centre <- sum(x) / n
  sqrt(sum((x - centre)^2) / (n - 1))
}
x <- c(1, 2, 3, 4, 5, 6, 7)
standard_deviation(x)
## [1] 2.160247
Outliers
summary
x <- c(1, 2, 3, 4, 5, 6, 7)

# One-row summary of a numeric vector.
#
# x: numeric vector.
# Returns a 1 x 7 matrix (row "Value") with the columns
#   Mean       - arithmetic mean
#   Med.Ganjil - median when length(x) is odd, otherwise NA
#   Med.Genap  - median when length(x) is even, otherwise NA
#   Max, Min   - largest and smallest value
#   Variance   - sample variance (denominator n - 1), NA when n < 2
#   StDeviasi  - square root of Variance
Summary <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  average <- sum(x) / n

  # Only one of the two median columns applies to a given length.
  med_odd <- if (n %% 2 == 1) sorted[(n + 1) / 2] else NA_real_
  med_even <- if (n > 0 && n %% 2 == 0) {
    (sorted[n / 2] + sorted[n / 2 + 1]) / 2
  } else {
    NA_real_
  }

  spread <- if (n > 1) sum((x - average)^2) / (n - 1) else NA_real_

  matrix(
    c(average, med_odd, med_even, max(x), min(x), spread, sqrt(spread)),
    nrow = 1, ncol = 7,
    dimnames = list(
      "Value",
      c("Mean", "Med.Ganjil", "Med.Genap", "Max", "Min", "Variance",
        "StDeviasi")
    )
  )
}
Summary(x)
##       Mean Med.Ganjil Med.Genap Max Min Variance StDeviasi
## Value    4          4        NA   7   1 4.666667  2.160247
Multivariate variable (more dimension)
# Mean of values that come with frequencies (weighted mean).
#
# x:    numeric vector of distinct values.
# freq: how many times each value of x occurs; same length as x.
# Returns sum(x * freq) divided by the total number of observations,
# i.e. sum(freq), not by the number of distinct values.
average_freq <- function(x, freq) {
  stopifnot(length(x) == length(freq))
  sum(x * freq) / sum(freq)
}
x <- c(1, 2, 3, 4, 5)
freq <- c(2, 4, 3, 6, 4)
average_freq(x, freq)
## [1] 3.315789
K <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
list3 <- rep(K, freq)

# Median of a vector of any length.
#
# x: numeric vector (here: the frequency data expanded with rep()).
# Returns the central sorted value for an odd number of elements, the
# mean of the two central values for an even number, and NA for an
# empty vector.
middle_value <- function(x) {
  sorted <- sort(x)
  n <- length(sorted)
  if (n == 0) {
    return(NA_real_)
  }
  if (n %% 2 == 0) {
    (sorted[n / 2] + sorted[n / 2 + 1]) / 2
  } else {
    sorted[(n + 1) / 2]
  }
}
middle_value(list3)
## [1] 4
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
list3 <- rep(M, freq)

# Most frequent value(s) of x (the mode).
#
# x: vector of observations (here: the frequency data expanded with rep()).
# Returns the entries of the frequency table whose count equals the
# highest count, taken from the table's first column (a factor), so ties
# yield several values. Works for any non-empty length of x.
most_frequent <- function(x) {
  counts <- data.frame(table(x))
  # Return the filtered values themselves, not the whole frequency table.
  counts[counts$Freq == max(counts$Freq), 1]
}
most_frequent(list3)
## [1] 4
## Levels: 2 3 4 5 9
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
list3 <- rep(M, freq)

# Largest value of x.
#
# x: numeric vector.
# The maximum does not depend on whether the length is odd or even, so
# no length check is made.
max_value <- function(x) {
  max(x)
}
max_value(list3)
## [1] 9
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
list3 <- rep(M, freq)

# Smallest value of x.
#
# x: numeric vector.
# The minimum does not depend on whether the length is odd or even, so
# no length check is made.
min_value <- function(x) {
  min(x)
}
min_value(list3)
## [1] 2
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
list3 <- rep(M, freq)

# Sample variance of x (denominator n - 1).
#
# x: numeric vector (here: the frequency data expanded with rep()).
# Returns the sum of squared deviations from the mean divided by n - 1
# for any length of at least two, and NA for shorter input.
variance <- function(x) {
  n <- length(x)
  if (n < 2) {
    return(NA_real_)
  }
  centre <- sum(x) / n
  sum((x - centre)^2) / (n - 1)
}
variance(list3)
## [1] 6.595833
standard_deviation
summary
M <- c(2, 3, 4, 5, 9)
x <- c(1, 2, 3, 4, 5)
freq <- c(2, 3, 5, 2, 4)

# One-row summary of frequency data.
#
# x:    numeric vector of values.
# freq: non-negative whole-number count for each value of x; same length
#       as x. Defaults to one occurrence per value. Pass the counts
#       explicitly; the function does not read a global `freq`.
# Returns a 1 x 6 matrix (row "Value") with the columns
#   Average  - mean of the expanded data, sum(x * freq) / sum(freq)
#   Median   - middle value of the expanded, sorted data
#   Modus    - most frequent value; the smallest one when several tie
#   Max, Min - largest and smallest observed value
#   Variance - sample variance (denominator n - 1), NA when n < 2
Summary <- function(x, freq = rep(1, length(x))) {
  stopifnot(length(x) == length(freq), all(freq >= 0), sum(freq) > 0)

  # Expand to one entry per observation so every statistic uses the same data.
  expanded <- sort(rep(x, freq))
  n <- length(expanded)

  average <- sum(expanded) / n

  median_value <- if (n %% 2 == 0) {
    (expanded[n / 2] + expanded[n / 2 + 1]) / 2
  } else {
    expanded[(n + 1) / 2]
  }

  # `expanded` is sorted, so `distinct` is ascending and which.max() picks
  # the smallest value among tied modes.
  distinct <- unique(expanded)
  counts <- tabulate(match(expanded, distinct))
  modus <- distinct[which.max(counts)]

  spread <- if (n > 1) sum((expanded - average)^2) / (n - 1) else NA_real_

  matrix(
    c(average, median_value, modus, max(expanded), min(expanded), spread),
    nrow = 1, ncol = 6,
    dimnames = list(
      "Value",
      c("Average", "Median", "Modus", "Max", "Min", "Variance")
    )
  )
}
Id <- 1:5000
# Simulated transaction data: 5000 rows, one per calendar day starting
# 2018-01-01. The draws are random, so results differ between runs unless
# a seed is set beforehand.
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)
Name <- sample(
  c("Angel", "Sherly", "Vanessa", "Irene", "Julian", "Jeffry", "Nikita",
    "Kefas", "Siana", "Lala", "Fallen", "Ardifo", "Kevin", "Michael",
    "Felisha", "Calisha", "Patricia", "Naomi", "Eric", "Jacob"),
  5000, replace = TRUE
)
# Each city appears exactly 1000 times; sample() only shuffles the order.
City <- sample(
  rep(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"), times = 1000)
)
Outlet <- sample(
  c("Outlet 1", "Outlet 2", "Outlet 3", "Outlet 4", "Outlet 5"),
  5000, replace = TRUE
)
Menu <- c("Cappucino", "Es Kopi Susu", "Hot Caramel Latte", "Hot Chocolate",
          "Hot Red Velvet Latte", "Ice Americano", "Ice Berry Coffee",
          "Ice Cafe Latte", "Ice Caramel Latte", "Ice Coffee Avocado",
          "Ice Coffee Lite", "Ice Matcha Espresso", "Ice Matcha Latte",
          "Ice Red Velvet Latte")
all_menu <- sample(Menu, 5000, replace = TRUE)
# One price per menu item; tied to length(Menu) so the lookup table stays
# valid when items are added or removed.
Price <- sample(18000:45000, length(Menu), replace = TRUE)
DFPrice <- data.frame(Menu, Price)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Joining, by = "Menu"
## Id Date Name City Outlet Menu Price
## 1 1 2018-01-01 Lala Tangerang Outlet 2 Hot Red Velvet Latte 34412
## 2 2 2018-01-02 Angel Bekasi Outlet 5 Ice Caramel Latte 26021
## 3 3 2018-01-03 Michael Tangerang Outlet 4 Hot Chocolate 19551
## 4 4 2018-01-04 Nikita Bekasi Outlet 2 Hot Caramel Latte 32576
## 5 5 2018-01-05 Sherly Depok Outlet 4 Ice Cafe Latte 25739
Let’s say you already have a data set in hand, as shown above. Please create a function to carry out each of the following tasks:
library(dplyr)
# Share of transactions per city as proportions that sum to 1.
# prop.table() normalises its input, so no scaling is needed here.
City_percentage <- data.frame(prop.table(table(KopiKenangan$City)))

# Format proportions as percentage labels.
#
# x: numeric vector of proportions (0.2 means 20%).
# Returns a character vector such as "20%", rounded to one decimal place.
addPercent <- function(x) {
  paste0(round(x * 100, digits = 1), "%")
}
percentage <- addPercent(City_percentage$Freq)
City_percentage$Freq <- NULL
cbind(City_percentage, percentage)
## Var1 percentage
## 1 Bekasi 20%
## 2 Bogor 20%
## 3 Depok 20%
## 4 Jakarta 20%
## 5 Tangerang 20%
##
## Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
## Angel 16 18 13 19
## Ardifo 18 18 20 18
## Calisha 16 15 19 19
## Eric 13 14 17 18
## Fallen 13 18 11 19
## Felisha 17 19 21 28
## Irene 18 21 11 11
## Jacob 21 14 13 17
## Jeffry 18 21 22 17
## Julian 11 15 17 20
## Kefas 23 13 16 17
## Kevin 15 19 21 14
## Lala 10 21 10 21
## Michael 19 18 18 25
## Naomi 19 19 15 11
## Nikita 15 26 23 20
## Patricia 20 13 15 15
## Sherly 31 14 15 19
## Siana 18 19 20 21
## Vanessa 21 16 22 26
##
## Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
## Angel 24 14 21 17
## Ardifo 15 18 12 10
## Calisha 14 21 17 16
## Eric 10 24 22 20
## Fallen 13 15 15 16
## Felisha 19 20 19 13
## Irene 20 16 17 18
## Jacob 21 17 22 21
## Jeffry 24 15 17 14
## Julian 16 18 17 23
## Kefas 17 15 27 23
## Kevin 26 14 15 15
## Lala 17 20 26 19
## Michael 16 19 16 19
## Naomi 13 24 13 14
## Nikita 23 27 16 16
## Patricia 19 18 13 23
## Sherly 14 22 20 20
## Siana 17 24 12 26
## Vanessa 20 25 17 16
##
## Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
## Angel 21 16 17
## Ardifo 11 18 17
## Calisha 18 24 21
## Eric 12 17 20
## Fallen 22 19 24
## Felisha 19 17 12
## Irene 17 16 27
## Jacob 19 18 9
## Jeffry 18 11 15
## Julian 17 25 20
## Kefas 16 15 11
## Kevin 20 22 15
## Lala 15 20 13
## Michael 21 17 21
## Naomi 16 18 24
## Nikita 18 17 21
## Patricia 16 21 19
## Sherly 20 23 12
## Siana 17 25 16
## Vanessa 21 13 11
##
## Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
## Angel 21 16 18
## Ardifo 31 18 21
## Calisha 29 16 20
## Eric 13 25 21
## Fallen 17 17 16
## Felisha 18 13 20
## Irene 15 16 23
## Jacob 14 14 12
## Jeffry 20 15 27
## Julian 9 21 16
## Kefas 12 10 16
## Kevin 16 11 11
## Lala 18 21 17
## Michael 19 24 14
## Naomi 15 15 26
## Nikita 22 22 21
## Patricia 20 14 14
## Sherly 15 17 21
## Siana 14 16 24
## Vanessa 10 15 13
## monthly.sales
## April,2018 April,2019 April,2020 April,2021 April,2022
## 30 30 30 30 30
## April,2023 April,2024 April,2025 April,2026 April,2027
## 30 30 30 30 30
## April,2028 April,2029 April,2030 April,2031 August,2018
## 30 30 30 30 31
## August,2019 August,2020 August,2021 August,2022 August,2023
## 31 31 31 31 31
## August,2024 August,2025 August,2026 August,2027 August,2028
## 31 31 31 31 31
## August,2029 August,2030 August,2031 December,2018 December,2019
## 31 31 31 31 31
## December,2020 December,2021 December,2022 December,2023 December,2024
## 31 31 31 31 31
## December,2025 December,2026 December,2027 December,2028 December,2029
## 31 31 31 31 31
## December,2030 February,2018 February,2019 February,2020 February,2021
## 31 28 28 29 28
## February,2022 February,2023 February,2024 February,2025 February,2026
## 28 28 29 28 28
## February,2027 February,2028 February,2029 February,2030 February,2031
## 28 29 28 28 28
## January,2018 January,2019 January,2020 January,2021 January,2022
## 31 31 31 31 31
## January,2023 January,2024 January,2025 January,2026 January,2027
## 31 31 31 31 31
## January,2028 January,2029 January,2030 January,2031 July,2018
## 31 31 31 31 31
## July,2019 July,2020 July,2021 July,2022 July,2023
## 31 31 31 31 31
## July,2024 July,2025 July,2026 July,2027 July,2028
## 31 31 31 31 31
## July,2029 July,2030 July,2031 June,2018 June,2019
## 31 31 31 30 30
## June,2020 June,2021 June,2022 June,2023 June,2024
## 30 30 30 30 30
## June,2025 June,2026 June,2027 June,2028 June,2029
## 30 30 30 30 30
## June,2030 June,2031 March,2018 March,2019 March,2020
## 30 30 31 31 31
## March,2021 March,2022 March,2023 March,2024 March,2025
## 31 31 31 31 31
## March,2026 March,2027 March,2028 March,2029 March,2030
## 31 31 31 31 31
## March,2031 May,2018 May,2019 May,2020 May,2021
## 31 31 31 31 31
## May,2022 May,2023 May,2024 May,2025 May,2026
## 31 31 31 31 31
## May,2027 May,2028 May,2029 May,2030 May,2031
## 31 31 31 31 31
## November,2018 November,2019 November,2020 November,2021 November,2022
## 30 30 30 30 30
## November,2023 November,2024 November,2025 November,2026 November,2027
## 30 30 30 30 30
## November,2028 November,2029 November,2030 October,2018 October,2019
## 30 30 30 31 31
## October,2020 October,2021 October,2022 October,2023 October,2024
## 31 31 31 31 31
## October,2025 October,2026 October,2027 October,2028 October,2029
## 31 31 31 31 31
## October,2030 September,2018 September,2019 September,2020 September,2021
## 31 30 30 30 30
## September,2022 September,2023 September,2024 September,2025 September,2026
## 30 30 30 30 30
## September,2027 September,2028 September,2029 September,2030 September,2031
## 30 30 30 30 9
# Number of times each menu item was sold, and its average sales per month.
data.cafe <- data.frame(table(KopiKenangan$Menu))
# NOTE(review): monthly.sales is defined outside this excerpt. The printed
# table of monthly.sales and the earlier result 352 / 5000 = 0.0704 indicate
# that it holds one "Month,Year" label per transaction, so its length is the
# number of rows, not the number of months. Divide by the number of distinct
# months instead (165 in the printed table); confirm against its definition.
data.cafe$monthly.average <- data.cafe$Freq / length(unique(monthly.sales))
data.cafe
## Var1 Freq monthly.average
## 1 Cappucino 352 2.133333
## 2 Es Kopi Susu 351 2.127273
## 3 Hot Caramel Latte 339 2.054545
## 4 Hot Chocolate 375 2.272727
## 5 Hot Red Velvet Latte 358 2.169697
## 6 Ice Americano 386 2.339394
## 7 Ice Berry Coffee 354 2.145455
## 8 Ice Cafe Latte 359 2.175758
## 9 Ice Caramel Latte 354 2.145455
## 10 Ice Coffee Avocado 372 2.254545
## 11 Ice Coffee Lite 345 2.090909
## 12 Ice Matcha Espresso 348 2.109091
## 13 Ice Matcha Latte 336 2.036364
## 14 Ice Red Velvet Latte 371 2.248485