In this section, you are expected to be confident enough to write your own functions. I advise you to create one function for each of the tasks below:
## [1] 2
# Middle value (median) of a vector that holds an odd number of elements.
#
# x: vector with an odd, non-zero length.
# Returns the element found at the central position of the sorted data.
# Stops with an error when the length of x is not odd.
middle_value_ganjil <- function(x) {
  n <- length(x)
  stopifnot(n %% 2 == 1)
  # (n + 1) / 2 is only the *position* of the middle element; the value has
  # to be looked up in the sorted data.
  sort(x)[(n + 1) / 2]
}
x <- c(1, 2, 3, 4, 5)
middle_value_ganjil(x)
## [1] 3
# Middle value (median) of a vector that holds an even number of elements.
#
# x: numeric vector with an even, non-zero length.
# Returns the average of the two central elements of the sorted data.
# Stops with an error when the length of x is zero or odd.
middle_value_genap <- function(x) {
  n <- length(x)
  stopifnot(n > 0, n %% 2 == 0)
  sorted <- sort(x)
  # The two central positions are n / 2 and n / 2 + 1; average their values.
  (sorted[n / 2] + sorted[n / 2 + 1]) / 2
}
x <- c(1, 2, 3, 4, 5, 6)
middle_value_genap(x)
## [1] 3.5
# Mode of a vector: the value(s) that occur most often.
#
# x: vector of observations.
# Returns a factor holding every value whose count equals the highest count;
# the factor levels are all distinct values of x.
most_frequent <- function(x) {
  counts <- data.frame(table(x))
  is_top <- counts$Freq == max(counts$Freq)
  counts[is_top, 1]
}
x <- c(1, 4, 5, 2, 5, 3, 5, 6, 7, 5)
most_frequent(x)
## [1] 5
## Levels: 1 2 3 4 5 6 7
## [1] 7
## [1] 1
# Sample variance computed with the shortcut formula
#   (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
#
# x: numeric vector.
# Returns the variance with an n - 1 denominator; a single observation
# gives NaN (0 / 0).
variance <- function(x) {
  n <- length(x)
  # The whole product n * (n - 1) must be the denominator; without the
  # parentheses the division and multiplication cancel each other out.
  (n * sum(x^2) - sum(x)^2) / (n * (n - 1))
}
x <- c(1, 2, 3, 4, 5, 6, 7)
variance(x)
## [1] 4.666667
# Sample standard deviation: the square root of the sample variance
#   (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
#
# x: numeric vector.
# Returns the standard deviation with an n - 1 denominator; a single
# observation gives NaN.
standard_deviation <- function(x) {
  n <- length(x)
  # n * (n - 1) is parenthesised so that it is the full denominator.
  sqrt((n * sum(x^2) - sum(x)^2) / (n * (n - 1)))
}
x <- c(1, 2, 3, 4, 5, 6, 7)
standard_deviation(x)
## [1] 2.160247
Outliers
summary
x <- c(1, 2, 3, 4, 5, 6, 7)
# Descriptive summary of a numeric vector.
#
# x: numeric vector.
# Returns a 1 x 7 matrix (row "Value") with the columns
#   Mean, Med.Ganjil, Med.Genap, Max, Min, Variance, StDeviasi.
# Med.Ganjil is the median when the number of observations is odd and
# Med.Genap the median when it is even; the column that does not apply to
# the data is NA. Variance and StDeviasi use the n - 1 denominator.
Summary <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  is_odd <- n %% 2 == 1

  average <- sum(x) / n
  # The median is a value of the sorted data, not a position.
  median_odd <- if (is_odd) sorted[(n + 1) / 2] else NA_real_
  median_even <- if (!is_odd && n > 0) {
    (sorted[n / 2] + sorted[n / 2 + 1]) / 2
  } else {
    NA_real_
  }
  largest <- max(x)
  smallest <- min(x)
  # n * (n - 1) is parenthesised so that it is the full denominator.
  variance <- (n * sum(x^2) - sum(x)^2) / (n * (n - 1))
  stdev <- sqrt(variance)

  matrix(
    c(average, median_odd, median_even, largest, smallest, variance, stdev),
    nrow = 1, ncol = 7,
    dimnames = list(
      "Value",
      c("Mean", "Med.Ganjil", "Med.Genap", "Max", "Min", "Variance",
        "StDeviasi")
    )
  )
}
Summary(x)
##       Mean Med.Ganjil Med.Genap Max Min Variance StDeviasi
## Value    4          4        NA   7   1 4.666667  2.160247
Multivariate variable (more dimension)
# Mean of a frequency distribution.
#
# x:    numeric vector of distinct values.
# freq: how many times each value of x occurs (same length as x).
# Returns sum(x * freq) / sum(freq), i.e. the mean of the expanded data
# rep(x, freq). Stops with an error when the lengths differ.
average_freq <- function(x, freq) {
  stopifnot(length(x) == length(freq))
  # The total number of observations is sum(freq), not the number of
  # distinct values.
  sum(x * freq) / sum(freq)
}
x <- c(1, 2, 3, 4, 5)
freq <- c(2, 4, 3, 6, 4)
average_freq(x, freq)
## [1] 3.315789
K <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the frequency table into the raw observations.
list3 <- rep(K, freq)
# Median of a vector of observations.
#
# x: numeric vector.
# Returns the central sorted value for an odd number of observations, the
# average of the two central sorted values for an even number, and NA for
# empty input.
middle_value <- function(x) {
  sorted <- sort(x)
  n <- length(sorted)
  if (n == 0) {
    return(NA_real_)
  }
  if (n %% 2 == 0) {
    mid <- sorted[c(n / 2, n / 2 + 1)]
    sum(mid) / 2
  } else {
    # Odd count: a single central element exists.
    sorted[(n + 1) / 2]
  }
}
middle_value(list3)
## [1] 4
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the frequency table into the raw observations.
list3 <- rep(M, freq)
# Mode of a vector: the value(s) that occur most often.
#
# x: vector of observations (any length greater than zero).
# Returns a factor holding every value whose count equals the highest count;
# the factor levels are all distinct values of x.
most_frequent <- function(x) {
  y <- data.frame(table(x))
  # Return only the rows with the highest count, not the whole table.
  y[y$Freq == max(y$Freq), 1]
}
most_frequent(list3)
## [1] 4
## Levels: 2 3 4 5 9
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the frequency table into the raw observations.
list3 <- rep(M, freq)
# Largest value of a vector.
#
# x: numeric vector.
# Returns max(x) for input of any length; the result does not depend on
# whether the number of observations is even or odd.
max_value <- function(x) {
  max(x)
}
max_value(list3)
## [1] 9
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the frequency table into the raw observations.
list3 <- rep(M, freq)
# Smallest value of a vector.
#
# x: numeric vector.
# Returns min(x) for input of any length; the result does not depend on
# whether the number of observations is even or odd.
min_value <- function(x) {
  min(x)
}
min_value(list3)
## [1] 2
M <- c(2, 3, 4, 5, 9)
freq <- c(2, 3, 5, 2, 4)
# Expand the frequency table into the raw observations.
list3 <- rep(M, freq)
# Sample variance computed with the shortcut formula
#   (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
#
# x: numeric vector of observations (even or odd length).
# Returns the variance with an n - 1 denominator; a single observation
# gives NaN (0 / 0).
variance <- function(x) {
  n <- length(x)
  # n * (n - 1) is parenthesised so that it is the full denominator.
  (n * sum(x^2) - sum(x)^2) / (n * (n - 1))
}
variance(list3)
## [1] 6.595833
standard_deviation
summary
M <- c(2, 3, 4, 5, 9)
x <- c(1, 2, 3, 4, 5)
freq <- c(2, 3, 5, 2, 4)
# Descriptive summary of raw data or of a frequency distribution.
#
# x:    numeric vector. With freq = NULL it holds the raw observations;
#       otherwise it holds the distinct values of a frequency table.
# freq: optional vector of counts, one per element of x. When supplied the
#       data are expanded with rep(x, freq) before summarising.
# Returns a 1 x 6 matrix (row "Value") with the columns
#   Average, Median, Modus, Max, Min, Variance.
# Modus is the most frequent value; ties resolve to the smallest value.
# Variance uses the n - 1 denominator.
# Stops with an error for empty input or when x and freq differ in length.
Summary <- function(x, freq = NULL) {
  if (!is.null(freq)) {
    stopifnot(length(x) == length(freq))
    x <- rep(x, freq)
  }
  n <- length(x)
  stopifnot(n > 0)
  sorted <- sort(x)

  average <- sum(x) / n

  median_value <- if (n %% 2 == 0) {
    sum(sorted[c(n / 2, n / 2 + 1)]) / 2
  } else {
    sorted[(n + 1) / 2]
  }

  # Count each distinct value; which.max() picks the first (smallest) value
  # that reaches the highest count.
  distinct <- unique(sorted)
  counts <- tabulate(match(sorted, distinct))
  modus <- distinct[which.max(counts)]

  # n * (n - 1) is parenthesised so that it is the full denominator.
  variance <- (n * sum(x^2) - sum(x)^2) / (n * (n - 1))

  matrix(
    c(average, median_value, modus, max(x), min(x), variance),
    nrow = 1, ncol = 6,
    dimnames = list(
      "Value",
      c("Average", "Median", "Modus", "Max", "Min", "Variance")
    )
  )
}
Id <- (1:5000)
# Simulated coffee-shop transactions: one row per day starting 2018-01-01.
# No seed is set, so every run draws different names, outlets, menus and
# prices.
# n_rows must stay equal to the length of Id defined just above.
n_rows <- 5000
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = n_rows)
Name <- sample(
  c("Angel", "Sherly", "Vanessa", "Irene", "Julian", "Jeffry", "Nikita",
    "Kefas", "Siana", "Lala", "Fallen", "Ardifo", "Kevin", "Michael",
    "Felisha", "Calisha", "Patricia", "Naomi", "Eric", "Jacob"),
  n_rows,
  replace = TRUE
)
# Every city appears equally often; only the order is shuffled.
City <- sample(
  rep(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"),
      length.out = n_rows)
)
Outlet <- sample(
  c("Outlet 1", "Outlet 2", "Outlet 3", "Outlet 4", "Outlet 5"),
  n_rows,
  replace = TRUE
)
Menu <- c("Cappucino", "Es Kopi Susu", "Hot Caramel Latte", "Hot Chocolate",
          "Hot Red Velvet Latte", "Ice Americano", "Ice Berry Coffee",
          "Ice Cafe Latte", "Ice Caramel Latte", "Ice Coffee Avocado",
          "Ice Coffee Lite", "Ice Matcha Espresso", "Ice Matcha Latte",
          "Ice Red Velvet Latte")
all_menu <- sample(Menu, n_rows, replace = TRUE)
# One random price per menu item.
Price <- sample(18000:45000, length(Menu), replace = TRUE)
DFPrice <- data.frame(Menu, Price)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Joining, by = "Menu"
## Id Date Name City Outlet Menu Price
## 1 1 2018-01-01 Michael Depok Outlet 4 Es Kopi Susu 21984
## 2 2 2018-01-02 Ardifo Tangerang Outlet 3 Ice Americano 33053
## 3 3 2018-01-03 Fallen Jakarta Outlet 1 Ice Red Velvet Latte 18154
## 4 4 2018-01-04 Nikita Tangerang Outlet 1 Ice Red Velvet Latte 18154
## 5 5 2018-01-05 Patricia Bogor Outlet 4 Ice Matcha Espresso 19723
Suppose you already have the data set shown above. Please create functions to carry out the following tasks:
library(dplyr)
# Share of transactions per city, stored as proportions (column Freq).
City_percentage <- data.frame(prop.table(table(KopiKenangan$City)))
# Format proportions as percentage labels.
#
# x: numeric vector of proportions (e.g. 0.2 for 20 percent).
# Returns a character vector such as "20%", rounded to one decimal place.
addPercent <- function(x) {
  paste0(round(x * 100, digits = 1), "%")
}
# Pass the proportions explicitly instead of relying on a global variable.
percentage <- addPercent(City_percentage$Freq)
City_percentage$Freq <- NULL
cbind(City_percentage, percentage)
## Var1 percentage
## 1 Bekasi 20%
## 2 Bogor 20%
## 3 Depok 20%
## 4 Jakarta 20%
## 5 Tangerang 20%
##
## Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
## Angel 16 16 14 19
## Ardifo 18 23 15 17
## Calisha 23 19 11 17
## Eric 22 14 18 18
## Fallen 19 13 14 19
## Felisha 11 16 16 23
## Irene 17 17 23 15
## Jacob 24 19 17 17
## Jeffry 22 19 13 21
## Julian 18 13 23 19
## Kefas 17 19 12 11
## Kevin 28 24 15 23
## Lala 13 16 18 20
## Michael 20 16 21 23
## Naomi 21 14 12 18
## Nikita 14 16 15 25
## Patricia 21 19 14 13
## Sherly 13 19 15 20
## Siana 22 12 8 20
## Vanessa 17 20 11 19
##
## Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
## Angel 14 17 13 20
## Ardifo 14 14 17 13
## Calisha 23 20 18 19
## Eric 13 19 16 21
## Fallen 17 12 19 17
## Felisha 17 19 14 15
## Irene 16 22 14 26
## Jacob 16 14 16 14
## Jeffry 17 18 20 21
## Julian 20 26 14 18
## Kefas 19 15 15 10
## Kevin 14 28 15 14
## Lala 25 16 17 18
## Michael 22 14 15 18
## Naomi 15 20 23 23
## Nikita 19 17 20 18
## Patricia 21 10 20 12
## Sherly 18 19 26 18
## Siana 12 11 20 19
## Vanessa 14 17 16 10
##
## Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
## Angel 11 19 21
## Ardifo 14 22 21
## Calisha 18 17 17
## Eric 24 21 12
## Fallen 21 14 17
## Felisha 9 22 26
## Irene 24 11 18
## Jacob 29 21 27
## Jeffry 15 17 21
## Julian 12 15 10
## Kefas 14 18 21
## Kevin 19 24 18
## Lala 21 12 27
## Michael 23 18 19
## Naomi 18 19 21
## Nikita 17 20 17
## Patricia 20 13 18
## Sherly 14 18 10
## Siana 17 21 14
## Vanessa 26 22 15
##
## Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
## Angel 20 18 16
## Ardifo 17 24 22
## Calisha 17 22 16
## Eric 27 15 13
## Fallen 19 17 30
## Felisha 16 8 20
## Irene 21 22 30
## Jacob 15 17 21
## Jeffry 16 16 18
## Julian 19 14 18
## Kefas 16 19 23
## Kevin 19 22 17
## Lala 18 15 16
## Michael 16 27 23
## Naomi 18 15 19
## Nikita 17 17 18
## Patricia 23 14 12
## Sherly 24 14 20
## Siana 16 24 15
## Vanessa 22 12 17
## monthly.sales
## April,2018 April,2019 April,2020 April,2021 April,2022
## 30 30 30 30 30
## April,2023 April,2024 April,2025 April,2026 April,2027
## 30 30 30 30 30
## April,2028 April,2029 April,2030 April,2031 August,2018
## 30 30 30 30 31
## August,2019 August,2020 August,2021 August,2022 August,2023
## 31 31 31 31 31
## August,2024 August,2025 August,2026 August,2027 August,2028
## 31 31 31 31 31
## August,2029 August,2030 August,2031 December,2018 December,2019
## 31 31 31 31 31
## December,2020 December,2021 December,2022 December,2023 December,2024
## 31 31 31 31 31
## December,2025 December,2026 December,2027 December,2028 December,2029
## 31 31 31 31 31
## December,2030 February,2018 February,2019 February,2020 February,2021
## 31 28 28 29 28
## February,2022 February,2023 February,2024 February,2025 February,2026
## 28 28 29 28 28
## February,2027 February,2028 February,2029 February,2030 February,2031
## 28 29 28 28 28
## January,2018 January,2019 January,2020 January,2021 January,2022
## 31 31 31 31 31
## January,2023 January,2024 January,2025 January,2026 January,2027
## 31 31 31 31 31
## January,2028 January,2029 January,2030 January,2031 July,2018
## 31 31 31 31 31
## July,2019 July,2020 July,2021 July,2022 July,2023
## 31 31 31 31 31
## July,2024 July,2025 July,2026 July,2027 July,2028
## 31 31 31 31 31
## July,2029 July,2030 July,2031 June,2018 June,2019
## 31 31 31 30 30
## June,2020 June,2021 June,2022 June,2023 June,2024
## 30 30 30 30 30
## June,2025 June,2026 June,2027 June,2028 June,2029
## 30 30 30 30 30
## June,2030 June,2031 March,2018 March,2019 March,2020
## 30 30 31 31 31
## March,2021 March,2022 March,2023 March,2024 March,2025
## 31 31 31 31 31
## March,2026 March,2027 March,2028 March,2029 March,2030
## 31 31 31 31 31
## March,2031 May,2018 May,2019 May,2020 May,2021
## 31 31 31 31 31
## May,2022 May,2023 May,2024 May,2025 May,2026
## 31 31 31 31 31
## May,2027 May,2028 May,2029 May,2030 May,2031
## 31 31 31 31 31
## November,2018 November,2019 November,2020 November,2021 November,2022
## 30 30 30 30 30
## November,2023 November,2024 November,2025 November,2026 November,2027
## 30 30 30 30 30
## November,2028 November,2029 November,2030 October,2018 October,2019
## 30 30 30 31 31
## October,2020 October,2021 October,2022 October,2023 October,2024
## 31 31 31 31 31
## October,2025 October,2026 October,2027 October,2028 October,2029
## 31 31 31 31 31
## October,2030 September,2018 September,2019 September,2020 September,2021
## 31 30 30 30 30
## September,2022 September,2023 September,2024 September,2025 September,2026
## 30 30 30 30 30
## September,2027 September,2028 September,2029 September,2030 September,2031
## 30 30 30 30 9
# Number of orders per menu item over the whole data set.
data.cafe <- data.frame(table(KopiKenangan$Menu))
# Average orders per month = total orders / number of distinct months.
# NOTE(review): monthly.sales is created outside this excerpt. The earlier
# result (376 orders -> 0.0752) implies it holds one "Month,Year" label per
# transaction, so its length is the row count, not the month count - confirm.
data.cafe$monthly.average <- data.cafe$Freq / length(unique(monthly.sales))
data.cafe
## Var1 Freq monthly.average
## 1 Cappucino 376 2.278788
## 2 Es Kopi Susu 344 2.084848
## 3 Hot Caramel Latte 305 1.848485
## 4 Hot Chocolate 377 2.284848
## 5 Hot Red Velvet Latte 346 2.096970
## 6 Ice Americano 348 2.109091
## 7 Ice Berry Coffee 348 2.109091
## 8 Ice Cafe Latte 344 2.084848
## 9 Ice Caramel Latte 366 2.218182
## 10 Ice Coffee Avocado 364 2.206061
## 11 Ice Coffee Lite 370 2.242424
## 12 Ice Matcha Espresso 376 2.278788
## 13 Ice Matcha Latte 352 2.133333
## 14 Ice Red Velvet Latte 384 2.327273