In this section, you are expected to be more confident in creating your own functions. I advise you to create a function for each of the tasks below:
# Median of a vector, rounded to 2 decimals.
# Use this if you're looking for the median (univariate).
#   x: numeric vector.
# Returns the middle sorted value when length(x) is odd, otherwise the mean
# of the two middle sorted values.
middle_value <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  half <- n %/% 2
  if (n %% 2 == 0) {
    mid <- (sorted[half] + sorted[half + 1]) / 2
  } else {
    mid <- sorted[half + 1]
  }
  round(mid, digits = 2)
}

# Most frequent value(s) of a vector.
# Use this if you're looking for the mode (univariate).
#   x: vector.
# Returns every distinct value whose count equals the highest count, so ties
# yield more than one value.
most_frequent <- function(x) {
  values <- unique(x)
  counts <- tabulate(match(x, values))
  values[counts == max(counts)]
}

# Largest value of a vector (last element after sorting).
# Use this if you're looking for the maximum value (univariate).
max_value <- function(x) {
  tail(sort(x), n = 1)
}

# Smallest value of a vector (first element after sorting).
# Use this if you're looking for the minimum value (univariate).
min_value <- function(x) {
  head(sort(x), n = 1)
}

# Sample variance (divisor n - 1), rounded to 2 decimals.
# Use this if you're looking for the sample variance (univariate).
#   x: numeric vector.
var.s <- function(x) {
  n <- length(x)
  deviations <- x - sum(x) / n
  round(sum(deviations^2) / (n - 1), digits = 2)
}

# Sample standard deviation (divisor n - 1), rounded to 2 decimals.
# Use this if you're looking for the sample standard deviation (univariate).
#   x: numeric vector.
# The square root is taken from the unrounded variance.
stdev.s <- function(x) {
  n <- length(x)
  deviations <- x - sum(x) / n
  round(sqrt(sum(deviations^2) / (n - 1)), digits = 2)
}

# Values lying outside the 1.5 * IQR fences around the quartiles.
# Use this if you're looking for outliers (univariate).
#   x:     numeric vector.
#   n_max: maximum number of outliers to return, taken in the order they
#          appear in x. The default of 6 keeps the original behaviour, which
#          applied head() with its default length; pass Inf to return all.
# Returns the outlying elements of x (possibly none).
Outlier <- function(x, n_max = 6L) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), names = FALSE)
  fence <- (quartiles[2] - quartiles[1]) * 1.5
  flagged <- which(x < quartiles[1] - fence | x > quartiles[2] + fence)
  x[head(flagged, n = n_max)]
}

# All univariate summaries in one named vector.
#   x: numeric vector.
# Returns c(average, median, mode, min, max, var.s, stdev.s, Outlier).
# Entries holding several values (tied modes, several outliers) are spread
# over consecutive elements and numbered by c(). The average, variance and
# standard deviation are rounded to 2 decimals; the median is not rounded.
statistics <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  half <- n %/% 2
  if (n %% 2 == 0) {
    mid <- (sorted[half] + sorted[half + 1]) / 2
  } else {
    mid <- sorted[half + 1]
  }
  # Variance and standard deviation are delegated to var.s()/stdev.s(). The
  # former inline copy subtracted `sum(x)/n-1` (the mean minus one) from each
  # value and therefore disagreed with those functions.
  c(
    average = round(sum(x) / n, digits = 2),
    median = mid,
    mode = most_frequent(x),
    min = min_value(x),
    max = max_value(x),
    var.s = var.s(x),
    stdev.s = stdev.s(x),
    Outlier = Outlier(x)
  )
}
## A
## 11 12 13 14 15 17 18 19 22 23 25 27 28 45
## 1 1 1 2 2 1 1 1 2 1 4 1 1 1
## [1] 20.75
## [1] 20.5
## [1] 25
## [1] 11
## [1] 45
## [1] 61.78
## [1] 7.86
## [1] 45
## average median mode min max var.s stdev.s Outlier
## 20.75 20.50 25.00 11.00 45.00 62.83 7.93 45.00
Multivariate variables (more dimensions)
# Mean of values weighted by their frequencies.
# Use this if you're looking for the average (multivariate).
#   x:    numeric vector of distinct values.
#   freq: number of occurrences of each value in x.
average.multi <- function(x, freq) {
  sum(x * freq) / sum(freq)
}

# Median of a frequency table, rounded to 2 decimals.
# Use this if you're looking for the median (multivariate).
#   x, freq: values and their frequencies.
# Each value is repeated freq times and the result sorted before the middle
# element (or the mean of the two middle elements) is taken.
median.multi <- function(x, freq) {
  n <- sum(freq)
  expanded <- sort(rep.int(x, freq))
  half <- n %/% 2
  if (n %% 2 == 0) {
    mid <- (expanded[half] + expanded[half + 1]) / 2
  } else {
    mid <- expanded[half + 1]
  }
  round(mid, digits = 2)
}

# Most frequent value(s) of a frequency table.
# Use this if you're looking for the mode (multivariate).
# Ties yield more than one value.
mode.multi <- function(x, freq) {
  expanded <- sort(rep.int(x, freq))
  values <- unique(expanded)
  counts <- tabulate(match(expanded, values))
  values[counts == max(counts)]
}

# Largest value of a frequency table.
# Use this if you're looking for the maximum (multivariate).
max.multi <- function(x, freq) {
  tail(sort(rep.int(x, freq)), n = 1)
}

# Smallest value of a frequency table.
# Use this if you're looking for the minimum (multivariate).
min.multi <- function(x, freq) {
  head(sort(rep.int(x, freq)), n = 1)
}

# Sample variance (divisor n - 1) of a frequency table, rounded to 2 decimals.
# Use this if you're looking for the variance (multivariate).
#   n is the total number of observations, sum(freq).
var.multi.s <- function(x, freq) {
  n <- sum(freq)
  expanded <- sort(rep.int(x, freq))
  deviations <- expanded - sum(expanded) / n
  round(sum(deviations^2) / (n - 1), digits = 2)
}

# Sample standard deviation of a frequency table, rounded to 2 decimals.
# Use this if you're looking for the standard deviation (multivariate).
stdev.multi.s <- function(x, freq) {
  # n must be the number of observations, sum(freq). The previous
  # length(x) counted only the distinct values, which made both the mean and
  # the divisor wrong.
  n <- sum(freq)
  expanded <- sort(rep.int(x, freq))
  deviations <- expanded - sum(expanded) / n
  round(sqrt(sum(deviations^2) / (n - 1)), digits = 2)
}

# Values lying outside the 1.5 * IQR fences of the expanded data.
# Use this if you're looking for outliers (multivariate).
#   x, freq: values and their frequencies.
#   n_max:   maximum number of outliers to return, in sorted order. The
#            default of 6 keeps the original behaviour, which applied head()
#            with its default length; pass Inf to return all.
Outlier.multi <- function(x, freq, n_max = 6L) {
  expanded <- sort(rep.int(x, freq))
  quartiles <- quantile(expanded, probs = c(0.25, 0.75), names = FALSE)
  fence <- (quartiles[2] - quartiles[1]) * 1.5
  flagged <- which(
    expanded < quartiles[1] - fence | expanded > quartiles[2] + fence
  )
  expanded[head(flagged, n = n_max)]
}

# All frequency-table summaries in one named vector.
#   x, freq: values and their frequencies.
# Returns c(average, median, mode, min, max, var.s, stdev.s, Outlier).
# Entries holding several values (tied modes, several outliers) are spread
# over consecutive elements and numbered by c(). The average, variance and
# standard deviation are rounded to 2 decimals; the median is not rounded.
statistics.multi <- function(x, freq) {
  n <- sum(freq)
  expanded <- sort(rep.int(x, freq))
  half <- n %/% 2
  if (n %% 2 == 0) {
    mid <- (expanded[half] + expanded[half + 1]) / 2
  } else {
    mid <- expanded[half + 1]
  }
  c(
    average = round(sum(x * freq) / n, digits = 2),
    median = mid,
    mode = mode.multi(x, freq),
    min = min.multi(x, freq),
    max = max.multi(x, freq),
    var.s = var.multi.s(x, freq),
    stdev.s = stdev.multi.s(x, freq),
    Outlier = Outlier.multi(x, freq)
  )
}
## B Fi
## 1 36 3
## 2 23 1
## 3 37 2
## 4 33 6
## 5 32 3
## 6 34 2
## 7 39 5
## 8 31 7
## 9 53 1
## [1] "The average is 34.4"
## [1] "The median is 33"
## [1] "The mode is 31"
## [1] "The minimum value is 23"
## [1] "The maximum value is 53"
## [1] "The variance sample is 24.59"
## [1] "The standard deviation sample is 155.72"
## [1] "The outlier is 53"
## average median mode min max var.s stdev.s Outlier
## 34.40 33.00 31.00 23.00 53.00 24.59 4.96 53.00
# Simulated coffee-shop sales: one record per day for n_rows consecutive days.
# The sample() draws use no fixed seed, so the values differ between runs.
n_rows <- 5000L

Id <- seq_len(n_rows)
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = n_rows)

# Customer name, drawn with replacement.
Name <- sample(
  c(
    "Angel", "Sherly", "Vanessa", "Irene", "Julian", "Jeffry", "Nikita",
    "Kefas", "Siana", "Lala", "Fallen", "Ardifo", "Kevin", "Michael",
    "Felisha", "Calisha", "Patricia", "Naomi", "Eric", "Jacob"
  ),
  n_rows,
  replace = TRUE
)

# Cities are a shuffled, perfectly balanced vector (each city appears equally
# often), unlike the other columns, which are drawn with replacement.
City <- sample(
  rep(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"), length.out = n_rows)
)

Outlet <- sample(
  c("Outlet 1", "Outlet 2", "Outlet 3", "Outlet 4", "Outlet 5"),
  n_rows,
  replace = TRUE
)

Menu <- c(
  "Cappucino", "Es Kopi Susu", "Hot Caramel Latte", "Hot Chocolate",
  "Hot Red Velvet Latte", "Ice Americano", "Ice Berry Coffee",
  "Ice Cafe Latte", "Ice Caramel Latte", "Ice Coffee Avocado",
  "Ice Coffee Lite", "Ice Matcha Espresso", "Ice Matcha Latte",
  "Ice Red Velvet Latte"
)
all_menu <- sample(Menu, n_rows, replace = TRUE)

# One random price per menu item; the count follows Menu instead of a
# hard-coded 14 so the lookup table stays aligned if the menu changes.
Price <- sample(18000:45000, length(Menu), replace = TRUE)
DFPrice <- data.frame(Menu, Price)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Joining, by = "Menu"
## Id Date Name City Outlet Menu Price
## 1 1 2018-01-01 Irene Bekasi Outlet 3 Cappucino 31666
## 2 2 2018-01-02 Kefas Bogor Outlet 5 Hot Caramel Latte 27773
## 3 3 2018-01-03 Angel Bekasi Outlet 1 Ice Red Velvet Latte 28320
## 4 4 2018-01-04 Fallen Depok Outlet 3 Ice Matcha Espresso 30622
## 5 5 2018-01-05 Calisha Tangerang Outlet 2 Ice Matcha Latte 26485
Let’s say you already have the data set shown above. Please create functions to perform the following tasks:
library(dplyr)
# Share of each amount in the overall total, formatted as a percentage.
#   value: numeric vector of amounts.
#   Data:  labels to show next to the amounts.
# Returns a data frame combining `Data` with a `Percentage` column holding
# the share rounded to 2 decimals and suffixed with "%".
Percentage <- function(value, Data) {
  total <- sum(value)
  Percentage <- paste0(round(100 * value / total, digits = 2), "%")
  data.frame(Data, Percentage)
}
# Sum the prices per city, then show each city's share of the overall total.
sales_city <- aggregate(
  x = KopiKenangan$Price,
  by = list(City = KopiKenangan$City),
  FUN = sum
)
Percentage(value = sales_city$x, Data = sales_city$City)
## Data Percentage
## 1 Bekasi 19.87%
## 2 Bogor 20.06%
## 3 Depok 20.07%
## 4 Jakarta 19.91%
## 5 Tangerang 20.09%
## y
## x Angel Ardifo Calisha Eric Fallen Felisha Irene Jacob
## Cappucino 27 18 23 26 23 13 23 20
## Es Kopi Susu 17 19 13 17 15 18 12 13
## Hot Caramel Latte 21 22 16 20 20 17 10 13
## Hot Chocolate 14 21 13 15 16 29 20 19
## Hot Red Velvet Latte 15 15 15 14 16 17 23 13
## Ice Americano 20 21 14 21 14 16 15 17
## Ice Berry Coffee 20 17 21 11 17 11 15 21
## Ice Cafe Latte 13 18 9 20 17 14 13 17
## Ice Caramel Latte 20 19 23 21 17 21 20 15
## Ice Coffee Avocado 19 19 18 19 13 20 14 21
## Ice Coffee Lite 23 21 22 16 20 23 29 21
## Ice Matcha Espresso 17 22 28 15 14 21 19 18
## Ice Matcha Latte 26 19 21 13 24 18 15 20
## Ice Red Velvet Latte 23 9 11 26 28 15 17 11
## y
## x Jeffry Julian Kefas Kevin Lala Michael Naomi Nikita
## Cappucino 20 20 18 20 20 12 21 17
## Es Kopi Susu 21 20 14 18 19 20 16 21
## Hot Caramel Latte 13 21 19 9 21 9 9 15
## Hot Chocolate 19 27 13 19 18 14 10 15
## Hot Red Velvet Latte 18 22 21 16 21 14 17 17
## Ice Americano 14 12 20 16 16 18 14 12
## Ice Berry Coffee 23 20 12 17 11 16 27 22
## Ice Cafe Latte 31 22 22 21 16 19 28 16
## Ice Caramel Latte 14 17 21 16 16 12 19 20
## Ice Coffee Avocado 11 20 18 24 16 29 18 13
## Ice Coffee Lite 19 23 15 17 13 17 11 15
## Ice Matcha Espresso 25 9 21 14 15 20 24 12
## Ice Matcha Latte 21 13 22 12 22 12 17 15
## Ice Red Velvet Latte 15 15 13 12 19 12 17 17
## y
## x Patricia Sherly Siana Vanessa
## Cappucino 20 20 17 20
## Es Kopi Susu 25 19 16 15
## Hot Caramel Latte 15 21 24 17
## Hot Chocolate 25 18 24 19
## Hot Red Velvet Latte 14 20 19 16
## Ice Americano 22 12 18 11
## Ice Berry Coffee 22 19 16 22
## Ice Cafe Latte 16 21 12 14
## Ice Caramel Latte 19 15 14 22
## Ice Coffee Avocado 21 11 15 24
## Ice Coffee Lite 23 20 17 22
## Ice Matcha Espresso 27 16 16 17
## Ice Matcha Latte 22 11 15 20
## Ice Red Velvet Latte 15 16 17 22
# Count how often each item occurs and divide by the number of months covered.
#   Item:  vector of items (one entry per record); tabulated with table().
#   dates: Date vector defining the period covered. Defaults to the
#          script-level `Date` vector, which the function previously read
#          implicitly, so existing one-argument calls behave as before.
# Returns a data frame with columns Item, Freq and monthly.average
# (Freq divided by the number of distinct calendar months in `dates`,
# rounded to 2 decimals).
average.monthly <- function(Item, dates = Date) {
  # A year-month key identifies a calendar month without depending on the
  # locale's month names; missing dates are not counted as a month.
  month_keys <- format(dates, "%Y-%m")
  n_months <- length(unique(month_keys[!is.na(month_keys)]))
  counts <- data.frame(table(Item))
  monthly.average <- round(counts$Freq / n_months, digits = 2)
  data.frame(counts, monthly.average)
}
# Order count per menu item and its average per calendar month.
average.monthly(KopiKenangan$Menu)
## Item Freq monthly.average
## 1 Cappucino 398 2.41
## 2 Es Kopi Susu 348 2.11
## 3 Hot Caramel Latte 332 2.01
## 4 Hot Chocolate 368 2.23
## 5 Hot Red Velvet Latte 343 2.08
## 6 Ice Americano 323 1.96
## 7 Ice Berry Coffee 360 2.18
## 8 Ice Cafe Latte 359 2.18
## 9 Ice Caramel Latte 361 2.19
## 10 Ice Coffee Avocado 363 2.20
## 11 Ice Coffee Lite 387 2.35
## 12 Ice Matcha Espresso 370 2.24
## 13 Ice Matcha Latte 358 2.17
## 14 Ice Red Velvet Latte 330 2.00