In this section, you are expected to be more confident in creating your own functions. I advise you to create a function for each of the tasks below:
## [1] 10.5
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)

# Median of a vector, computed by hand from its sorted values.
#
# x: vector of values (sort() drops any NA before the middle is located).
# Returns the central element for an odd count, or the mean of the two
# central elements for an even count.
middle_value <- function(x) {
  ordered <- sort(x)
  count <- length(ordered)
  half <- count %/% 2
  if (count %% 2 == 1) {
    # Odd count: exactly one element sits in the middle.
    return(ordered[half + 1])
  }
  # Even count: average the two elements around the middle.
  sum(ordered[c(half, half + 1)]) / 2
}
middle_value(x)  ## [1] 4
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)

# Mode(s) of a vector: every value whose count equals the highest count.
#
# x: vector of values.
# Returns a factor (the first column of the frequency table), so ties
# yield more than one element.
Most_frequent <- function(x) {
  counts <- data.frame(table(x))
  is_top <- counts$Freq == max(counts$Freq)
  counts[is_top, 1]
}
Most_frequent(x)  ## [1] 3
## Levels: 2 3 4 5 6 9 80
# Largest value of a vector, taken as the last element after sorting.
#
# x: vector of values (sort() drops any NA).
# Returns a length-one vector, or an empty vector when nothing is left
# after sorting.
max_value <- function(x) {
  ascending <- sort(x)
  ascending[length(ascending)]
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
max_value(x)  ## [1] 80
# Smallest value of a vector, taken as the first element after sorting.
#
# x: vector of values (sort() drops any NA).
# Returns a length-one vector, or an empty vector when nothing is left
# after sorting.
min_value <- function(x) {
  ascending <- sort(x)
  ascending[seq_len(min(1L, length(ascending)))]
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
min_value(x)  ## [1] 2
# Sample variance: sum of squared deviations from the mean over (n - 1).
#
# x: numeric vector.
# Returns a single number.
variance_sample <- function(x) {
  squared_dev <- (x - mean(x))^2
  sum(squared_dev) / (length(x) - 1)
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
variance_sample(x)  ## [1] 482.8182
Population variance
# Population variance: sum of squared deviations from the mean over n.
#
# x: numeric vector.
# Returns a single number.
variance_population <- function(x) {
  squared_dev <- (x - mean(x))^2
  sum(squared_dev) / length(x)
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
variance_population(x)  ## [1] 442.5833
# Sample standard deviation: square root of the (n - 1)-denominator variance.
#
# x: numeric vector.
# Returns a single number.
standard_deviation_sample <- function(x) {
  squared_dev <- (x - mean(x))^2
  sqrt(sum(squared_dev) / (length(x) - 1))
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
standard_deviation_sample(x)  ## [1] 21.97312
Population standard deviation
# Population standard deviation: square root of the n-denominator variance.
#
# x: numeric vector.
# Returns a single number.
standard_deviation_population <- function(x) {
  squared_dev <- (x - mean(x))^2
  sqrt(sum(squared_dev) / length(x))
}
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 9, 80)
standard_deviation_population(x)  ## [1] 21.03766
z <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 80)

# Report extreme outliers of a numeric vector.
#
# A value is flagged when it lies more than three interquartile ranges
# below the first quartile or above the third quartile.
#
# x: numeric vector without NA (quantile() rejects NA by default).
# Returns a character vector with one "Outlier = <value>" entry per flagged
# value, or character(0) when nothing is flagged.
Outliers <- function(x) {
  quartiles <- quantile(x, c(0.25, 0.75))
  # Named `spread` rather than `IQR` so stats::IQR is not shadowed.
  spread <- quartiles[[2]] - quartiles[[1]]
  lower_gate <- quartiles[[1]] - 3 * spread
  upper_gate <- quartiles[[2]] + 3 * spread
  flagged <- x[x < lower_gate | x > upper_gate]
  if (length(flagged) == 0) {
    # paste() would otherwise produce the misleading string "Outlier = ".
    return(character(0))
  }
  paste("Outlier", flagged, sep = " = ")
}
Outliers(z)  ## [1] "Outlier = 80"
x <- c(2, 3, 4, 5, 6, 3, 2, 3, 4, 5, 80)

# Descriptive statistics of a numeric vector, computed by hand.
#
# NOTE: defining `summary` in the global environment masks base::summary()
# for the rest of the session.
#
# x: numeric vector without NA (quantile() rejects NA by default).
# Returns a named numeric vector with the elements
#   mean, med, mode, max, min, var.s, var.p, stdev.s, stdev.p
# followed by the extreme outliers (named "Outlier", or "Outlier1",
# "Outlier2", ... when there are several). At most the first six outliers
# are reported, and the element is absent when there are none.
summary <- function(x) {
  n <- length(x)
  sorted <- sort(x)
  average <- sum(x) / n

  # Median: the central element for odd n, the mean of the two central
  # elements for even n.
  half <- n %/% 2
  middle_value <- if (n %% 2 == 0) {
    sum(sorted[c(half, half + 1)]) / 2
  } else {
    sorted[half + 1]
  }

  # Mode: the first value (in order of appearance) with the highest count.
  distinct <- unique(x)
  most_frequent <- distinct[which.max(tabulate(match(x, distinct)))]

  largest <- tail(sorted, 1)
  smallest <- head(sorted, 1)

  sum_sq <- sum((x - mean(x))^2)
  variance_s <- sum_sq / (n - 1)
  variance_p <- sum_sq / n
  standar.dev_s <- sqrt(variance_s)
  standar.dev_p <- sqrt(variance_p)

  # Extreme outliers: more than three interquartile ranges outside the
  # first or third quartile.
  quartiles <- quantile(x, c(0.25, 0.75))
  spread <- quartiles[[2]] - quartiles[[1]]
  lower_gate <- quartiles[[1]] - 3 * spread
  upper_gate <- quartiles[[2]] + 3 * spread
  flagged <- which(x < lower_gate | x > upper_gate)
  outliers <- x[head(flagged)]

  c(
    mean = average,
    med = middle_value,
    mode = most_frequent,
    max = largest,
    min = smallest,
    var.s = variance_s,
    var.p = variance_p,
    stdev.s = standar.dev_s,
    stdev.p = standar.dev_p,
    Outlier = outliers
  )
}
summary(x)  ## mean med mode max min var.s var.p stdev.s
## 10.63636 4.00000 3.00000 80.00000 2.00000 530.85455 482.59504 23.04028
## stdev.p Outlier
## 21.96805 80.00000
Multivariate variables (more dimensions): values `x` paired with their frequencies `y`
# Frequency-weighted mean of grouped data.
#
# x: numeric vector of distinct values.
# y: numeric vector of frequencies, one per element of x.
# Returns sum(x * y) divided by the total number of observations, sum(y),
# which equals mean(rep(x, y)).
avarage_multi <- function(x, y) {
  sum(x * y) / sum(y)
}
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)
avarage_multi(x, y)  ## [1] 8.764706
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Median of grouped data.
#
# x: vector of distinct values.
# y: vector of frequencies, one per element of x.
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on any global variable.
# Returns the central element for an odd number of observations, or the
# mean of the two central elements for an even number.
middle_value <- function(x, y) {
  sorted <- sort(rep(x, y))
  n <- length(sorted)
  half <- n %/% 2
  if (n %% 2 == 0) {
    sum(sorted[c(half, half + 1)]) / 2
  } else {
    sorted[half + 1]
  }
}
middle_value(x, y)  ## [1] 4
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Mode(s) of grouped data.
#
# x: vector of distinct values.
# y: vector of frequencies, one per element of x.
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on any global variable.
# Returns a factor holding every value whose count equals the highest count.
Most_frequent <- function(x, y) {
  counts <- data.frame(table(rep(x, y)))
  counts[counts$Freq == max(counts$Freq), 1]
}
Most_frequent(x, y)  ## [1] 4
## Levels: 2 3 4 5 9 70
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Largest observed value of grouped data.
#
# x: vector of distinct values.
# y: vector of frequencies, one per element of x.
# The data are expanded with rep(x, y) inside the function, so a value with
# frequency zero is not counted and no global variable is read.
# Returns the last element of the sorted expanded data.
max_value <- function(x, y) {
  tail(sort(rep(x, y)), 1)
}
max_value(x, y)  ## [1] 70
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Smallest observed value of grouped data.
#
# x: vector of distinct values.
# y: vector of frequencies, one per element of x.
# The data are expanded with rep(x, y) inside the function, so a value with
# frequency zero is not counted and no global variable is read.
# Returns the first element of the sorted expanded data.
min_value <- function(x, y) {
  head(sort(rep(x, y)), 1)
}
min_value(x, y)  ## [1] 2
# Sample variance of grouped data.
#
# x: numeric vector of distinct values.
# y: vector of frequencies, one per element of x.
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on any global variable.
# Returns the sum of squared deviations from the mean over (n - 1), where n
# is the number of expanded observations.
variance_sample <- function(x, y) {
  expanded <- rep(x, y)
  n <- length(expanded)
  sum((expanded - mean(expanded))^2) / (n - 1)
}
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)
variance_sample(x, y)  ## [1] 255.1912
Population variance
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Population variance of grouped data.
#
# x: numeric vector of distinct values.
# y: vector of frequencies, one per element of x.
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on any global variable.
# Returns the sum of squared deviations from the mean over n, where n is the
# number of expanded observations.
variance_population <- function(x, y) {
  expanded <- rep(x, y)
  n <- length(expanded)
  sum((expanded - mean(expanded))^2) / n
}
variance_population(x, y)  ## [1] 240.1799
# Sample standard deviation of grouped data.
#
# x: numeric vector of distinct values.
# y: numeric vector of frequencies, one per element of x.
# Returns the square root of the squared deviations of rep(x, y) from its
# mean, summed and divided by (sum(y) - 1).
standar_deviation_sample <- function(x, y) {
  expanded <- rep(x, y)
  squared_dev <- (expanded - mean(expanded))^2
  total <- sum(y)
  sqrt(sum(squared_dev) / (total - 1))
}
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)
standar_deviation_sample(x, y)  ## [1] 15.9747
Population standard deviation
# Population standard deviation of grouped data.
#
# x: numeric vector of distinct values.
# y: numeric vector of frequencies, one per element of x.
# Returns the square root of the squared deviations of rep(x, y) from its
# mean, summed and divided by sum(y).
standar_deviation_population <- function(x, y) {
  expanded <- rep(x, y)
  squared_dev <- (expanded - mean(expanded))^2
  total <- sum(y)
  sqrt(sum(squared_dev) / total)
}
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)
standar_deviation_population(x, y)  ## [1] 15.49774
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Report extreme outliers of grouped data.
#
# The quartiles are taken from the expanded data rep(x, y). An element of x
# is flagged when it lies more than three interquartile ranges below the
# first quartile or above the third quartile.
#
# x: numeric vector of distinct values.
# y: vector of frequencies, one per element of x.
# Returns a character vector with one "Outlier = <value>" entry per flagged
# element of x, or character(0) when nothing is flagged.
Outliers <- function(x, y) {
  quartiles <- quantile(rep(x, y), c(0.25, 0.75))
  # Named `spread` rather than `IQR` so stats::IQR is not shadowed.
  spread <- quartiles[[2]] - quartiles[[1]]
  lower_gate <- quartiles[[1]] - 3 * spread
  upper_gate <- quartiles[[2]] + 3 * spread
  flagged <- x[x < lower_gate | x > upper_gate]
  if (length(flagged) == 0) {
    # paste() would otherwise produce the misleading string "Outlier = ".
    return(character(0))
  }
  paste("Outlier", flagged, sep = " = ")
}
Outliers(x, y)  ## [1] "Outlier = 70"
x <- c(2, 3, 4, 5, 9, 70)
y <- c(2, 3, 5, 2, 4, 1)

# Descriptive statistics of grouped data, computed by hand.
#
# NOTE: defining `summary` in the global environment masks base::summary()
# for the rest of the session.
#
# x: numeric vector of distinct values.
# y: vector of frequencies, one per element of x.
# The data are expanded with rep(x, y) inside the function, so the result
# depends only on the arguments and not on any global variable.
# Returns a named numeric vector with the elements
#   mean, med, mode, max, min, var.s, var.p, stdev.s, stdev.p
# followed by the extreme outliers among x (named "Outlier", or "Outlier1",
# "Outlier2", ... when there are several). At most the first six outliers
# are reported, and the element is absent when there are none.
summary <- function(x, y) {
  expanded <- rep(x, y)
  sorted <- sort(expanded)
  n <- length(expanded)
  average <- sum(x * y) / n

  # Median: the central element for odd n, the mean of the two central
  # elements for even n.
  half <- n %/% 2
  middle_value <- if (n %% 2 == 0) {
    sum(sorted[c(half, half + 1)]) / 2
  } else {
    sorted[half + 1]
  }

  # Mode: counted over the expanded observations, so the frequencies in y
  # are taken into account.
  distinct <- unique(expanded)
  most_frequent <- distinct[which.max(tabulate(match(expanded, distinct)))]

  largest <- tail(sorted, 1)
  smallest <- head(sorted, 1)

  sum_sq <- sum((expanded - mean(expanded))^2)
  variance_s <- sum_sq / (n - 1)
  variance_p <- sum_sq / n
  standar.dev_s <- sqrt(variance_s)
  standar.dev_p <- sqrt(variance_p)

  # Extreme outliers: elements of x more than three interquartile ranges
  # outside the first or third quartile of the expanded data.
  quartiles <- quantile(expanded, c(0.25, 0.75))
  spread <- quartiles[[2]] - quartiles[[1]]
  lower_gate <- quartiles[[1]] - 3 * spread
  upper_gate <- quartiles[[2]] + 3 * spread
  flagged <- which(x < lower_gate | x > upper_gate)
  outliers <- x[head(flagged)]

  c(
    mean = average,
    med = middle_value,
    mode = most_frequent,
    max = largest,
    min = smallest,
    var.s = variance_s,
    var.p = variance_p,
    stdev.s = standar.dev_s,
    stdev.p = standar.dev_p,
    Outlier = outliers
  )
}
summary(x, y)  ## mean med mode max min var.s var.p
## 8.764706 4.000000 4.000000 70.000000 2.000000 255.191176 240.179931
## stdev.s stdev.p Outlier
## 15.974704 15.497740 70.000000
# Building blocks for a synthetic data set of 5000 daily coffee-shop
# transactions. No random seed is set in this section, so the sampled values
# differ from run to run.
Id <- 1:5000
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)

# Customer name per transaction, drawn with replacement from 20 names.
Name <- sample(
  c("Angel", "Sherly", "Vanessa", "Irene", "Julian", "Jeffry", "Nikita",
    "Kefas", "Siana", "Lala", "Fallen", "Ardifo", "Kevin", "Michael",
    "Felisha", "Calisha", "Patricia", "Naomi", "Eric", "Jacob"),
  5000,
  replace = TRUE
)

# Each of the five cities appears exactly 1000 times, in shuffled order.
City <- sample(
  rep(c("Jakarta", "Bogor", "Depok", "Tangerang", "Bekasi"), times = 1000)
)
Outlet <- sample(
  c("Outlet 1", "Outlet 2", "Outlet 3", "Outlet 4", "Outlet 5"),
  5000,
  replace = TRUE
)

Menu <- c(
  "Cappucino", "Es Kopi Susu", "Hot Caramel Latte", "Hot Chocolate",
  "Hot Red Velvet Latte", "Ice Americano", "Ice Berry Coffee",
  "Ice Cafe Latte", "Ice Caramel Latte", "Ice Coffee Avocado",
  "Ice Coffee Lite", "Ice Matcha Espresso", "Ice Matcha Latte",
  "Ice Red Velvet Latte"
)
all_menu <- sample(Menu, 5000, replace = TRUE)

# One random price per menu item; DFPrice is the menu-to-price lookup table.
Price <- sample(18000:45000, 14, replace = TRUE)
DFPrice <- data.frame(Menu, Price)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Joining, by = "Menu"
## Id Date Name City Outlet Menu Price
## 1 1 2018-01-01 Kefas Tangerang Outlet 2 Cappucino 27199
## 2 2 2018-01-02 Eric Tangerang Outlet 4 Ice Matcha Latte 34515
## 3 3 2018-01-03 Kevin Depok Outlet 1 Cappucino 27199
## 4 4 2018-01-04 Fallen Jakarta Outlet 4 Es Kopi Susu 25315
## 5 5 2018-01-05 Kevin Bekasi Outlet 5 Ice Coffee Lite 38897
Let’s say you already have a data set in hand, as shown above. Please create a function to perform each of the following tasks:
# Format proportions as percentage strings.
#
# x: numeric vector of proportions (e.g. 0.198).
# Returns a character vector: each value times 100, rounded to one decimal
# place, with "%" appended (e.g. "19.8%").
Percentage <- function(x) {
  paste0(round(x * 100, 1), "%")
}
# Total sales (sum of Price) per city, plus each city's share of the
# overall total formatted as a percentage string.
City.Sales <- aggregate(Price ~ City, data = KopiKenangan, FUN = sum)
Total.Sales <- sum(City.Sales[["Price"]])
City.Sales[["Percentage.City.Sales"]] <- Percentage(
  City.Sales[["Price"]] / Total.Sales
)
City.Sales  ## City Price Percentage.City.Sales
## 1 Bekasi 32889066 20%
## 2 Bogor 32812928 20%
## 3 Depok 32588191 19.8%
## 4 Jakarta 33149242 20.2%
## 5 Tangerang 32749937 19.9%
## Var1 Freq
## 1 Angel 267
## 2 Ardifo 264
## 3 Calisha 259
## 4 Eric 269
## 5 Fallen 245
## 6 Felisha 244
## 7 Irene 226
## 8 Jacob 240
## 9 Jeffry 240
## 10 Julian 266
## 11 Kefas 279
## 12 Kevin 238
## 13 Lala 243
## 14 Michael 245
## 15 Naomi 257
## 16 Nikita 244
## 17 Patricia 255
## 18 Sherly 238
## 19 Siana 257
## 20 Vanessa 224
Frequency of Menu
## Var1 Freq
## 1 Cappucino 359
## 2 Es Kopi Susu 370
## 3 Hot Caramel Latte 352
## 4 Hot Chocolate 382
## 5 Hot Red Velvet Latte 353
## 6 Ice Americano 392
## 7 Ice Berry Coffee 325
## 8 Ice Cafe Latte 323
## 9 Ice Caramel Latte 343
## 10 Ice Coffee Avocado 342
## 11 Ice Coffee Lite 343
## 12 Ice Matcha Espresso 371
## 13 Ice Matcha Latte 392
## 14 Ice Red Velvet Latte 353
##
## Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
## Angel 34 14 17 11
## Ardifo 18 22 19 14
## Calisha 16 18 16 19
## Eric 19 20 17 14
## Fallen 17 12 19 22
## Felisha 20 25 20 24
## Irene 10 14 17 20
## Jacob 19 11 21 14
## Jeffry 18 15 15 13
## Julian 14 21 14 21
## Kefas 14 23 24 20
## Kevin 18 18 18 29
## Lala 15 17 22 20
## Michael 14 17 12 24
## Naomi 16 25 15 16
## Nikita 23 24 10 18
## Patricia 21 18 22 20
## Sherly 12 20 18 15
## Siana 22 18 23 24
## Vanessa 19 18 13 24
##
## Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
## Angel 18 23 14 21
## Ardifo 16 20 15 23
## Calisha 24 26 17 13
## Eric 19 17 13 21
## Fallen 15 27 11 14
## Felisha 13 19 13 17
## Irene 19 18 17 17
## Jacob 21 16 17 12
## Jeffry 18 20 17 20
## Julian 24 19 20 12
## Kefas 22 26 13 13
## Kevin 24 20 20 13
## Lala 19 22 18 14
## Michael 14 19 23 18
## Naomi 19 19 17 17
## Nikita 15 16 18 14
## Patricia 14 15 17 14
## Sherly 14 20 15 17
## Siana 14 19 17 19
## Vanessa 11 11 13 14
##
## Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
## Angel 16 14 15
## Ardifo 17 21 15
## Calisha 17 14 22
## Eric 22 18 22
## Fallen 18 16 16
## Felisha 16 15 13
## Irene 17 19 14
## Jacob 17 16 23
## Jeffry 14 15 16
## Julian 18 18 26
## Kefas 21 25 18
## Kevin 13 8 13
## Lala 17 21 15
## Michael 18 10 21
## Naomi 16 22 13
## Nikita 11 20 14
## Patricia 16 32 17
## Sherly 24 17 13
## Siana 15 10 18
## Vanessa 20 11 19
##
## Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
## Angel 23 29 18
## Ardifo 21 19 24
## Calisha 21 24 12
## Eric 19 29 19
## Fallen 20 17 21
## Felisha 16 16 17
## Irene 17 19 8
## Jacob 20 19 14
## Jeffry 21 18 20
## Julian 21 22 16
## Kefas 22 17 21
## Kevin 13 11 20
## Lala 14 15 14
## Michael 24 20 11
## Naomi 14 24 24
## Nikita 17 25 19
## Patricia 14 20 15
## Sherly 21 10 22
## Siana 18 21 19
## Vanessa 15 17 19
## -- Attaching packages ----------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.3.1
## -- Conflicts -------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
# Split each transaction date into its year and month parts.
Month_year <- KopiKenangan %>%
  separate(Date, c("Year", "Month", "Day"), sep = "-") %>%
  select(Year, Month)

# Number of distinct "year-month" combinations present in the data.
Month_freq <- length(table(paste(Month_year$Year, Month_year$Month, sep = "-")))

# Order count per menu item, and that count averaged over the months above.
Menu_freq <- as.data.frame(table(KopiKenangan$Menu))
Menu_freq$Monthly.sales <- round(Menu_freq$Freq / Month_freq, 2)
Menu_freq  ## Var1 Freq Monthly.sales
## 1 Cappucino 359 2.18
## 2 Es Kopi Susu 370 2.24
## 3 Hot Caramel Latte 352 2.13
## 4 Hot Chocolate 382 2.32
## 5 Hot Red Velvet Latte 353 2.14
## 6 Ice Americano 392 2.38
## 7 Ice Berry Coffee 325 1.97
## 8 Ice Cafe Latte 323 1.96
## 9 Ice Caramel Latte 343 2.08
## 10 Ice Coffee Avocado 342 2.07
## 11 Ice Coffee Lite 343 2.08
## 12 Ice Matcha Espresso 371 2.25
## 13 Ice Matcha Latte 392 2.38
## 14 Ice Red Velvet Latte 353 2.14