In this section, you are expected to be more confident in creating your own functions. Here I advise you to write a function for each of the tasks below:
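The first task is the arithmetic mean. The chunk that produced the output below is not shown in this extract, so here is a minimal sketch consistent with that output (the name average and its body are my reconstruction, not the original code):

x <- c(2,3,4,5,6,3,2,3,4,5,9,80)
average <- function(x)
{
sum(x)/length(x)   # arithmetic mean: the total divided by the number of observations (reconstruction)
}
average(x)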
## [1] 10.5
x<- c(2,3,4,5,6,3,2,3,4,5,9,80)
middle_value <- function(x)
{
sorted <- sort(x)
n <- length(sorted)
if(n %% 2 == 0)
{
mid <- sorted[c(floor(n/2),floor(n/2)+1)]
med <- sum(mid)/2
}else
{med<-sorted[ceiling(n/2)]
}
return(med)
}
middle_value(x)
## [1] 4
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
Most_frequent <- function(x){
y <- data.frame(table(x))
y[y$Freq == max(y$Freq),1]
}
Most_frequent(x)
## [1] 3
## Levels: 2 3 4 5 6 9 80
max_value<-function(x)
{
sorted<-sort(x)
tail(sorted,1)
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
max_value(x)
## [1] 80
min_value<-function(x)
{
sorted<-sort(x)
head(sorted,1)
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
min_value(x)
## [1] 2
variance_sample<-function(x)
{
n<-length(x)
(sum((x-mean(x))^2))/(n-1)
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
variance_sample(x)
## [1] 482.8182
Population variance
variance_population<-function(x)
{
n<-length(x)
sum((x-mean(x))^2)/n
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
variance_population(x)
## [1] 442.5833
standard_deviation_sample<-function(x)
{
n<-length(x)
sqrt((sum((x-mean(x))^2))/(n-1))
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
standard_deviation_sample(x)
## [1] 21.97312
Population standard deviation
standard_deviation_population<-function(x)
{
n<-length(x)
sqrt((sum((x-mean(x))^2))/(n))
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
standard_deviation_population(x)
## [1] 21.03766
z<-c(2,3,4,5,6,3,2,3,4,5,80)
Outliers<-function(x){
sorted<-sort(x)
Q.1<-quantile(x,0.25)
Q.3<-quantile(x,0.75)
IQR<-Q.3-Q.1
Gate<-c(Q.1-IQR*3,Q.3+IQR*3)   # fences at 3*IQR flag only extreme outliers; 1.5*IQR is the more common rule
Outlier<-x[x<Gate[1]|x>Gate[2]]
result<-paste("Outlier",sep = " = ", Outlier)
return(result)
}
Outliers(z)
## [1] "Outlier = 80"
x<-c(2,3,4,5,6,3,2,3,4,5,80)
summary<-function(x)
{
n<-length(x)
average <-(sum(x)/n)
sorted <- sort(x)
# median: same even/odd logic as middle_value() above
middle_value <- if (n %% 2 == 0) (sorted[n/2] + sorted[n/2 + 1])/2 else sorted[ceiling(n/2)]
most_frequent <- unique(x)[which.max(tabulate(match(x,unique(x))))]
max <- tail(sort(x),1)
min <- head(sort(x),1)
variance_s <- sum((x-mean(x))^2)/(n-1)
variance_p <- sum((x-mean(x))^2)/n
standar.dev_s <- sqrt(sum((x-mean(x))^2)/(n-1))
standar.dev_p <- sqrt(sum((x-mean(x))^2)/n)
Outliers <- {
sorted<-sort(x)
Q.1<-quantile(x,0.25)
Q.3<-quantile(x,0.75)
IQR<-Q.3-Q.1
Gate<-c(Q.1-IQR*3,Q.3+IQR*3)
result<-(which(x<Gate[1]|x>Gate[2]))
x[head(result)]
}
return(c(mean=average,
med=middle_value,
mode=most_frequent,
max=max,
min=min,
var.s=variance_s,
var.p=variance_p,
stdev.s=standar.dev_s,
stdev.p=standar.dev_p,
Outlier=Outliers
))}
summary(x)
## mean med mode max min var.s var.p stdev.s
## 10.63636 4.00000 3.00000 80.00000 2.00000 530.85455 482.59504 23.04028
## stdev.p Outlier
## 21.96805 80.00000
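As a quick sanity check (my addition, not part of the original exercise), the built-in base R functions should agree with the custom results above:

mean(x)     # should equal the mean reported by summary(x)
median(x)   # should equal med
var(x)      # sample variance, should equal var.s
sd(x)       # sample standard deviation, should equal stdev.s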
Multivariate variables (more than one dimension): here x holds the distinct values and y holds their frequencies, so rep(x, y) expands them into the full sample.
average_multi <- function(x, y)
{
# weighted mean: each value in x occurs y times, so divide by the total frequency
sum(x*y)/sum(y)
}
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
average_multi(x, y)
## [1] 8.764706
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
middle_value <- function(x, y)
{
sorted <- sort(rep(x, y))   # expand the values by their frequencies before sorting
n <- length(sorted)
if(n %% 2 == 0)
{
mid <- sorted[c(floor(n/2),floor(n/2)+1)]
med <- sum(mid)/2
}else
{med <- sorted[ceiling(n/2)]}
return(med)
}
middle_value(x, y)
## [1] 4
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
Most_frequent <- function(x, y){
freq <- data.frame(table(rep(x, y)))   # frequency table of the expanded values
freq[freq$Freq == max(freq$Freq), 1]   # value(s) with the highest count
}
Most_frequent(x, y)
## [1] 4
## Levels: 2 3 4 5 9 70
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
max_value <- function(x, y)
{
sorted <- sort(rep(x, y))
tail(sorted, 1)   # largest value after sorting
}
max_value(x, y)
## [1] 70
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
min_value <- function(x, y)
{
sorted <- sort(rep(x, y))
head(sorted, 1)   # smallest value after sorting
}
min_value(x, y)
## [1] 2
variance_sample <- function(x, y)
{
list3 <- rep(x, y)   # expand the values by their frequencies
n <- length(list3)
sum((list3 - mean(list3))^2)/(n - 1)
}
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
variance_sample(x, y)
## [1] 255.1912
Population variance
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
variance_population <- function(x, y)
{
list3 <- rep(x, y)
n <- length(list3)
sum((list3 - mean(list3))^2)/n
}
variance_population(x, y)
## [1] 240.1799
standard_deviation_sample <- function(x, y)
{
list3 <- rep(x, y)
n <- sum(y)   # total number of observations
sqrt(sum((list3 - mean(list3))^2)/(n - 1))
}
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
standard_deviation_sample(x, y)
## [1] 15.9747
Population standard deviation
standard_deviation_population <- function(x, y)
{
list3 <- rep(x, y)
n <- sum(y)
sqrt(sum((list3 - mean(list3))^2)/n)
}
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
standard_deviation_population(x, y)
## [1] 15.49774
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
Outliers<-function(x,y){
list3<-rep(x,y)
sorted<-sort(list3)
n<-length(list3)
Q.1<-quantile(sorted,0.25)
Q.3<-quantile(sorted,0.75)
IQR<-Q.3-Q.1
Gate<-c(Q.1-IQR*3,Q.3+IQR*3)
Outlier<-x[x<Gate[1]|x>Gate[2]]
result<-paste("Outlier",sep = " = ", Outlier)
return(result)
}
Outliers(x,y)
## [1] "Outlier = 70"
x <- c(2,3,4,5,9,70)
y <- c(2,3,5,2,4,1)
summary <- function(x, y)
{
list3 <- rep(x, y)   # expand the values by their frequencies
n <- length(list3)
average <- sum(x*y)/n   # weighted mean
sorted <- sort(list3)
# median: same even/odd logic as middle_value() above
middle_value <- if (n %% 2 == 0) (sorted[n/2] + sorted[n/2 + 1])/2 else sorted[ceiling(n/2)]
most_frequent <- unique(list3)[which.max(tabulate(match(list3, unique(list3))))]
max <- tail(sort(list3),1)
min <- head(sort(list3),1)
variance_s <- sum((list3-mean(list3))^2)/(n-1)
variance_p <- sum((list3-mean(list3))^2)/n
standar.dev_s <- sqrt(sum((list3-mean(list3))^2)/(n-1))
standar.dev_p <- sqrt(sum((list3-mean(list3))^2)/n)
Outliers <- {
sorted<-sort(list3)
Q.1<-quantile(list3,0.25)
Q.3<-quantile(list3,0.75)
IQR<-Q.3-Q.1
Gate<-c(Q.1-IQR*3,Q.3+IQR*3)
result<-(which(x<Gate[1]|x>Gate[2]))
x[head(result)]
}
return(c(mean=average,
med=middle_value,
mode=most_frequent,
max=max,
min=min,
var.s=variance_s,
var.p=variance_p,
stdev.s=standar.dev_s,
stdev.p=standar.dev_p,
Outlier=Outliers
))}
summary(x, y)
## mean med mode max min var.s var.p
## 8.764706 4.000000 4.000000 70.000000 2.000000 255.191176 240.179931
## stdev.s stdev.p Outlier
## 15.974704 15.497740 70.000000
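The same cross-check works for the weighted case (again my addition): applying the base functions to the expanded vector should reproduce the numbers above.

list3 <- rep(x, y)
mean(list3)     # weighted mean, should equal the mean above
median(list3)   # should equal med
var(list3)      # should equal var.s
sd(list3)       # should equal stdev.s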
Id <- (1:5000)
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)
Name <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
"Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
5000, replace = T)
City <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))
Outlet <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = T)
Menu <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
"Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
"Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
all_menu <- sample(Menu, 5000, replace = T)
Price <- sample(18000:45000,14, replace = T)
DFPrice <- data.frame(Menu, Price)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
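The chunk that assembles the transaction table is not shown in this extract. A minimal sketch of how the KopiKenangan data frame used below could be built from the objects above (this is my reconstruction; the join message and preview that follow are the original output):

KopiKenangan <- data.frame(Id, Date, Name, City, Outlet, Menu = all_menu) %>% left_join(DFPrice)
head(KopiKenangan, 5)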
## Joining, by = "Menu"
## Id Date Name City Outlet Menu Price
## 1 1 2018-01-01 Vanessa Bogor Outlet 4 Hot Red Velvet Latte 20218
## 2 2 2018-01-02 Nikita Depok Outlet 4 Hot Chocolate 29327
## 3 3 2018-01-03 Kefas Depok Outlet 3 Ice Caramel Latte 34588
## 4 4 2018-01-04 Julian Tangerang Outlet 2 Ice Americano 21643
## 5 5 2018-01-05 Eric Tangerang Outlet 2 Hot Chocolate 29327
Let's say you already have a data set in hand, as you can see above. Please create a function for each of the following tasks:
Percentage <- function(x){
percent <- round(x*100, 1)
result <- paste(percent, sep = "", "%")
return(result)
}
City.Sales <- aggregate(Price ~ City, data = KopiKenangan, sum)
Total.Sales <- sum(City.Sales$Price)
City.Sales$Percentage.City.Sales <- Percentage(City.Sales$Price/Total.Sales)
City.Sales
## City Price Percentage.City.Sales
## 1 Bekasi 28287234 19.9%
## 2 Bogor 28410512 19.9%
## 3 Depok 28622952 20.1%
## 4 Jakarta 28476626 20%
## 5 Tangerang 28624363 20.1%
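Frequency of Name. The chunk that produced the table below is likewise not shown; a simple frequency table such as this sketch (Name_freq is my own name for it) gives the same Var1/Freq layout:

Name_freq <- as.data.frame(table(KopiKenangan$Name))
Name_freq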
## Var1 Freq
## 1 Angel 271
## 2 Ardifo 232
## 3 Calisha 261
## 4 Eric 257
## 5 Fallen 248
## 6 Felisha 250
## 7 Irene 255
## 8 Jacob 234
## 9 Jeffry 227
## 10 Julian 256
## 11 Kefas 253
## 12 Kevin 291
## 13 Lala 226
## 14 Michael 250
## 15 Naomi 264
## 16 Nikita 280
## 17 Patricia 249
## 18 Sherly 225
## 19 Siana 230
## 20 Vanessa 241
Frequency of Menu
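As above, the original code for this table is not shown; a sketch that would print the same layout:

as.data.frame(table(KopiKenangan$Menu))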
## Var1 Freq
## 1 Cappucino 379
## 2 Es Kopi Susu 375
## 3 Hot Caramel Latte 372
## 4 Hot Chocolate 393
## 5 Hot Red Velvet Latte 346
## 6 Ice Americano 323
## 7 Ice Berry Coffee 328
## 8 Ice Cafe Latte 383
## 9 Ice Caramel Latte 336
## 10 Ice Coffee Avocado 355
## 11 Ice Coffee Lite 385
## 12 Ice Matcha Espresso 342
## 13 Ice Matcha Latte 345
## 14 Ice Red Velvet Latte 338
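The wide cross-tabulation below (customers by menu item) is the kind of output a two-way table() call produces; a sketch, assuming the original chunk looked something like this:

table(KopiKenangan$Name, KopiKenangan$Menu)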
##
## Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
## Angel 20 24 27 17
## Ardifo 10 21 20 18
## Calisha 18 18 20 23
## Eric 13 11 17 24
## Fallen 13 21 17 29
## Felisha 18 17 19 18
## Irene 24 22 27 14
## Jacob 14 18 16 19
## Jeffry 20 16 18 19
## Julian 18 21 22 29
## Kefas 18 22 12 18
## Kevin 30 20 17 27
## Lala 18 16 15 21
## Michael 18 16 16 14
## Naomi 15 14 16 18
## Nikita 24 23 20 23
## Patricia 29 16 24 18
## Sherly 25 20 9 11
## Siana 18 20 18 18
## Vanessa 16 19 22 15
##
## Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
## Angel 17 19 19 22
## Ardifo 10 17 19 16
## Calisha 11 13 20 25
## Eric 24 20 16 16
## Fallen 9 18 13 23
## Felisha 22 14 19 12
## Irene 19 14 10 20
## Jacob 22 20 15 19
## Jeffry 18 16 14 11
## Julian 15 12 19 20
## Kefas 24 10 20 19
## Kevin 18 20 20 22
## Lala 14 17 14 14
## Michael 18 15 14 21
## Naomi 23 14 15 30
## Nikita 15 23 19 27
## Patricia 13 13 15 19
## Sherly 13 16 14 18
## Siana 22 15 13 14
## Vanessa 19 17 20 15
##
## Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
## Angel 17 14 18
## Ardifo 17 14 16
## Calisha 18 19 24
## Eric 15 17 16
## Fallen 16 14 16
## Felisha 16 22 20
## Irene 18 21 18
## Jacob 13 15 18
## Jeffry 16 18 18
## Julian 18 8 22
## Kefas 14 17 28
## Kevin 23 21 28
## Lala 15 18 20
## Michael 17 24 22
## Naomi 21 23 17
## Nikita 13 13 23
## Patricia 19 26 17
## Sherly 14 21 16
## Siana 18 22 13
## Vanessa 18 8 15
##
## Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
## Angel 19 17 21
## Ardifo 24 15 15
## Calisha 14 17 21
## Eric 24 26 18
## Fallen 29 17 13
## Felisha 21 15 17
## Irene 18 14 16
## Jacob 15 17 13
## Jeffry 13 13 17
## Julian 12 20 20
## Kefas 14 20 17
## Kevin 13 17 15
## Lala 18 12 14
## Michael 17 18 20
## Naomi 22 18 18
## Nikita 11 20 26
## Patricia 14 15 11
## Sherly 13 21 14
## Siana 10 16 13
## Vanessa 21 17 19
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.3.1
## -- Conflicts -------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
Month_year <- KopiKenangan %>% separate(Date, c("Year", "Month", "Day"), sep = "-") %>% select(Year, Month)
Month_freq <- paste(Month_year$Year, Month_year$Month, sep = "-") %>% table() %>% length()   # number of distinct year-month periods in the data
Menu_freq <- as.data.frame(table(KopiKenangan$Menu))
Menu_freq$Monthly.sales <- Menu_freq$Freq/Month_freq   # average number of sales per month for each menu item
Menu_freq
## Var1 Freq Monthly.sales
## 1 Cappucino 379 2.296970
## 2 Es Kopi Susu 375 2.272727
## 3 Hot Caramel Latte 372 2.254545
## 4 Hot Chocolate 393 2.381818
## 5 Hot Red Velvet Latte 346 2.096970
## 6 Ice Americano 323 1.957576
## 7 Ice Berry Coffee 328 1.987879
## 8 Ice Cafe Latte 383 2.321212
## 9 Ice Caramel Latte 336 2.036364
## 10 Ice Coffee Avocado 355 2.151515
## 11 Ice Coffee Lite 385 2.333333
## 12 Ice Matcha Espresso 342 2.072727
## 13 Ice Matcha Latte 345 2.090909
## 14 Ice Red Velvet Latte 338 2.048485