In this section, you are expected to be more confident in creating your own functions. Here I advise you to write a function for each task below:
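The first task is the arithmetic mean. A minimal sketch that matches the output below (the function name average_value is an assumption, not the original definition):
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
average_value <- function(x)
{
n <- length(x)
sum(x)/n   # sum of the values divided by their count
}
average_value(x)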
## [1] 10.5
x<- c(2,3,4,5,6,3,2,3,4,5,9,80)
middle_value <- function(x)
{
sorted <- sort(x)
n <- length(sorted)
if(n %% 2 == 0)
{
mid <- sorted[c(floor(n/2),floor(n/2)+1)]
med <- sum(mid)/2
}else
{med<-sorted[ceiling(n/2)]
}
return(med)
}
middle_value(x)
## [1] 4
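As a quick sanity check, the built-in median() should agree with middle_value() here:
median(x)   # also returns 4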
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
Most_frequent <- function(x){
y <- data.frame(table(x))
y[y$Freq == max(y$Freq),1]
}
Most_frequent(x)
## [1] 3
## Levels: 2 3 4 5 6 9 80
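Because table() is involved, Most_frequent() returns a factor (hence the Levels line). If a plain number is preferred, it can be converted via character first:
as.numeric(as.character(Most_frequent(x)))   # 3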
max_value<-function(x)
{
sorted<-sort(x)
tail(sorted,1)
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
max_value(x)
## [1] 80
min_value<-function(x)
{
sorted<-sort(x)
head(sorted,1)
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
min_value(x)
## [1] 2
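The built-ins max() and min() give the same answers without sorting the whole vector:
max(x)   # 80
min(x)   # 2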
variance_sample<-function(x)
{
n<-length(x)
(sum((x-mean(x))^2))/(n-1)
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
variance_sample(x)
## [1] 482.8182
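The built-in var() also uses the n - 1 denominator, so it works as a check:
var(x)   # 482.8182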
Population variance:
variance_population<-function(x)
{
n<-length(x)
sum((x-mean(x))^2)/n
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
variance_population(x)
## [1] 442.5833
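The two variances differ only in their denominators, so one can be derived from the other:
variance_sample(x) * (length(x) - 1) / length(x)   # 442.5833, same as variance_population(x)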
standard_deviation_sample<-function(x)
{
n<-length(x)
sqrt((sum((x-mean(x))^2))/(n-1))
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
standard_deviation_sample(x)
## [1] 21.97312
Population standard deviation:
standard_deviation_population<-function(x)
{
n<-length(x)
sqrt((sum((x-mean(x))^2))/(n))
}
x<-c(2,3,4,5,6,3,2,3,4,5,9,80)
standard_deviation_population(x)
## [1] 21.03766
z<-c(2,3,4,5,6,3,2,3,4,5,80)
Outliers<-function(x){
sorted<-sort(x)
Q.1<-quantile(x,0.25)
Q.3<-quantile(x,0.75)
IQR<-Q.3-Q.1
Gate<-c(Q.1-IQR*3,Q.3+IQR*3)
Outlier<-x[x<Gate[1]|x>Gate[2]]
result<-paste("Outlier",sep = " = ", Outlier)
return(result)
}
Outliers(z)
## [1] "Outlier = 80"
x<-c(2,3,4,5,6,3,2,3,4,5,80)
summary<-function(x)
{
n<-length(x)
average <-(sum(x)/n)
middle_value <- if(n %% 2 == 0) (sort(x)[n/2] + sort(x)[n/2 + 1])/2 else sort(x)[ceiling(n/2)]
most_frequent <- unique(x)[which.max(tabulate(match(x,unique(x))))]
max <- tail(sort(x),1)
min <- head(sort(x),1)
variance_s <- sum((x-mean(x))^2)/(n-1)
variance_p <- sum((x-mean(x))^2)/n
standar.dev_s <- sqrt(sum((x-mean(x))^2)/(n-1))
standar.dev_p <- sqrt(sum((x-mean(x))^2)/n)
Outliers <- {
sorted<-sort(x)
Q.1<-quantile(x,0.25)
Q.3<-quantile(x,0.75)
IQR<-Q.3-Q.1
Gate<-c(Q.1-IQR*3,Q.3+IQR*3)
result<-(which(x<Gate[1]|x>Gate[2]))
x[head(result)]
}
return(c(mean=average,
med=middle_value,
mode=most_frequent,
max=max,
min=min,
var.s=variance_s,
var.p=variance_p,
stdev.s=standar.dev_s,
stdev.p=standar.dev_p,
Outlier=Outliers
))}
summary(x)
## mean med mode max min var.s var.p stdev.s
## 10.63636 4.00000 3.00000 80.00000 2.00000 530.85455 482.59504 23.04028
## stdev.p Outlier
## 21.96805 80.00000
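One caveat: naming this function summary() masks base::summary() for ordinary calls in the workspace. The built-in can still be reached explicitly when needed:
base::summary(x)   # explicit call to the built-in version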
Multivariate data (more than one dimension): in the tasks below, x holds the observed values and y holds how many times each value occurs, so rep(x, y) expands them into a single sample.
average_multi<-function(x,y)
{
# weighted mean: y holds the frequency (weight) of each value in x
sum(x*y)/sum(y)
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
average_multi(x,y)
## [1] 8.764706
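stats::weighted.mean() treats y as weights and returns the same value, which makes a convenient check:
weighted.mean(x, y)   # 8.764706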
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
middle_value <- function(x,y)
{
sorted <- sort(rep(x, y))   # expand the values by their frequencies rather than relying on the global list3
n <- length(sorted)
if(n %% 2 == 0)
{
mid <- sorted[c(floor(n/2),floor(n/2)+1)]
med <- sum(mid)/2
}else
{med<-sorted[ceiling(n/2)]}
return(med)
}
middle_value(x,y)
## [1] 4
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
Most_frequent <- function(x,y){
freq <- data.frame(table(rep(x, y)))   # expand by frequency, then tabulate
freq[freq$Freq == max(freq$Freq),1]
}
Most_frequent(x,y)
## [1] 4
## Levels: 2 3 4 5 9 70
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
max_value<-function(x,y)
{
sorted<-sort(rep(x, y))   # use the arguments instead of the global list3
tail(sorted,1)
}
max_value(x,y)
## [1] 70
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
min_value<-function(x,y)
{
sorted<-sort(rep(x, y))
head(sorted,1)
}
min_value(x,y)
## [1] 2
variance_sample<-function(x,y)
{
values<-rep(x, y)   # expand values by frequency instead of using the global list3
n<-length(values)
sum((values-mean(values))^2)/(n-1)
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
variance_sample(x,y)
## [1] 255.1912
Population variance:
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
variance_population<-function(x,y)
{
values<-rep(x, y)
n<-length(values)
sum((values-mean(values))^2)/n
}
variance_population(x,y)
## [1] 240.1799
standard_deviation_sample<-function(x,y)
{
list3<-rep(x,y)
n<-sum(y)
sqrt((sum((list3-mean(list3))^2))/(n-1))
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
standard_deviation_sample(x,y)
## [1] 15.9747
Population standard deviation:
standard_deviation_population<-function(x,y)
{
list3<-rep(x,y)
n<-sum(y)
sqrt((sum((list3-mean(list3))^2))/n)
}
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
standard_deviation_population(x,y)
## [1] 15.49774
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
Outliers<-function(x,y){
list3<-rep(x,y)
sorted<-sort(list3)
n<-length(list3)
Q.1<-quantile(sorted,0.25)
Q.3<-quantile(sorted,0.75)
IQR<-Q.3-Q.1
Gate<-c(Q.1-IQR*3,Q.3+IQR*3)
Outlier<-x[x<Gate[1]|x>Gate[2]]
result<-paste("Outlier",sep = " = ", Outlier)
return(result)
}
Outliers(x,y)
## [1] "Outlier = 70"
x<-c(2,3,4,5,9,70)
y<-c(2,3,5,2,4,1)
list3<-rep(x,y)
summary<-function(x,y)
{
list3<-rep(x, y)   # build the expanded sample inside the function
n<-length(list3)
average <-(sum(x*y)/n)
middle_value <- if(n %% 2 == 0) (sort(list3)[n/2] + sort(list3)[n/2 + 1])/2 else sort(list3)[ceiling(n/2)]
most_frequent <- unique(list3)[which.max(tabulate(match(list3, unique(list3))))]
max <- tail(sort(list3),1)
min <- head(sort(list3),1)
variance_s <- sum((list3-mean(list3))^2)/(n-1)
variance_p <- sum((list3-mean(list3))^2)/n
standar.dev_s <- sqrt(sum((list3-mean(list3))^2)/(n-1))
standar.dev_p <- sqrt(sum((list3-mean(list3))^2)/n)
Outliers <- {
sorted<-sort(list3)
Q.1<-quantile(list3,0.25)
Q.3<-quantile(list3,0.75)
IQR<-Q.3-Q.1
Gate<-c(Q.1-IQR*3,Q.3+IQR*3)
result<-(which(x<Gate[1]|x>Gate[2]))
x[head(result)]
}
return(c(mean=average,
med=middle_value,
mode=most_frequent,
max=max,
min=min,
var.s=variance_s,
var.p=variance_p,
stdev.s=standar.dev_s,
stdev.p=standar.dev_p,
Outlier=Outliers
))}
summary(x,y)
## mean med mode max min var.s var.p
## 8.764706 4.000000 4.000000 70.000000 2.000000 255.191176 240.179931
## stdev.s stdev.p Outlier
## 15.974704 15.497740 70.000000
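The next part builds a synthetic sales data set. Because every column is drawn with sample(), the exact rows and totals printed below change on each run; fixing a seed beforehand keeps them reproducible (the seed value here is an arbitrary choice):
set.seed(2018)   # any fixed number works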
Id <- (1:5000)
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)
Name <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
"Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
5000, replace = T)
City <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))
Outlet <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"),5000, replace = T)
Menu <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
"Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
"Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
all_menu <- sample(Menu, 5000, replace = T)
Price <- sample(18000:45000,14, replace = T)
DFPrice <- data.frame(Menu, Price)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
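The transaction table itself, KopiKenangan, combines the generated columns and pulls in the price of each menu item; a minimal sketch consistent with the join message and the preview below (left_join() and a five-row head() are assumptions):
KopiKenangan <- data.frame(Id, Date, Name, City, Outlet, Menu = all_menu) %>%
  left_join(DFPrice)
head(KopiKenangan, 5)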
## Joining, by = "Menu"
## Id Date Name City Outlet Menu Price
## 1 1 2018-01-01 Julian Depok Outlet 2 Ice Matcha Latte 26084
## 2 2 2018-01-02 Naomi Tangerang Outlet 1 Ice Red Velvet Latte 23818
## 3 3 2018-01-03 Sherly Bogor Outlet 5 Es Kopi Susu 38275
## 4 4 2018-01-04 Kefas Jakarta Outlet 1 Ice Coffee Lite 21213
## 5 5 2018-01-05 Irene Tangerang Outlet 2 Ice Red Velvet Latte 23818
Let’s say you already have a data set in hand, as shown above. Please create a function for each of the following tasks:
Percentage <- function(x){
percent <- round(x*100, 1)
result <- paste(percent, sep = "", "%")
return(result)
}
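A quick call shows the formatting it produces:
Percentage(0.25)   # "25%"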
City.Sales <- aggregate(Price ~ City, data = KopiKenangan, sum)
Total.Sales <- sum(City.Sales$Price)
City.Sales$Percentage.City.Sales <- Percentage(City.Sales$Price/Total.Sales)
City.Sales
## City Price Percentage.City.Sales
## 1 Bekasi 30822771 20.2%
## 2 Bogor 30044515 19.7%
## 3 Depok 30540690 20.1%
## 4 Jakarta 30593100 20.1%
## 5 Tangerang 30256179 19.9%
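The next task counts how often each customer appears; a table() on the Name column, converted to a data frame, matches the layout shown below (a reconstruction, since the original chunk is not shown):
as.data.frame(table(KopiKenangan$Name))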
## Var1 Freq
## 1 Angel 248
## 2 Ardifo 241
## 3 Calisha 243
## 4 Eric 241
## 5 Fallen 225
## 6 Felisha 249
## 7 Irene 257
## 8 Jacob 254
## 9 Jeffry 266
## 10 Julian 255
## 11 Kefas 255
## 12 Kevin 248
## 13 Lala 257
## 14 Michael 252
## 15 Naomi 250
## 16 Nikita 251
## 17 Patricia 248
## 18 Sherly 250
## 19 Siana 246
## 20 Vanessa 264
Frequency of Menu
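The same idea applies to the Menu column (again a reconstructed call):
as.data.frame(table(KopiKenangan$Menu))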
## Var1 Freq
## 1 Cappucino 339
## 2 Es Kopi Susu 364
## 3 Hot Caramel Latte 353
## 4 Hot Chocolate 361
## 5 Hot Red Velvet Latte 385
## 6 Ice Americano 357
## 7 Ice Berry Coffee 346
## 8 Ice Cafe Latte 370
## 9 Ice Caramel Latte 351
## 10 Ice Coffee Avocado 339
## 11 Ice Coffee Lite 371
## 12 Ice Matcha Espresso 337
## 13 Ice Matcha Latte 360
## 14 Ice Red Velvet Latte 367
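The cross-tabulation below counts each customer's orders per menu item; a plain two-way table() over Name and Menu reproduces it (a reconstruction of the missing chunk):
table(KopiKenangan$Name, KopiKenangan$Menu)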
##
## Cappucino Es Kopi Susu Hot Caramel Latte Hot Chocolate
## Angel 16 13 18 24
## Ardifo 24 11 20 21
## Calisha 19 19 22 17
## Eric 13 15 10 17
## Fallen 13 14 17 17
## Felisha 17 20 20 23
## Irene 16 17 14 15
## Jacob 14 18 21 18
## Jeffry 19 18 19 20
## Julian 25 29 20 16
## Kefas 17 21 20 16
## Kevin 16 16 21 20
## Lala 22 15 19 12
## Michael 20 20 11 18
## Naomi 13 22 16 15
## Nikita 19 21 21 17
## Patricia 12 13 13 8
## Sherly 13 17 20 20
## Siana 11 23 12 17
## Vanessa 20 22 19 30
##
## Hot Red Velvet Latte Ice Americano Ice Berry Coffee Ice Cafe Latte
## Angel 19 20 20 19
## Ardifo 20 18 22 20
## Calisha 17 12 17 12
## Eric 26 18 11 19
## Fallen 17 17 14 9
## Felisha 17 18 14 16
## Irene 19 24 12 20
## Jacob 21 17 21 20
## Jeffry 14 22 20 26
## Julian 15 18 17 15
## Kefas 25 23 16 12
## Kevin 22 13 14 22
## Lala 21 19 17 12
## Michael 18 21 10 17
## Naomi 24 16 22 23
## Nikita 18 16 20 18
## Patricia 16 26 28 26
## Sherly 19 11 17 24
## Siana 19 16 20 24
## Vanessa 18 12 14 16
##
## Ice Caramel Latte Ice Coffee Avocado Ice Coffee Lite
## Angel 19 17 15
## Ardifo 16 16 12
## Calisha 15 17 20
## Eric 18 20 11
## Fallen 18 13 20
## Felisha 23 9 26
## Irene 23 18 21
## Jacob 23 22 17
## Jeffry 18 17 22
## Julian 13 13 21
## Kefas 20 17 19
## Kevin 16 27 15
## Lala 12 23 23
## Michael 20 20 14
## Naomi 16 16 23
## Nikita 15 11 17
## Patricia 22 14 23
## Sherly 17 15 17
## Siana 15 16 13
## Vanessa 12 18 22
##
## Ice Matcha Espresso Ice Matcha Latte Ice Red Velvet Latte
## Angel 16 15 17
## Ardifo 13 15 13
## Calisha 19 16 21
## Eric 25 20 18
## Fallen 16 17 23
## Felisha 13 17 16
## Irene 14 20 24
## Jacob 20 13 9
## Jeffry 9 17 25
## Julian 17 21 15
## Kefas 15 18 16
## Kevin 15 19 12
## Lala 23 16 23
## Michael 15 22 26
## Naomi 14 17 13
## Nikita 24 14 20
## Patricia 15 17 15
## Sherly 17 18 25
## Siana 15 26 19
## Vanessa 22 22 17
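The separate() call in the next chunk comes from tidyr, so the tidyverse is loaded first; its startup message follows:
library(tidyverse)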
## -- Attaching packages ----------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.3.1
## -- Conflicts -------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
Month_year<-KopiKenangan%>%separate(Date,c("Year", "Month","Day"),sep="-")%>%select(Year,Month)
Month_freq<-paste(Month_year$Year, sep= "-", Month_year$Month)%>%table()%>%length()
Menu_freq<-as.data.frame(table(KopiKenangan$Menu))
Menu_freq$Monthly.sales<-Menu_freq$Freq/Month_freq
Menu_freq
## Var1 Freq Monthly.sales
## 1 Cappucino 339 2.054545
## 2 Es Kopi Susu 364 2.206061
## 3 Hot Caramel Latte 353 2.139394
## 4 Hot Chocolate 361 2.187879
## 5 Hot Red Velvet Latte 385 2.333333
## 6 Ice Americano 357 2.163636
## 7 Ice Berry Coffee 346 2.096970
## 8 Ice Cafe Latte 370 2.242424
## 9 Ice Caramel Latte 351 2.127273
## 10 Ice Coffee Avocado 339 2.054545
## 11 Ice Coffee Lite 371 2.248485
## 12 Ice Matcha Espresso 337 2.042424
## 13 Ice Matcha Latte 360 2.181818
## 14 Ice Red Velvet Latte 367 2.224242
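An alternative way to count the number of months covered by the data, without splitting Date into strings, is to format the dates as year-month and count the distinct values (a sketch, not the original approach):
length(unique(format(KopiKenangan$Date, "%Y-%m")))   # 165 distinct year-month periods, the divisor behind Monthly.sales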