In this section, you are expected to be confident enough to write your own functions. I advise you to create one function for each of the tasks below:
## [1] 1 3 5 9 9 11 35 15 17 19
## [1] 12.4
# Median of a numeric vector.
#
# x: numeric vector.
# Returns the middle element of the sorted values when length(x) is odd,
# otherwise the average of the two central sorted values.
Middle_value <- function(x) {
  ordered <- sort(x)
  count <- length(x)

  # Odd count: a single central element exists.
  if (count %% 2 != 0) {
    return(ordered[(count + 1) / 2])
  }

  # Even count: average the two elements around the centre.
  half <- count / 2
  central_pair <- ordered[c(half, half + 1)]
  sum(central_pair) / 2
}
Middle_value(x)## [1] 10
# Mode of a vector.
#
# x: vector of values.
# Returns the value(s) that occur most often, in order of first appearance;
# ties yield more than one value.
Most_frequent <- function(x) {
  distinct_values <- unique(x)
  # Map every element to the position of its distinct value, then count.
  positions <- match(x, distinct_values)
  counts <- tabulate(positions)
  distinct_values[which(counts == max(counts))]
}
Most_frequent(x)## [1] 9
## [1] 35
## [1] 1
## [1] 97.82222
# Sample standard deviation (n - 1 denominator).
#
# x: numeric vector.
# Returns the square root of the sum of squared deviations from the mean,
# divided by length(x) - 1.
Standard_deviation <- function(x) {
  deviations <- x - mean(x)
  sum_of_squares <- sum(deviations^2)
  sqrt(sum_of_squares / (length(x) - 1))
}
Standard_deviation(x)## [1] 9.890512
# Outliers by the 1.5 * IQR rule.
#
# x: numeric vector.
# Returns the elements of x lying below Q1 - 1.5 * IQR or above
# Q3 + 1.5 * IQR, in their original order.
Outlier <- function(x) {
  quartiles <- quantile(x)
  lower_quartile <- quartiles[2]
  upper_quartile <- quartiles[4]
  spread <- upper_quartile - lower_quartile

  upper_fence <- upper_quartile + spread * 1.5
  lower_fence <- lower_quartile - spread * 1.5

  outside <- x < lower_fence | x > upper_fence
  x[which(outside)]
}
Outlier(x)## [1] 35
summary (all functions) - optional
Multivariate variable (more than one dimension). Here is the example data we are going to test: x = (1, 3, 25, 7, 9) and y = (9, 11, 13, 15, 17).
## x y
## 1 1 9
## 2 3 11
## 3 25 13
## 4 7 15
## 5 9 17
## [1] 125
x <- c(1, 3, 25, 7, 9)
y <- c(9, 11, 13, 15, 17)
# Expanded sample, kept at top level for any code that still reads it.
Function <- rep(x, y)

# Median of a frequency table.
#
# x: numeric vector of values.
# y: non-negative integer counts, one per element of x (passed to rep()).
# Returns the median of the sample in which each x[i] appears y[i] times.
Middle_value <- function(x, y) {
  # Build the sample from the arguments so the result does not depend on
  # whatever the global `Function` currently holds.
  sorted <- sort(rep(x, y))
  n <- length(sorted)
  if (n %% 2 == 0) {
    mid <- sorted[(n / 2):(n / 2 + 1)]
    sum(mid) / 2
  } else {
    sorted[(n + 1) / 2]
  }
}
Middle_value(x, y) ## [1] 7
x <- c(1, 3, 25, 7, 9)
y <- c(9, 11, 13, 15, 17)
# Expanded sample, kept at top level for any code that still reads it.
Function <- rep(x, y)

# Mode of a frequency table.
#
# x: vector of values.
# y: non-negative integer counts, one per element of x (passed to rep()).
# Returns the value(s) with the highest total count; ties yield several.
Most_frequent <- function(x, y) {
  # Expand from the arguments instead of reading the global `Function`.
  expanded <- rep(x, y)
  # unique() takes the data only: a second positional argument would be
  # interpreted as `incomparables`, not as counts.
  Data <- unique(expanded)
  Tab <- tabulate(match(expanded, Data))
  Data[Tab == max(Tab)]
}
Most_frequent(x, y) ## [1] 9
x <- c(1, 3, 25, 7, 9)
y <- c(9, 11, 13, 15, 17)
# Expanded sample, kept at top level for any code that still reads it.
Function <- rep(x, y)

# Largest value of a frequency table.
#
# x: numeric vector of values.
# y: non-negative integer counts, one per element of x (passed to rep()).
# Returns the largest value that occurs at least once; values whose count
# is zero are dropped by rep() and therefore ignored.
Max_value <- function(x, y) {
  # Expand from the arguments instead of reading the global `Function`.
  sorted <- sort(rep(x, y))
  tail(sorted, 1)
}
Max_value(x, y) ## [1] 25
x <- c(1, 3, 25, 7, 9)
y <- c(9, 11, 13, 15, 17)
# Expanded sample, kept at top level for any code that still reads it.
Function <- rep(x, y)

# Smallest value of a frequency table.
#
# x: numeric vector of values.
# y: non-negative integer counts, one per element of x (passed to rep()).
# Returns the smallest value that occurs at least once; values whose count
# is zero are dropped by rep() and therefore ignored.
Min_value <- function(x, y) {
  # Expand from the arguments instead of reading the global `Function`.
  sorted <- sort(rep(x, y))
  head(sorted, 1)
}
Min_value(x, y) ## [1] 1
x <- c(1, 3, 25, 7, 9)
y <- c(9, 11, 13, 15, 17)
# Expanded sample, kept at top level for any code that still reads it.
Function <- rep(x, y)

# Sample variance of a frequency table (n - 1 denominator).
#
# x: numeric vector of values.
# y: non-negative integer counts, one per element of x (passed to rep()).
# Returns the variance of the sample in which each x[i] appears y[i] times.
Variance <- function(x, y) {
  # Expand from the arguments instead of reading the global `Function`.
  expanded <- rep(x, y)
  n <- length(expanded)
  # Deviations must be taken from the mean of the expanded sample itself;
  # centring on any other value inflates the result.
  sum((expanded - mean(expanded))^2) / (n - 1)
}
Variance(x, y) ## [1] 67.74038
x <- c(1, 3, 25, 7, 9)
y <- c(9, 11, 13, 15, 17)
# Expanded sample, kept at top level for any code that still reads it.
Function <- rep(x, y)

# Sample standard deviation of a frequency table (n - 1 denominator).
#
# x: numeric vector of values.
# y: non-negative integer counts, one per element of x (passed to rep()).
# Returns the standard deviation of the sample in which each x[i] appears
# y[i] times.
Standard_deviation <- function(x, y) {
  # Expand from the arguments instead of reading the global `Function`.
  expanded <- rep(x, y)
  n <- length(expanded)
  # Deviations must be taken from the mean of the expanded sample itself;
  # centring on any other value inflates the result.
  sqrt(sum((expanded - mean(expanded))^2) / (n - 1))
}
Standard_deviation(x, y) ## [1] 8.230455
# Outliers of a frequency table by the 1.5 * IQR rule.
#
# x: numeric vector of values.
# y: non-negative integer counts, one per element of x (passed to rep.int()).
# Returns each distinct value lying below Q1 - 1.5 * IQR or above
# Q3 + 1.5 * IQR, in ascending order (numeric(0) when there is none).
Outliers <- function(x, y) {
  # Expand and sort once; every later step works on this vector.
  sorted <- sort(rep.int(x, y))
  quartiles <- quantile(sorted)
  Q1 <- quartiles[2]
  Q3 <- quartiles[4]
  Range <- Q3 - Q1
  Head_line <- (Range * 1.5) + Q3
  Down_line <- Q1 - (Range * 1.5)
  result <- which(sorted < Down_line | sorted > Head_line)
  # Report every distinct outlying value, not just the first match, so that
  # outliers on both sides of the fences are returned.
  unique(sorted[result])
}
x <- c(1, 3, 25, 7, 9)
y <- c(9, 11, 13, 15, 17)
Outliers(x, y) ## [1] 25
summary (all functions) - optional
Simple Case Example
# Simulated transaction data: 5000 rows, one per consecutive day.
# NOTE(review): no seed is set in this chunk, so the sampled columns differ
# on every run unless a seed is set elsewhere.
Id <- (1:5000)
Date <- seq(as.Date("2018/01/01"), by = "day", length.out = 5000)
Name <- sample(c("Angel","Sherly","Vanessa","Irene","Julian","Jeffry","Nikita","Kefas","Siana","Lala",
                 "Fallen","Ardifo","Kevin","Michael","Felisha","Calisha","Patricia","Naomi","Eric","Jacob"),
               5000, replace = TRUE)
# A shuffled vector holding each city exactly 1000 times.
City <- sample(rep(c("Jakarta","Bogor","Depok","Tangerang","Bekasi"), times = 1000))
Outlet <- sample(c("Outlet 1","Outlet 2","Outlet 3","Outlet 4","Outlet 5"), 5000, replace = TRUE)
Menu <- c("Cappucino","Es Kopi Susu","Hot Caramel Latte","Hot Chocolate","Hot Red Velvet Latte","Ice Americano",
          "Ice Berry Coffee","Ice Cafe Latte","Ice Caramel Latte","Ice Coffee Avocado","Ice Coffee Lite",
          "Ice Matcha Espresso","Ice Matcha Latte","Ice Red Velvet Latte")
all_menu <- sample(Menu, 5000, replace = TRUE)
# One price per menu item; sized from Menu so the lookup table stays
# consistent if items are added or removed.
Price <- sample(18000:45000, length(Menu), replace = TRUE)
DFPrice <- data.frame(Menu, Price)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Joining, by = "Menu"
## Id Date Name City Outlet Menu Price
## 1 1 2018-01-01 Jeffry Jakarta Outlet 3 Ice Caramel Latte 30222
## 2 2 2018-01-02 Kevin Jakarta Outlet 2 Ice Coffee Avocado 21897
## 3 3 2018-01-03 Ardifo Bekasi Outlet 5 Ice Matcha Espresso 30220
## 4 4 2018-01-04 Sherly Bogor Outlet 5 Ice Americano 20955
## 5 5 2018-01-05 Nikita Depok Outlet 2 Hot Red Velvet Latte 33742
Let’s say you already have the data set shown above in hand. Please create functions to carry out the following tasks:
# Format proportions as percentage strings.
#
# x: numeric vector of proportions (e.g. 0.2 for 20%).
# Returns a character vector: each value times 100, rounded to one decimal
# place, followed by "%".
Percentage <- function(x) {
  paste0(round(x * 100, 1), "%")
}
# Total sales per city and each city's share of overall sales.
# Note: this rebinds `City` to the aggregated data frame.
City <- aggregate(Price ~ City, data = KopiKenangan, FUN = sum)
Sales <- sum(City[["Price"]])
City[["Percent"]] <- Percentage(City[["Price"]] / Sales)
City ## City Price Percent
## 1 Bekasi 29076291 20%
## 2 Bogor 29093671 20%
## 3 Depok 28986523 19.9%
## 4 Jakarta 29425492 20.2%
## 5 Tangerang 28755425 19.8%
## Var1 Freq
## 1 Angel 225
## 2 Ardifo 244
## 3 Calisha 248
## 4 Eric 264
## 5 Fallen 271
## 6 Felisha 254
## 7 Irene 229
## 8 Jacob 245
## 9 Jeffry 218
## 10 Julian 273
## 11 Kefas 275
## 12 Kevin 263
## 13 Lala 259
## 14 Michael 242
## 15 Naomi 248
## 16 Nikita 228
## 17 Patricia 262
## 18 Sherly 247
## 19 Siana 246
## 20 Vanessa 259
## Var1 Freq
## 1 Cappucino 380
## 2 Es Kopi Susu 365
## 3 Hot Caramel Latte 378
## 4 Hot Chocolate 333
## 5 Hot Red Velvet Latte 365
## 6 Ice Americano 354
## 7 Ice Berry Coffee 357
## 8 Ice Cafe Latte 357
## 9 Ice Caramel Latte 341
## 10 Ice Coffee Avocado 346
## 11 Ice Coffee Lite 356
## 12 Ice Matcha Espresso 369
## 13 Ice Matcha Latte 354
## 14 Ice Red Velvet Latte 345
## -- Attaching packages -------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.3.1
## -- Conflicts ----------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Split each order date into its year and month parts.
Yearly <- KopiKenangan %>%
  separate(Date, c("Year", "Month", "Day"), sep = "-") %>%
  select(Year, Month)
# Number of distinct year-month combinations present in the data.
# Both parts are passed to paste() as ordinary positional arguments.
Monthly <- length(unique(paste(Yearly$Year, Yearly$Month, sep = "-")))
# Orders per menu item, and the average number of orders per month.
Frequency <- as.data.frame(table(KopiKenangan$Menu))
Frequency$Sales <- round(Frequency$Freq / Monthly, 2)
Frequency ## Var1 Freq Sales
## 1 Cappucino 380 2.30
## 2 Es Kopi Susu 365 2.21
## 3 Hot Caramel Latte 378 2.29
## 4 Hot Chocolate 333 2.02
## 5 Hot Red Velvet Latte 365 2.21
## 6 Ice Americano 354 2.15
## 7 Ice Berry Coffee 357 2.16
## 8 Ice Cafe Latte 357 2.16
## 9 Ice Caramel Latte 341 2.07
## 10 Ice Coffee Avocado 346 2.10
## 11 Ice Coffee Lite 356 2.16
## 12 Ice Matcha Espresso 369 2.24
## 13 Ice Matcha Latte 354 2.15
## 14 Ice Red Velvet Latte 345 2.09