This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Load data ----
# One explicit project folder is used for both input and output, so the
# script does not have to change the working directory with setwd().
project_dir <- "C:/Users/kms57/Desktop/UTS/Sem 2/32558 Business Intelligence/ass2"
data <- read.csv(file.path(project_dir, "cleaneddata.csv"))

# Select data range ----
# Keep the first 20 columns (or every column when the file has fewer).
# drop = FALSE guarantees the result stays a data frame.
selected_data <- data[, seq_len(min(20L, ncol(data))), drop = FALSE]

# Meta analysis data ----
# One row per selected column:
#   Field_Name  - column name
#   Data_Type   - class of the column (multiple classes joined with "/")
#   Field_Size  - longest value, in characters, ignoring missing values
#   Sample_Data - one randomly chosen value, stored as text
metadata <- data.frame(
  Field_Name = names(selected_data),
  Data_Type = vapply(
    selected_data,
    function(x) paste(class(x), collapse = "/"),
    character(1)
  ),
  Field_Size = vapply(
    selected_data,
    function(x) {
      # nchar() yields NA for missing strings; without filtering, a single
      # NA in the column would turn the whole maximum into NA.
      widths <- nchar(as.character(x), type = "chars", allowNA = TRUE)
      widths <- widths[!is.na(widths)]
      if (length(widths) > 0) max(widths) else NA_integer_
    },
    integer(1)
  ),
  Sample_Data = vapply(
    selected_data,
    function(x) {
      if (length(x) == 0) {
        return(NA_character_)
      }
      # Sample an index rather than the vector itself: sample(x, 1) draws
      # from 1:x when x is a single number.
      as.character(x[sample.int(length(x), 1L)])
    },
    character(1)
  )
)

# Write to CSV ----
write.csv(
  metadata,
  file.path(project_dir, "cleanddata_metadataanalysis.csv"),
  row.names = FALSE
)
# Profiling data ----
# Apply `stat` to the non-missing values of a numeric column.
# Returns NA for non-numeric columns and for columns with no usable values,
# which avoids the Inf / -Inf / NaN results (and warnings) that min(), max()
# and mean() give on empty input. Values are converted to double first so
# that sum() on a large integer column cannot overflow to NA.
profile_numeric <- function(x, stat) {
  if (!is.numeric(x)) {
    return(NA_real_)
  }
  values <- as.numeric(x[!is.na(x)])
  if (length(values) == 0) {
    return(NA_real_)
  }
  stat(values)
}

# One row per selected column:
#   Distinct_Count          - number of unique values (NA counts as a value)
#   Zero_Blank_Null_Percent - share of values that are 0, "" or NA, stored
#                             as a fraction between 0 and 1 (not multiplied
#                             by 100)
#   Sum / Min / Max / Mean / Std_Dev - numeric columns only, NA otherwise
profiling <- data.frame(
  Field_Name = names(selected_data),
  Distinct_Count = vapply(
    selected_data,
    function(x) length(unique(x)),
    integer(1)
  ),
  Zero_Blank_Null_Percent = vapply(
    selected_data,
    function(x) mean(x == 0 | x == "" | is.na(x)),
    numeric(1)
  ),
  Sum = vapply(selected_data, profile_numeric, numeric(1), stat = sum),
  Min = vapply(selected_data, profile_numeric, numeric(1), stat = min),
  Max = vapply(selected_data, profile_numeric, numeric(1), stat = max),
  Mean = vapply(selected_data, profile_numeric, numeric(1), stat = mean),
  Std_Dev = vapply(selected_data, profile_numeric, numeric(1), stat = sd)
)

# Write to CSV ----
# The target folder is spelled out instead of relying on setwd(), so the
# script leaves the session's working directory untouched.
output_dir <- "C:/Users/kms57/Desktop/UTS/Sem 2/32558 Business Intelligence/ass2"
write.csv(
  profiling,
  file.path(output_dir, "cleanddata_dataprofiling.csv"),
  row.names = FALSE
)