# Setup ----
# Install the required packages only when they are missing, then attach them.
# Guarding with requireNamespace() avoids re-installing on every run.
if (!requireNamespace("gsheet", quietly = TRUE)) {
  install.packages("gsheet")
}
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(gsheet)
library(tidyverse)

# Load data ----
# Read the shared Google Sheets document into a table.
# NOTE(review): the `rtpof=true` flag in the link suggests an uploaded Office
# file rather than a native Google Sheet -- confirm gsheet2tbl() can read it.
url <- "https://docs.google.com/spreadsheets/d/1osK6KmUP7JEcXYquvR1KjCehccNw9ZvG/edit?usp=drivesdk&ouid=103856525979157124221&rtpof=true&sd=true"
data <- gsheet2tbl(url)

# Transform data types ----

# Convert CustomerCode to character.
data$CustomerCode <- as.character(data$CustomerCode)

# Convert categorical variables to factors.
data$Category <- as.factor(data$Category)
data$Department <- as.factor(data$Department)

# Convert the date column (assuming it is named "Date") to Date format.
data$Date <- as.Date(data$Date, format = "%Y-%m-%d")

# Summarize the Quantity and Price columns ----
summary(data$Quantity)
summary(data$Price)

# Count NA values in each column ----
colSums(is.na(data))

# Bar chart for the Category column ----
# geom_bar() with only an x aesthetic counts the rows in each category.
ggplot(data, aes(x = Category)) +
  geom_bar(fill = "blue") +
  theme_minimal() +
  labs(title = "Category Frequency", x = "Category", y = "Count")

# Data cleaning ----
# Replace missing Quantity and Price values with the column mean.
data$Quantity[is.na(data$Quantity)] <- mean(data$Quantity, na.rm = TRUE)
data$Price[is.na(data$Price)] <- mean(data$Price, na.rm = TRUE)

# Revenue is derived from the cleaned Price and Quantity columns. The earlier
# mean-imputation of Revenue was dropped: the column is overwritten here, so
# that step had no effect on the result.
data$Revenue <- data$Price * data$Quantity

# Bar chart for department revenue (ordered by revenue) ----
department_revenue <- data %>%
  group_by(Department) %>%
  summarize(TotalRevenue = sum(Revenue), .groups = "drop")

# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(
  department_revenue,
  aes(x = reorder(Department, -TotalRevenue), y = TotalRevenue)
) +
  geom_col(fill = "green") +
  theme_minimal() +
  labs(
    title = "Revenue by Department",
    x = "Department",
    y = "Total Revenue"
  ) +
  coord_flip()

# Univariate plots (boxplot and histogram for Price and Quantity) ----

# Boxplot for Price
ggplot(data, aes(y = Price)) +
  geom_boxplot(fill = "purple") +
  theme_minimal() +
  labs(title = "Boxplot of Price", y = "Price")

# Histogram for Price
ggplot(data, aes(x = Price)) +
  geom_histogram(fill = "purple", bins = 30) +
  theme_minimal() +
  labs(title = "Histogram of Price", x = "Price", y = "Frequency")

# Boxplot for Quantity
ggplot(data, aes(y = Quantity)) +
  geom_boxplot(fill = "red") +
  theme_minimal() +
  labs(title = "Boxplot of Quantity", y = "Quantity")

# Histogram for Quantity
ggplot(data, aes(x = Quantity)) +
  geom_histogram(fill = "red", bins = 30) +
  theme_minimal() +
  labs(title = "Histogram of Quantity", x = "Quantity", y = "Frequency")

# Essay Comparing Power BI, Alteryx, and R: Power BI vs. Alteryx vs. R for Data Analysis

Power BI, Alteryx, and R each offer distinct advantages and limitations for data analysis.

Power BI is widely used for data visualization and business intelligence. It allows users to create interactive dashboards without extensive coding knowledge. One of its biggest strengths is ease of use and seamless integration with Microsoft products (e.g., Excel, Azure). However, Power BI has limitations in handling large datasets and advanced statistical modeling, which R excels at.

Alteryx is a drag-and-drop tool designed for data preparation, blending, and automation. It is powerful for non-coders who need to process and analyze data quickly. Alteryx also provides built-in machine learning models. However, its licensing cost is high, and it lacks the flexibility and community support that R offers.

R is an open-source programming language that excels in statistical computing and machine learning. It is highly customizable and offers extensive packages for data analysis, visualization, and machine learning. R is great for handling complex statistical models, but it has a steeper learning curve than Power BI and Alteryx. In summary, Power BI is best for visualization, Alteryx is strong for ETL (Extract, Transform, Load), and R is ideal for in-depth statistical analysis. The choice depends on the user’s technical skills, budget, and specific analysis needs.

mod4_peer_rev

Tegar

24/2/2025