# Install the required packages (only when they are missing), then load them.
for (pkg in c("gsheet", "tidyverse")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(gsheet)
library(tidyverse)

# Load the data from the shared Google Sheets link.
# NOTE(review): the link carries `rtpof=true`, which may indicate an uploaded
# Office file rather than a native Google Sheet -- confirm gsheet2tbl() can
# read it.
url <- "https://docs.google.com/spreadsheets/d/1osK6KmUP7JEcXYquvR1KjCehccNw9ZvG/edit?usp=drivesdk&ouid=103856525979157124221&rtpof=true&sd=true"
data <- gsheet2tbl(url)
# Coerce columns to the types used by the rest of the analysis.
data$CustomerCode <- as.character(data$CustomerCode)
data$Category <- as.factor(data$Category)
data$Department <- as.factor(data$Department)
data$Date <- as.Date(data$Date, format = "%Y-%m-%d")

# Summary statistics for the Quantity and Price columns.
summary(data$Quantity)
summary(data$Price)

# Count of missing values in each column.
colSums(is.na(data))
# Bar chart of the number of rows per Category.
ggplot(data, aes(x = Category)) +
  geom_bar(fill = "blue") +
  theme_minimal() +
  labs(title = "Category Frequency", x = "Category", y = "Count")
# Data cleaning: replace missing values with the mean of the column.
# NOTE(review): the Revenue imputation is superseded by the recomputation of
# Revenue further down; it is kept here to preserve the original steps.
data$Revenue[is.na(data$Revenue)] <- mean(data$Revenue, na.rm = TRUE)
data$Quantity[is.na(data$Quantity)] <- mean(data$Quantity, na.rm = TRUE)
data$Price[is.na(data$Price)] <- mean(data$Price, na.rm = TRUE)

# Recompute Revenue from the cleaned Price and Quantity columns.
data$Revenue <- data$Price * data$Quantity

# Total revenue per department, drawn as horizontal bars ordered by revenue.
data %>%
  group_by(Department) %>%
  summarize(TotalRevenue = sum(Revenue), .groups = "drop") %>%
  ggplot(aes(x = reorder(Department, -TotalRevenue), y = TotalRevenue)) +
  geom_col(fill = "green") +
  theme_minimal() +
  labs(
    title = "Revenue by Department",
    x = "Department",
    y = "Total Revenue"
  ) +
  coord_flip()
# Distribution of Price: boxplot, then a 30-bin histogram.
ggplot(data, aes(y = Price)) +
  geom_boxplot(fill = "purple") +
  theme_minimal() +
  labs(title = "Boxplot of Price", y = "Price")

ggplot(data, aes(x = Price)) +
  geom_histogram(fill = "purple", bins = 30) +
  theme_minimal() +
  labs(title = "Histogram of Price", x = "Price", y = "Frequency")

# Distribution of Quantity: boxplot, then a 30-bin histogram.
ggplot(data, aes(y = Quantity)) +
  geom_boxplot(fill = "red") +
  theme_minimal() +
  labs(title = "Boxplot of Quantity", y = "Quantity")

ggplot(data, aes(x = Quantity)) +
  geom_histogram(fill = "red", bins = 30) +
  theme_minimal() +
  labs(title = "Histogram of Quantity", x = "Quantity", y = "Frequency")
# Essay: Comparing Power BI, Alteryx, and R
# Power BI vs. Alteryx vs. R for Data Analysis
# Power BI, Alteryx, and R each offer distinct advantages and limitations for
# data analysis.
# Power BI is widely used for data visualization and business intelligence. It
# allows users to create interactive dashboards without extensive coding
# knowledge. One of its biggest strengths is its ease of use and seamless
# integration with Microsoft products (e.g., Excel, Azure). However, Power BI
# is limited in handling large datasets and advanced statistical modeling,
# areas in which R excels.
# Alteryx is a drag-and-drop tool designed for data preparation, blending, and
# automation. It is powerful for non-coders who need to process and analyze
# data quickly, and it also provides built-in machine learning models. However,
# its licensing cost is high, and it lacks the flexibility and community
# support that R offers.
# R is an open-source programming language that excels in statistical
# computing and machine learning. It is highly customizable and offers
# extensive packages for data analysis, visualization, and machine learning.
# R is great for handling complex statistical models, but it has a steeper
# learning curve than Power BI and Alteryx.
#
# In summary, Power BI is best for visualization, Alteryx is strong for ETL
# (Extract, Transform, Load), and R is ideal for in-depth statistical analysis.
# The choice depends on the user's technical skills, budget, and specific
# analysis needs.