library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Bind the built-in `mtcars` data frame to a variable in the workspace.
# One namespaced assignment is enough: following it with data("mtcars")
# would only overwrite the variable with the very same object.
mtcars <- datasets::mtcars

# Display the complete data frame.
mtcars
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
# Preview the first six rows of the data frame.
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Compact structural overview: number of observations, column names and types.
utils::str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Per-column minimum, quartiles, median, mean and maximum.
summary(object = mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Number of rows followed by number of columns.
c(nrow(mtcars), ncol(mtcars))
## [1] 32 11
# Column names of the data frame.
names(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
# Total number of missing cells across the whole data frame.
sum(is.na(mtcars))
## [1] 0
# Histogram of the mpg column.
hist(
  x = mtcars$mpg,
  main = "distribusi MPG",
  xlab = "MPG",
  col = "blue"
)

# Box plot of the hp column.
boxplot(
  x = mtcars$hp,
  main = "Distribusi Horse Power",
  ylab = "Horsepower",
  col = "green"
)

# Scatter plot of hp (vertical axis) against mpg (horizontal axis).
plot(
  x = mtcars$mpg,
  y = mtcars$hp,
  main = "Scatterplot MPG vs HOrsepower",
  xlab = "MPG",
  ylab = "horsepower",
  col = "cyan"
)

# Correlation coefficient between the same two columns.
cor(x = mtcars$mpg, y = mtcars$hp)
## [1] -0.7761684
# Heatmap of the pairwise correlations between all columns.
#
# heatmap() standardises each row by default (scale = "row") unless the input
# is declared symmetric, so the colours would show row z-scores rather than
# the correlation values themselves. Declaring the matrix symmetric and
# turning scaling off plots the correlations as computed, with the same
# ordering on both axes. zlim pins the colour range to [-1, 1] so the white
# midpoint of the blue-white-red palette corresponds to zero correlation.
cor_matrix <- cor(mtcars)
heatmap(
  cor_matrix,
  symm = TRUE,
  scale = "none",
  zlim = c(-1, 1),
  main = "Heatmap Korelasi",
  col = colorRampPalette(c("blue", "white", "red"))(100)
)

# Scatterplot matrix of the first four columns (mpg, cyl, disp, hp).
pairs(x = mtcars[, c("mpg", "cyl", "disp", "hp")], main = "Pair Plot")

# hp against mpg with solid points; the numeric cyl value of each row is used
# directly as its colour index.
plot(
  x = mtcars$mpg,
  y = mtcars$hp,
  col = mtcars$cyl,
  pch = 19,
  main = "MPG vs Horsepower by Cylinder",
  xlab = "MPG",
  ylab = "Horsepower"
)
# Legend built from the distinct cyl values, so each swatch uses the same
# colour index as the corresponding points.
legend(
  x = "topright",
  legend = unique(mtcars$cyl),
  col = unique(mtcars$cyl),
  pch = 19
)

# Box plots of mpg split by the am column (one box per am value).
# The x-axis label previously lacked its closing parenthesis.
boxplot(
  mpg ~ am,
  data = mtcars,
  main = "MPG by Transmission",
  xlab = "Transmission (0= Automatic,1=Manual)",
  ylab = "MPG",
  col = c("blue", "green")
)

# Start from a fresh copy of the dataset.
data("mtcars")

# Features used for clustering. Each column is listed exactly once: repeating
# "hp" would put a second copy of it into the matrix and count it twice in
# the distances that k-means minimises.
# NOTE(review): the repeated "hp" may have been meant to be "drat" — confirm.
# Printed results recorded further down were produced with the earlier
# feature set; re-knit the document to refresh them.
mtcars_selected <- mtcars[, c("mpg", "cyl", "disp", "hp", "wt", "qsec")]

# Centre and scale every feature so no column dominates because of its units.
mtcars_scaled <- scale(mtcars_selected)

# Seed the RNG so the random starts, and therefore the elbow curve, are
# reproducible from run to run.
set.seed(123)

# Total within-cluster sum of squares for k = 1..10, stored in a
# preallocated vector instead of one grown inside the loop.
wcss <- numeric(10)
for (i in seq_along(wcss)) {
  kmeans_model <- kmeans(mtcars_scaled, centers = i, nstart = 25)
  wcss[i] <- kmeans_model$tot.withinss
}

# Elbow plot: look for the k after which WCSS stops dropping sharply.
plot(
  seq_along(wcss),
  wcss,
  type = "b",
  main = "Elbow Method",
  xlab = "Numbers of Clusters (K)",
  ylab = "WCSS",
  col = "blue"
)

# Fix the RNG seed so the random starts give the same partition on every run.
set.seed(seed = 123)

# Three-cluster k-means on the scaled feature matrix, using 25 random starts.
kmeans_result <- kmeans(x = mtcars_scaled, centers = 3, nstart = 25)

# Store each row's cluster label on the data frame as a factor.
mtcars[["cluster"]] <- as.factor(kmeans_result[["cluster"]])

# Print the cluster assigned to each row.
kmeans_result[["cluster"]]
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##                   3                   3                   2                   3 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##                   1                   3                   1                   2 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##                   2                   3                   3                   1 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##                   1                   1                   1                   1 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##                   1                   2                   2                   2 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##                   2                   1                   1                   1 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##                   1                   2                   2                   2 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##                   1                   3                   1                   2
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
 # Draw the k-means partition with factoextra: points only, "convex" cluster
 # outlines, the "jco" palette and a minimal ggplot2 theme.
 fviz_cluster(
   kmeans_result,
   data = mtcars_scaled,
   geom = "point",
   ellipse.type = "convex",
   palette = "jco",
   ggtheme = theme_minimal()
 )

 # Principal component analysis of the scaled clustering features.
 # The scaling argument of prcomp() is named `scale.`; it is spelled out in
 # full here instead of relying on partial matching of `scale`.
 pca_result <- prcomp(mtcars_scaled, scale. = TRUE)

 # Component scores as a data frame, with each row's k-means cluster attached.
 pca_df <- as.data.frame(pca_result$x)
 pca_df$cluster <- as.factor(kmeans_result$cluster)

 # First two principal components, coloured by cluster.
 ggplot(pca_df, aes(x = PC1, y = PC2, color = cluster)) +
   geom_point(size = 3) +
   ggtitle("Clustering of mtcars Data (K-Means)") +
   theme_minimal()

 # Attach the k-means cluster label to every row (assigned once; the
 # statement was previously repeated on two consecutive lines).
 mtcars$cluster <- as.factor(kmeans_result$cluster)

 # Mean and standard deviation of every other column within each cluster.
 mtcars %>%
   group_by(cluster) %>%
   summarise(across(everything(), list(mean = mean, sd = sd)))
## # A tibble: 3 × 23
##   cluster mpg_mean mpg_sd cyl_mean cyl_sd disp_mean disp_sd hp_mean hp_sd
##   <fct>      <dbl>  <dbl>    <dbl>  <dbl>     <dbl>   <dbl>   <dbl> <dbl>
## 1 1           15.1   2.56        8      0      353.    67.8   209.   51.0
## 2 2           26.7   4.51        4      0      105.    26.9    82.6  20.9
## 3 3           19.7   1.45        6      0      183.    41.6   122.   24.3
## # ℹ 14 more variables: drat_mean <dbl>, drat_sd <dbl>, wt_mean <dbl>,
## #   wt_sd <dbl>, qsec_mean <dbl>, qsec_sd <dbl>, vs_mean <dbl>, vs_sd <dbl>,
## #   am_mean <dbl>, am_sd <dbl>, gear_mean <dbl>, gear_sd <dbl>,
## #   carb_mean <dbl>, carb_sd <dbl>