# Build the expenditure data frame from the supplied figures: four numeric
# spending columns plus a gender label. Rows 1-17 are "female" and rows
# 18-35 are "male"; each numeric vector lists the female rows first.
data <- data.frame(
  housing = c(
    820, 184, 921, 488, 721, 614, 301, 801, 864, 457, 781, 1029, 552, 715,
    1047, 110, 382,
    839, 798, 1555, 1046, 388, 875, 214, 1611, 1180, 619, 253, 861, 1449,
    1746, 1865, 385, 119, 1524
  ),
  food = c(
    114, 74, 66, 80, 83, 55, 56, 56, 161, 103, 47, 71, 114, 71, 90, 104, 73,
    942, 842, 781, 764, 655, 686, 438, 440, 1243, 684, 232, 739, 860, 746,
    915, 522, 1056, 961
  ),
  goods = c(
    183, 6, 1686, 103, 176, 441, 357, 357, 1618, 136, 1906, 244, 583, 357,
    653, 583, 230,
    302, 287, 4176, 428, 153, 757, 22, 2063, 768, 99, 15, 71, 1489, 2662,
    5184, 29, 261, 1739
  ),
  service = c(
    154, 20, 455, 115, 104, 193, 214, 214, 3535, 108, 452, 189, 304, 214,
    298, 304, 147,
    302, 356, 1740, 438, 233, 719, 65, 1594, 813, 204, 94, 188, 1032, 1594,
    1767, 75, 344, 1410
  ),
  gender = rep(c("female", "male"), times = c(17, 18))
)
# Display the data
data
## housing food goods service gender
## 1 820 114 183 154 female
## 2 184 74 6 20 female
## 3 921 66 1686 455 female
## 4 488 80 103 115 female
## 5 721 83 176 104 female
## 6 614 55 441 193 female
## 7 301 56 357 214 female
## 8 801 56 357 214 female
## 9 864 161 1618 3535 female
## 10 457 103 136 108 female
## 11 781 47 1906 452 female
## 12 1029 71 244 189 female
## 13 552 114 583 304 female
## 14 715 71 357 214 female
## 15 1047 90 653 298 female
## 16 110 104 583 304 female
## 17 382 73 230 147 female
## 18 839 942 302 302 male
## 19 798 842 287 356 male
## 20 1555 781 4176 1740 male
## 21 1046 764 428 438 male
## 22 388 655 153 233 male
## 23 875 686 757 719 male
## 24 214 438 22 65 male
## 25 1611 440 2063 1594 male
## 26 1180 1243 768 813 male
## 27 619 684 99 204 male
## 28 253 232 15 94 male
## 29 861 739 71 188 male
## 30 1449 860 1489 1032 male
## 31 1746 746 2662 1594 male
## 32 1865 915 5184 1767 male
## 33 385 522 29 75 male
## 34 119 1056 261 344 male
## 35 1524 961 1739 1410 male
# Total 'housing' expenditure within each 'gender' group.
housing_by_gender <- with(data, tapply(housing, gender, FUN = sum))

# Bar plot of the per-gender totals. The y-axis runs from 0 to 500 above the
# tallest bar so the bars do not touch the top of the plotting region.
barplot(
  height = housing_by_gender,
  col = c("lightblue", "pink"),
  ylim = c(0, max(housing_by_gender) + 500),
  main = "Total Housing Expenditure by Gender",
  xlab = "Gender",
  ylab = "Total Expenditure (HKD)"
)

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.3.3
# Ex. 2.2
# Data frame transcribed from the exercise table: one row per country and
# one numeric column per age band (A25_34 through A65_74).
suicide_data <- data.frame(
  country = c(
    "Canada", "Israel", "Japan", "Austria", "France",
    "Germany", "Hungary", "Italy", "Netherlands", "Poland",
    "Spain", "Sweden", "Switzerland", "UK", "USA"
  ),
  A25_34 = c(22, 9, 22, 29, 16, 28, 48, 7, 8, 26, 4, 28, 22, 10, 20),
  A35_44 = c(27, 19, 19, 40, 25, 35, 65, 8, 11, 29, 7, 41, 34, 13, 22),
  A45_54 = c(31, 10, 21, 52, 36, 41, 84, 11, 18, 36, 10, 46, 41, 15, 28),
  A55_64 = c(34, 14, 31, 53, 47, 49, 81, 18, 20, 32, 16, 51, 50, 17, 33),
  A65_74 = c(24, 27, 49, 69, 56, 52, 107, 27, 28, 28, 22, 35, 51, 22, 37)
)
suicide_data
## country A25_34 A35_44 A45_54 A55_64 A65_74
## 1 Canada 22 27 31 34 24
## 2 Israel 9 19 10 14 27
## 3 Japan 22 19 21 31 49
## 4 Austria 29 40 52 53 69
## 5 France 16 25 36 47 56
## 6 Germany 28 35 41 49 52
## 7 Hungary 48 65 84 81 107
## 8 Italy 7 8 11 18 27
## 9 Netherlands 8 11 18 20 28
## 10 Poland 26 29 36 32 28
## 11 Spain 4 7 10 16 22
## 12 Sweden 28 41 46 51 35
## 13 Switzerland 22 34 41 50 51
## 14 UK 10 13 15 17 22
## 15 USA 20 22 28 33 37
# Reshape to long format for ggplot: one row per country / age-group pair,
# with the former column name in `age_group` and its value in
# `mortality_rate`.
suicide_data_long <- reshape2::melt(
  suicide_data,
  id.vars = "country",
  variable.name = "age_group",
  value.name = "mortality_rate"
)

# Side-by-side box plots: one box per age group.
ggplot(
  data = suicide_data_long,
  mapping = aes(x = age_group, y = mortality_rate)
) +
  geom_boxplot(colour = "black", fill = "lightblue") +
  ggtitle("Male Suicide Mortality Rates per 100,000 by Age Group") +
  xlab("Age Group") +
  ylab("Mortality Rate per 100,000") +
  theme_minimal()

# Ex. 2.3
# Data frame transcribed from the exercise table: ten rows labelled
# "State1" ... "State10" with seven numeric variables each.
state_data <- data.frame(
  State = paste0("State", 1:10),
  Population = c(
    3615, 21198, 2861, 2341, 812, 10735, 2284, 11860, 681, 472
  ),
  Income = c(
    3624, 5114, 4628, 3098, 4281, 4561, 4660, 4449, 4167, 3907
  ),
  Illiteracy = c(
    2.1, 1.1, 0.5, 2.4, 0.7, 0.8, 0.6, 1.0, 0.5, 0.6
  ),
  Life.Expectancy = c(
    69.05, 71.71, 72.56, 68.09, 71.23, 70.82, 72.13, 70.43, 72.08, 71.64
  ),
  Homicide = c(
    15.1, 10.3, 2.3, 12.5, 3.3, 7.4, 4.2, 6.1, 1.7, 5.5
  ),
  Graduates = c(
    41.3, 62.6, 59.0, 41.0, 57.6, 53.2, 60.0, 50.2, 52.3, 57.1
  ),
  Freezing = c(
    20, 20, 140, 50, 174, 124, 44, 126, 172, 168
  )
)
# Inspect the data
state_data
## State Population Income Illiteracy Life.Expectancy Homicide Graduates
## 1 State1 3615 3624 2.1 69.05 15.1 41.3
## 2 State2 21198 5114 1.1 71.71 10.3 62.6
## 3 State3 2861 4628 0.5 72.56 2.3 59.0
## 4 State4 2341 3098 2.4 68.09 12.5 41.0
## 5 State5 812 4281 0.7 71.23 3.3 57.6
## 6 State6 10735 4561 0.8 70.82 7.4 53.2
## 7 State7 2284 4660 0.6 72.13 4.2 60.0
## 8 State8 11860 4449 1.0 70.43 6.1 50.2
## 9 State9 681 4167 0.5 72.08 1.7 52.3
## 10 State10 472 3907 0.6 71.64 5.5 57.1
## Freezing
## 1 20
## 2 20
## 3 140
## 4 50
## 5 174
## 6 124
## 7 44
## 8 126
## 9 172
## 10 168
# Scatterplot matrix of the seven numeric variables (columns 2-8), with
# every point labelled by its state name.
#
# The labels are drawn inside a custom panel function. pairs() gives each
# panel its own coordinate system and restores the layout when it returns,
# so a text() call issued after pairs() has finished cannot place labels on
# the Population/Income panel or on any other panel.
pairs(
  state_data[, 2:8],
  main = "Scatterplot Matrix of US States Data",
  pch = 21,
  bg = "lightblue",
  panel = function(x, y, ...) {
    # Same markers as the default panel (pch and bg arrive through `...`).
    points(x, y, ...)
    # Each panel receives whole columns in their original row order, so the
    # State column lines up element-by-element with x and y.
    text(x, y, labels = state_data$State, pos = 4, cex = 0.8)
  }
)

library(ggplot2)
# ggplot scatter of two metrics against income on one shared y-axis:
# Life.Expectancy and Homicide. Mapping a constant string to the colour
# aesthetic in each layer is what creates the two legend entries.
ggplot(data = state_data, mapping = aes(x = Income)) +
  geom_point(
    mapping = aes(y = Life.Expectancy, colour = "Life Expectancy"),
    size = 3
  ) +
  geom_point(
    mapping = aes(y = Homicide, colour = "Homicide Rate"),
    size = 3
  ) +
  ggtitle("Life Expectancy and Homicide Rate vs Income") +
  xlab("Average Per Capita Income") +
  ylab("Values") +
  labs(colour = "Metrics") +
  theme_minimal()

# Ex. 2.4
# Data frame built from the banknote sample shown in the exercise figure:
# ten notes, six measurements per note.
banknote_data <- data.frame(
  Length = c(
    214.8, 214.6, 214.8, 214.8, 215.0, 214.4, 214.9, 214.9, 215.0, 214.7
  ),
  Left = c(
    131.0, 129.7, 129.7, 129.7, 129.6, 130.1, 130.5, 130.3, 130.4, 130.2
  ),
  Right = c(
    131.1, 129.7, 129.7, 129.6, 129.7, 130.3, 130.2, 130.1, 130.6, 130.3
  ),
  Bottom = c(
    9.0, 8.1, 8.7, 7.5, 10.4, 9.7, 11.0, 8.7, 9.9, 11.8
  ),
  Top = c(
    9.7, 9.5, 9.6, 10.4, 7.7, 11.7, 11.5, 11.7, 10.9, 10.9
  ),
  Diagonal = c(
    141.0, 141.7, 142.2, 142.0, 141.8, 139.8, 139.5, 140.2, 140.3, 139.7
  )
)
banknote_data
## Length Left Right Bottom Top Diagonal
## 1 214.8 131.0 131.1 9.0 9.7 141.0
## 2 214.6 129.7 129.7 8.1 9.5 141.7
## 3 214.8 129.7 129.7 8.7 9.6 142.2
## 4 214.8 129.7 129.6 7.5 10.4 142.0
## 5 215.0 129.6 129.7 10.4 7.7 141.8
## 6 214.4 130.1 130.3 9.7 11.7 139.8
## 7 214.9 130.5 130.2 11.0 11.5 139.5
## 8 214.9 130.3 130.1 8.7 11.7 140.2
## 9 215.0 130.4 130.6 9.9 10.9 140.3
## 10 214.7 130.2 130.3 11.8 10.9 139.7
# Scatterplot matrix of all six banknote measurements.
pairs(
  x = banknote_data,
  pch = 21,
  bg = "lightblue",
  main = "Scatterplot Matrix of Swiss Banknote Data"
)

# One box per variable, drawn side by side in a single plot.
boxplot(
  x = banknote_data,
  col = "lightblue",
  main = "Boxplot of Swiss Banknote Data"
)

# Memuat package yang diperlukan
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.94 loaded
# Correlation matrix of the banknote measurements.
cor_matrix <- cor(x = banknote_data)

# Correlation heat map: coloured cells with the coefficient printed in black
# on each cell, and black variable labels rotated 45 degrees.
corrplot(
  corr = cor_matrix,
  method = "color",
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black"
)

# Standardised copy of the data (scale() with its defaults centres and
# scales every column). Kept so the object stays available under this name.
banknote_data_scaled <- scale(banknote_data)

# Run the PCA on the raw measurements and let prcomp() do the centring and
# scaling. Passing the already-standardised matrix with center = TRUE and
# scale. = TRUE standardised the data a second time, which left pca$center
# and pca$scale holding ~0 and 1 instead of the real column means and
# standard deviations.
pca <- prcomp(banknote_data, center = TRUE, scale. = TRUE)

# Biplot of the PCA result (observations and variable loadings).
biplot(pca, main = "PCA of Swiss Banknote Data")
