This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Per-column summary statistics for the built-in `cars` data set.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# Scatter plot of fuel economy (mpg) against weight (wt) for mtcars,
# drawn with large pink points on a minimal theme.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(colour = "pink", size = 3) +
  ggtitle("Scatter Plot: mpg vs wt") +
  xlab("Weight (1000 lbs)") +
  ylab("Miles per Gallon") +
  theme_minimal()
library(ggplot2)
# Growth curve of every chick (faint lines, one per Chick) with the mean
# weight at each time point overlaid as a thick orange line.
ggplot(ChickWeight, aes(x = Time, y = weight, group = Chick)) +
  geom_line(alpha = 0.3) +
  # `group = 1` pools all chicks so a single mean line is drawn.
  # `linewidth` replaces `size` for line geoms: `size` was deprecated for
  # lines in ggplot2 3.4.0 and triggered a deprecation warning.
  stat_summary(aes(group = 1), fun = "mean", geom = "line",
               linewidth = 1.5, color = "orange") +
  labs(title = "melihat weight dan time",
       x = "Waktu (hari)",
       y = "Berat (gram)") +
  theme_minimal()
# Pearson correlation between fuel economy (mpg) and cylinder count (cyl).
data("mtcars")
with(mtcars, cor(mpg, cyl))
## [1] -0.852162
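Jadi, nilai korelasi -0,85 menunjukkan korelasi negatif yang kuat antara mpg dan cyl: semakin banyak jumlah silinder, semakin rendah nilai mpg.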
# Scatter plot of mpg (y) against cylinder count (x) using solid green points.
plot(
  mpg ~ cyl,
  data = mtcars,
  col = "green",
  pch = 19,
  main = "Korelasi mpg vs cyl",
  xlab = "Jumlah Silinder (cyl)",
  ylab = "Miles per Gallon (mpg)"
)
# Count how many Ozone readings are missing in the airquality data set.
data("airquality")
sum(is.na(airquality[["Ozone"]]))
## [1] 37
# Median imputation: compute the median of the observed Ozone readings and
# store a filled-in copy in a new column, leaving the raw column unchanged.
med_ozone <- median(airquality$Ozone, na.rm = TRUE)
airquality$Ozone_imputed <- replace(
  airquality$Ozone,
  is.na(airquality$Ozone),
  med_ozone
)
# Show the raw and imputed columns side by side for the first rows.
head(airquality[c("Ozone", "Ozone_imputed")])
## Ozone Ozone_imputed
## 1 41 41.0
## 2 36 36.0
## 3 12 12.0
## 4 18 18.0
## 5 NA 31.5
## 6 28 28.0
library(ggplot2)
# Box plots of sepal length for each iris species, filled by species.
ggplot(
  data = iris,
  mapping = aes(x = Species, y = Sepal.Length, fill = Species)
) +
  geom_boxplot() +
  ggtitle("perbedaan distribusi sepal length per spesies") +
  xlab("Species") +
  ylab("Sepal Length (cm)") +
  theme_minimal()
library(titanic)
## Warning: package 'titanic' was built under R version 4.5.3
# Load the training data explicitly BEFORE it is used; previously the
# data() call came after the first use and only worked because the package
# makes the data set available on attach.
data("titanic_train")
# Total of the Survived column (presumably a 0/1 indicator, so this is the
# number of survivors -- confirm against the titanic package documentation).
sum(titanic_train$Survived)
## [1] 342
# Pearson correlation between eruption duration and waiting time (faithful).
data("faithful")
with(faithful, cor(eruptions, waiting))
## [1] 0.9008112
# Scatter plot of waiting time (y) against eruption duration (x) using
# solid brown points.
with(
  faithful,
  plot(
    eruptions, waiting,
    col = "brown",
    pch = 19,
    main = "Hubungan Eruptions vs Waiting",
    xlab = "Durasi Erupsi (menit)",
    ylab = "Waktu Tunggu (menit)"
  )
)
library(ggplot2)
# Inspect how the `cut` column of diamonds is stored: its class, its levels
# in order, and its compact structure. (The duplicated class() call and its
# repeated output were removed; one call is sufficient.)
class(diamonds$cut)
## [1] "ordered" "factor"
levels(diamonds$cut)
## [1] "Fair" "Good" "Very Good" "Premium" "Ideal"
str(diamonds$cut)
## Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
# Reload mtcars and print the type and leading values of every column.
data("mtcars")
str(object = mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Per-column summary statistics for every variable in mtcars.
summary(object = mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Pairwise Pearson correlation matrix for mpg, hp and wt.
cor(mtcars[c("mpg", "hp", "wt")])
## mpg hp wt
## mpg 1.0000000 -0.7761684 -0.8676594
## hp -0.7761684 1.0000000 0.6587479
## wt -0.8676594 0.6587479 1.0000000
# Scatter plot of fuel economy (mpg, y-axis) against weight (wt, x-axis).
plot(
  mpg ~ wt,
  data = mtcars,
  main = "hubungan antara Weight vs MPG",
  xlab = "Weight (1000 lbs)",
  ylab = "MPG"
)
library(ggplot2)
# Box plots of diamond price for each cut category, filled by cut.
ggplot(
  data = diamonds,
  mapping = aes(x = cut, y = price, fill = cut)
) +
  geom_boxplot() +
  ggtitle("distribusi price berdasarkan cut") +
  xlab("cut (kualitas potongan)") +
  ylab("price (USD)") +
  theme_minimal()
Jadi, kesimpulannya: Fair dan Good harganya paling murah dengan variasi yang
sempit. Ideal harganya menengah, tetapi memiliki banyak sekali outlier mahal.
Premium dan Very Good harganya lebih tinggi dengan sebaran yang lebar.
Intinya, hampir semua kategori mempunyai outlier dengan harga yang sangat
tinggi.