# Display the built-in Titanic contingency table straight from the
# datasets namespace.
print(datasets::Titanic)
## , , Age = Child, Survived = No
##
## Sex
## Class Male Female
## 1st 0 0
## 2nd 0 0
## 3rd 35 17
## Crew 0 0
##
## , , Age = Adult, Survived = No
##
## Sex
## Class Male Female
## 1st 118 4
## 2nd 154 13
## 3rd 387 89
## Crew 670 3
##
## , , Age = Child, Survived = Yes
##
## Sex
## Class Male Female
## 1st 5 1
## 2nd 11 13
## 3rd 13 14
## Crew 0 0
##
## , , Age = Adult, Survived = Yes
##
## Sex
## Class Male Female
## 1st 57 140
## 2nd 14 80
## 3rd 75 76
## Crew 192 20
# Load the Titanic table into the workspace and keep a data-frame copy
# (one row per Class/Sex/Age/Survived combination, counts in Freq).
data(list = "Titanic")
week4 <- as.data.frame(x = Titanic)
# Show the original table again for reference.
print(Titanic)
## , , Age = Child, Survived = No
##
## Sex
## Class Male Female
## 1st 0 0
## 2nd 0 0
## 3rd 35 17
## Crew 0 0
##
## , , Age = Adult, Survived = No
##
## Sex
## Class Male Female
## 1st 118 4
## 2nd 154 13
## 3rd 387 89
## Crew 670 3
##
## , , Age = Child, Survived = Yes
##
## Sex
## Class Male Female
## 1st 5 1
## 2nd 11 13
## 3rd 13 14
## Crew 0 0
##
## , , Age = Adult, Survived = Yes
##
## Sex
## Class Male Female
## 1st 57 140
## 2nd 14 80
## 3rd 75 76
## Crew 192 20
# Import the data from a CSV file and preview the first six rows.
databaru <- read.csv(file = "house_price.csv")
print(head(databaru, n = 6L))
## date price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2014-05-02 00:00:00 313000 3 1.50 1340 7912 1.5
## 2 2014-05-02 00:00:00 2384000 5 2.50 3650 9050 2.0
## 3 2014-05-02 00:00:00 342000 3 2.00 1930 11947 1.0
## 4 2014-05-02 00:00:00 420000 3 2.25 2000 8030 1.0
## 5 2014-05-02 00:00:00 550000 4 2.50 1940 10500 1.0
## 6 2014-05-02 00:00:00 490000 2 1.00 880 6380 1.0
## waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1 0 0 3 1340 0 1955 2005
## 2 0 4 5 3370 280 1921 0
## 3 0 0 4 1930 0 1966 0
## 4 0 0 4 1000 1000 1963 0
## 5 0 0 4 1140 800 1976 1992
## 6 0 0 3 880 0 1938 1994
## street city statezip country
## 1 18810 Densmore Ave N Shoreline WA 98133 USA
## 2 709 W Blaine St Seattle WA 98119 USA
## 3 26206-26214 143rd Ave SE Kent WA 98042 USA
## 4 857 170th Pl NE Bellevue WA 98008 USA
## 5 9105 170th Ave NE Redmond WA 98052 USA
## 6 522 NE 88th St Seattle WA 98115 USA
# Display the data-frame version of the Titanic table.
print(week4)
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
## 7 3rd Female Child No 17
## 8 Crew Female Child No 0
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 13 1st Female Adult No 4
## 14 2nd Female Adult No 13
## 15 3rd Female Adult No 89
## 16 Crew Female Adult No 3
## 17 1st Male Child Yes 5
## 18 2nd Male Child Yes 11
## 19 3rd Male Child Yes 13
## 20 Crew Male Child Yes 0
## 21 1st Female Child Yes 1
## 22 2nd Female Child Yes 13
## 23 3rd Female Child Yes 14
## 24 Crew Female Child Yes 0
## 25 1st Male Adult Yes 57
## 26 2nd Male Adult Yes 14
## 27 3rd Male Adult Yes 75
## 28 Crew Male Adult Yes 192
## 29 1st Female Adult Yes 140
## 30 2nd Female Adult Yes 80
## 31 3rd Female Adult Yes 76
## 32 Crew Female Adult Yes 20
# Inspect the structure of the data (dimensions and dimnames of the table).
utils::str(object = Titanic)
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
# Import the data from a CSV file and preview the first six rows.
# NOTE(review): this repeats the read.csv("house_price.csv") import done
# earlier in the script; confirm whether the second read is intentional.
databaru <- read.csv(file = "house_price.csv")
print(head(databaru, n = 6L))
## date price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2014-05-02 00:00:00 313000 3 1.50 1340 7912 1.5
## 2 2014-05-02 00:00:00 2384000 5 2.50 3650 9050 2.0
## 3 2014-05-02 00:00:00 342000 3 2.00 1930 11947 1.0
## 4 2014-05-02 00:00:00 420000 3 2.25 2000 8030 1.0
## 5 2014-05-02 00:00:00 550000 4 2.50 1940 10500 1.0
## 6 2014-05-02 00:00:00 490000 2 1.00 880 6380 1.0
## waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1 0 0 3 1340 0 1955 2005
## 2 0 4 5 3370 280 1921 0
## 3 0 0 4 1930 0 1966 0
## 4 0 0 4 1000 1000 1963 0
## 5 0 0 4 1140 800 1976 1992
## 6 0 0 3 880 0 1938 1994
## street city statezip country
## 1 18810 Densmore Ave N Shoreline WA 98133 USA
## 2 709 W Blaine St Seattle WA 98119 USA
## 3 26206-26214 143rd Ave SE Kent WA 98042 USA
## 4 857 170th Pl NE Bellevue WA 98008 USA
## 5 9105 170th Ave NE Redmond WA 98052 USA
## 6 522 NE 88th St Seattle WA 98115 USA
# Inspect the structure of the data (dimensions and dimnames of the table).
# NOTE(review): this repeats the str(Titanic) call made earlier in the script.
utils::str(object = Titanic)
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
# Summarise the table: number of cases, number of factors, and the
# chi-squared test for independence of all factors.
print(summary(object = Titanic))
## Number of cases in table: 2201
## Number of factors: 4
## Test for independence of all factors:
## Chisq = 1637.4, df = 25, p-value = 0
## Chi-squared approximation may be incorrect
# Check for missing values, one count per variable.
# Titanic is a 4-way table, so colSums() on it only collapses the Class
# dimension and returns a Sex x Age x Survived array rather than per-variable
# counts. Running the check on the data-frame form gives one NA count per
# column, in the same shape as the airquality check later in the script.
titanic_na_counts <- colSums(is.na(as.data.frame(Titanic)))
titanic_na_counts
## Class Sex Age Survived Freq
## 0 0 0 0 0
# Check whether there are any missing values (NA count per column).
print(colSums(x = is.na(airquality)))
## Ozone Solar.R Wind Temp Month Day
## 37 7 0 0 0 0
# Replace the NA entries in the Ozone column with the column median
# (the median is computed with the missing values excluded).
airquality$Ozone <- replace(
  airquality$Ozone,
  is.na(airquality$Ozone),
  median(airquality$Ozone, na.rm = TRUE)
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union