library(dplyr)
# Load the built-in Titanic contingency table and flatten it into a data
# frame (one row per cell of the table, with the cell count in `Freq`).
data("Titanic")
titanic <- as.data.frame(Titanic)

# Read the house price data; the path is relative to the working directory.
house_price <- read.csv("house_price.csv")
# read.csv() already returns a data frame, so no data.frame() wrapper is
# needed; `df` is kept as the working copy used by the steps below.
df <- house_price

# Quick data-quality checks: preview the first rows, count missing values
# per column, and count fully duplicated rows.
head(df)
colSums(is.na(df))
sum(duplicated(df))
# Missing values per column of the built-in airquality data, before imputation.
colSums(is.na(airquality))

# Replace missing Ozone and Solar.R readings with the median of the observed
# values in the same column. Assigning into `airquality` creates a modified
# copy in the current environment; the packaged dataset itself is untouched.
airquality$Ozone[is.na(airquality$Ozone)] <-
  median(airquality$Ozone, na.rm = TRUE)
airquality$Solar.R[is.na(airquality$Solar.R)] <-
  median(airquality$Solar.R, na.rm = TRUE)

# Re-check after imputation: remaining missing values and duplicated rows.
colSums(is.na(airquality))
sum(duplicated(airquality))
# select(): keep only the columns of interest.
select_data <- select(df, price, bedrooms, bathrooms, floors)
head(select_data)

# filter(): keep the rows whose price is above 350000.
harga_lebih_350000 <- filter(df, price > 350000)

# Compare the number of rows after and before filtering.
nrow(harga_lebih_350000)
nrow(df)

# rename(): new_name = old_name; both columns are renamed in a single call.
rename_feature <- rename(df, harga = price, negara = country)
head(rename_feature)
head(titanic)

# mutate(): add each row's share of the total frequency.
titanic_mutate <- mutate(titanic, proporsi = Freq / sum(Freq))
head(titanic_mutate)

# Lookup table with one row per passenger class, used to demonstrate a join.
extra <- data.frame(
  Class = c("1st", "2nd", "3rd", "Crew"),
  Kapal = c("titanic", "titanic", "titanic", "titanic")
)

# left_join(): keep every row of `titanic` and attach the matching `Kapal`.
# NOTE(review): `Class` is presumably a factor in `titanic` and character in
# `extra`; confirm the joined key column has the type later steps expect.
titanic_joined <- left_join(titanic, extra, by = "Class")
head(titanic_joined)

# Total frequency per Class x Survived combination. `.groups = "drop"`
# returns an ungrouped result, so later verbs are not silently applied
# per Class, and it avoids the regrouping message from summarise().
titanic_summary <- titanic %>%
  group_by(Class, Survived) %>%
  summarise(total = sum(Freq), .groups = "drop")
titanic_summary
# 70/30 train/test split of the Titanic rows.
# The seed must be set BEFORE sampling; otherwise the split differs on
# every run and the seed has no effect on it.
set.seed(123)

n_rows <- nrow(titanic)
# sample.int() avoids the 1:n pitfall when n is 0; floor() makes the
# truncation of the fractional sample size explicit.
index <- sample.int(n_rows, size = floor(0.7 * n_rows))

train_data <- titanic[index, ]
# setdiff() instead of titanic[-index, ]: negative indexing with an empty
# index would select zero rows rather than all of them.
test_data <- titanic[setdiff(seq_len(n_rows), index), ]

nrow(train_data)
nrow(test_data)