# Load the built-in Titanic contingency table and flatten it into a data
# frame: one row per Class/Sex/Age/Survived combination, with the cell count
# stored in the Freq column.
data(list = "Titanic")
titanic <- as.data.frame(Titanic)
print(titanic)
##    Class    Sex   Age Survived Freq
## 1    1st   Male Child       No    0
## 2    2nd   Male Child       No    0
## 3    3rd   Male Child       No   35
## 4   Crew   Male Child       No    0
## 5    1st Female Child       No    0
## 6    2nd Female Child       No    0
## 7    3rd Female Child       No   17
## 8   Crew Female Child       No    0
## 9    1st   Male Adult       No  118
## 10   2nd   Male Adult       No  154
## 11   3rd   Male Adult       No  387
## 12  Crew   Male Adult       No  670
## 13   1st Female Adult       No    4
## 14   2nd Female Adult       No   13
## 15   3rd Female Adult       No   89
## 16  Crew Female Adult       No    3
## 17   1st   Male Child      Yes    5
## 18   2nd   Male Child      Yes   11
## 19   3rd   Male Child      Yes   13
## 20  Crew   Male Child      Yes    0
## 21   1st Female Child      Yes    1
## 22   2nd Female Child      Yes   13
## 23   3rd Female Child      Yes   14
## 24  Crew Female Child      Yes    0
## 25   1st   Male Adult      Yes   57
## 26   2nd   Male Adult      Yes   14
## 27   3rd   Male Adult      Yes   75
## 28  Crew   Male Adult      Yes  192
## 29   1st Female Adult      Yes  140
## 30   2nd Female Adult      Yes   80
## 31   3rd Female Adult      Yes   76
## 32  Crew Female Adult      Yes   20
# Read the house-price listing (path is relative to the working directory)
# and preview its first six rows.
databaru <- read.csv(file = "house_price.csv")
head(databaru, n = 6L)
##                  date   price bedrooms bathrooms sqft_living sqft_lot floors
## 1 2014-05-02 00:00:00  313000        3      1.50        1340     7912    1.5
## 2 2014-05-02 00:00:00 2384000        5      2.50        3650     9050    2.0
## 3 2014-05-02 00:00:00  342000        3      2.00        1930    11947    1.0
## 4 2014-05-02 00:00:00  420000        3      2.25        2000     8030    1.0
## 5 2014-05-02 00:00:00  550000        4      2.50        1940    10500    1.0
## 6 2014-05-02 00:00:00  490000        2      1.00         880     6380    1.0
##   waterfront view condition sqft_above sqft_basement yr_built yr_renovated
## 1          0    0         3       1340             0     1955         2005
## 2          0    4         5       3370           280     1921            0
## 3          0    0         4       1930             0     1966            0
## 4          0    0         4       1000          1000     1963            0
## 5          0    0         4       1140           800     1976         1992
## 6          0    0         3        880             0     1938         1994
##                     street      city statezip country
## 1     18810 Densmore Ave N Shoreline WA 98133     USA
## 2          709 W Blaine St   Seattle WA 98119     USA
## 3 26206-26214 143rd Ave SE      Kent WA 98042     USA
## 4          857 170th Pl NE  Bellevue WA 98008     USA
## 5        9105 170th Ave NE   Redmond WA 98052     USA
## 6           522 NE 88th St   Seattle WA 98115     USA
# Compact structural overview: column types and factor levels.
utils::str(titanic)
## 'data.frame':    32 obs. of  5 variables:
##  $ Class   : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Sex     : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq    : num  0 0 35 0 0 0 17 0 118 154 ...
# Per-column summary: level counts for factors, quartiles for Freq.
print(summary(titanic))
##   Class       Sex        Age     Survived      Freq       
##  1st :8   Male  :16   Child:16   No :16   Min.   :  0.00  
##  2nd :8   Female:16   Adult:16   Yes:16   1st Qu.:  0.75  
##  3rd :8                                   Median : 13.50  
##  Crew:8                                   Mean   : 68.78  
##                                           3rd Qu.: 77.00  
##                                           Max.   :670.00
# Count missing values in each column of titanic.
vapply(titanic, function(column) sum(is.na(column)), numeric(1))
##    Class      Sex      Age Survived     Freq 
##        0        0        0        0        0
# Count missing values in each column of the built-in airquality data set.
vapply(airquality, function(column) sum(is.na(column)), numeric(1))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##      37       7       0       0       0       0
# Median imputation: in Ozone and Solar.R, replace every NA with the median
# of that column's observed (non-missing) values, then re-count the NAs to
# confirm none remain.
airquality[c("Ozone", "Solar.R")] <- lapply(
  airquality[c("Ozone", "Solar.R")],
  function(values) replace(values, is.na(values), median(values, na.rm = TRUE))
)
colSums(is.na(airquality))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##       0       0       0       0       0       0
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the Class, Sex and Survived columns, then preview the result.
# The preview must show titanic_selected (not the untouched titanic data
# frame), otherwise the effect of select() is never visible.
titanic_selected <- select(titanic, Class, Sex, Survived)
head(titanic_selected)
##   Class    Sex Survived
## 1   1st   Male       No
## 2   2nd   Male       No
## 3   3rd   Male       No
## 4  Crew   Male       No
## 5   1st Female       No
## 6   2nd Female       No
library(dplyr)
# Subset: rows whose Age level is "Child".
titanic_child <- titanic %>% filter(Age == "Child")
# Sort: order all rows by Freq, smallest first, and preview the top six.
titanic_sorted_asc <- titanic %>% arrange(Freq)
head(titanic_sorted_asc, n = 6L)
##   Class    Sex   Age Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3  Crew   Male Child       No    0
## 4   1st Female Child       No    0
## 5   2nd Female Child       No    0
## 6  Crew Female Child       No    0
library(dplyr)
# Rename the Age column to Umur; all other columns keep their names.
titanic_rename <- titanic %>% rename(Umur = Age)
print(titanic_rename)
##    Class    Sex  Umur Survived Freq
## 1    1st   Male Child       No    0
## 2    2nd   Male Child       No    0
## 3    3rd   Male Child       No   35
## 4   Crew   Male Child       No    0
## 5    1st Female Child       No    0
## 6    2nd Female Child       No    0
## 7    3rd Female Child       No   17
## 8   Crew Female Child       No    0
## 9    1st   Male Adult       No  118
## 10   2nd   Male Adult       No  154
## 11   3rd   Male Adult       No  387
## 12  Crew   Male Adult       No  670
## 13   1st Female Adult       No    4
## 14   2nd Female Adult       No   13
## 15   3rd Female Adult       No   89
## 16  Crew Female Adult       No    3
## 17   1st   Male Child      Yes    5
## 18   2nd   Male Child      Yes   11
## 19   3rd   Male Child      Yes   13
## 20  Crew   Male Child      Yes    0
## 21   1st Female Child      Yes    1
## 22   2nd Female Child      Yes   13
## 23   3rd Female Child      Yes   14
## 24  Crew Female Child      Yes    0
## 25   1st   Male Adult      Yes   57
## 26   2nd   Male Adult      Yes   14
## 27   3rd   Male Adult      Yes   75
## 28  Crew   Male Adult      Yes  192
## 29   1st Female Adult      Yes  140
## 30   2nd Female Adult      Yes   80
## 31   3rd Female Adult      Yes   76
## 32  Crew Female Adult      Yes   20
# Add Proporsi: each row's Freq as a share of the total Freq over all rows.
titanic_mutate <- titanic %>% mutate(Proporsi = Freq / sum(Freq))
head(titanic_mutate, n = 6L)
##   Class    Sex   Age Survived Freq   Proporsi
## 1   1st   Male Child       No    0 0.00000000
## 2   2nd   Male Child       No    0 0.00000000
## 3   3rd   Male Child       No   35 0.01590186
## 4  Crew   Male Child       No    0 0.00000000
## 5   1st Female Child       No    0 0.00000000
## 6   2nd Female Child       No    0 0.00000000