# Notebook.utf8

# Load the customer data set and print a per-column overview.
# The directory string must use plain ASCII double quotes: typographic quotes
# (“ ”) are not string delimiters in R and make the call unparseable.
setwd("~/OneDrive - Montivory Co., Ltd/R")

# Tab-separated file with a header row. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned. `stringsAsFactors = TRUE` keeps
# text columns as factors on R >= 4.0 as well, so summary() tabulates level
# counts (e.g. for sex and marital.stat) instead of only length/class.
custdata <- read.table(
  "/Users/pittaya/OneDrive - Montivory Co., Ltd/R/Custdata/custdata.tsv",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = TRUE
)
summary(custdata)

##      custid        sex     is.employed         income      
##  Min.   :   2068   F:440   Mode :logical   Min.   : -8700  
##  1st Qu.: 345667   M:560   FALSE:73        1st Qu.: 14600  
##  Median : 693403           TRUE :599       Median : 35000  
##  Mean   : 698500           NA's :328       Mean   : 53505  
##  3rd Qu.:1044606                           3rd Qu.: 67000  
##  Max.   :1414286                           Max.   :615000  
##                                                            
##              marital.stat health.ins                            housing.type
##  Divorced/Separated:155   Mode :logical   Homeowner free and clear    :157  
##  Married           :516   FALSE:159       Homeowner with mortgage/loan:412  
##  Never Married     :233   TRUE :841       Occupied with no rent       : 11  
##  Widowed           : 96                   Rented                      :364  
##                                           NA's                        : 56  
##                                                                             
##                                                                             
##  recent.move      num.vehicles        age              state.of.res
##  Mode :logical   Min.   :0.000   Min.   :  0.0   California  :100  
##  FALSE:820       1st Qu.:1.000   1st Qu.: 38.0   New York    : 71  
##  TRUE :124       Median :2.000   Median : 50.0   Pennsylvania: 70  
##  NA's :56        Mean   :1.916   Mean   : 51.7   Texas       : 56  
##                  3rd Qu.:2.000   3rd Qu.: 64.0   Michigan    : 52  
##                  Max.   :6.000   Max.   :146.7   Ohio        : 51  
##                  NA's   :56                      (Other)     :600

# Min / quartiles / mean / max of customer income.
summary(custdata[["income"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -8700   14600   35000   53505   67000  615000

# Min / quartiles / mean / max of customer age.
summary(custdata[["age"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    38.0    50.0    51.7    64.0   146.7

# Income summary again, for comparison with the rescaled version.
summary(custdata[["income"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -8700   14600   35000   53505   67000  615000

# Divide income by 1000 and summarise the rescaled values.
Income <- custdata[["income"]] / 1000
summary(Income)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    -8.7    14.6    35.0    53.5    67.0   615.0

# Age summary: note the recorded minimum of 0 and maximum above 146.
summary(custdata[["age"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    38.0    50.0    51.7    64.0   146.7

# install.packages("ggplot2")  # one-time setup; uncomment if not installed
library(ggplot2)
# Age summary, printed before plotting the age distribution.
summary(custdata[["age"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    38.0    50.0    51.7    64.0   146.7

library(ggplot2)
# Histogram of customer age, using bins that are 5 units wide.
ggplot(custdata, aes(x = age)) +
  geom_histogram(binwidth = 5, fill = "gray")

library(scales)  # supplies the `dollar` label formatter used below
# Density estimate of income, with the x-axis ticks labelled as dollar amounts.
ggplot(custdata, aes(x = income)) +
  geom_density() +
  scale_x_continuous(labels = dollar)

# Density of income on a log10 x-axis.
# log10() is undefined for zero and negative incomes (the data has a minimum
# of -8700), so those rows are dropped explicitly before plotting. Plotting the
# unfiltered data instead relies on ggplot2 discarding them, which raised
# "NaNs produced" / "infinite values" warnings and silently removed 79 rows
# in the recorded run.
ggplot(subset(custdata, income > 0)) +
  geom_density(aes(x = income)) +
  scale_x_log10(breaks = c(100, 1000, 10000, 100000), labels = dollar) +
  annotation_logticks(sides = "bt")  # log tick marks on bottom and top axes

# Age distribution as a histogram with a bin width of 5.
ggplot(custdata, aes(x = age)) +
  geom_histogram(binwidth = 5, fill = "gray")

# install.packages("scales")  # one-time setup; uncomment if not installed
library(ggplot2)
library(scales)
# Income density on a linear x-axis with dollar-formatted tick labels.
ggplot(custdata, aes(x = income)) +
  geom_density() +
  scale_x_continuous(labels = dollar)

# Income density with a log10-scaled x-axis.
# Zero and negative incomes have no finite log10 value, so they are filtered
# out up front. Without the filter ggplot2 drops them itself and emits
# "NaNs produced" / "infinite values" warnings (79 rows were removed that way
# in the recorded run).
ggplot(subset(custdata, income > 0)) +
  geom_density(aes(x = income)) +
  scale_x_log10(breaks = c(100, 1000, 10000, 100000), labels = dollar) +
  annotation_logticks(sides = "bt")  # log tick marks on bottom and top axes

# Bar chart of the number of customers per state of residence.
ggplot(custdata, aes(x = state.of.res)) +
  geom_bar(fill = "gray") +
  coord_flip() +  # swap the axes so the state names run down the y-axis
  theme(axis.text.y = element_text(size = rel(0.8)))  # shrink labels to 80%

# Count customers per state, then convert the table into a data frame with
# one row per state and explicit column names.
statesums <- table(custdata[["state.of.res"]])
statef <- setNames(as.data.frame(statesums), c("state.of.res", "count"))
summary(statef)

##      state.of.res     count       
##  Alabama   : 1    Min.   :  1.00  
##  Alaska    : 1    1st Qu.:  5.00  
##  Arizona   : 1    Median : 12.00  
##  Arkansas  : 1    Mean   : 20.00  
##  California: 1    3rd Qu.: 26.25  
##  Colorado  : 1    Max.   :100.00  
##  (Other)   :44

# Re-level the state factor so that its levels are ordered by customer count.
statef[["state.of.res"]] <- with(statef, reorder(state.of.res, count))
summary(statef)

##        state.of.res     count       
##  Delaware    : 1    Min.   :  1.00  
##  North Dakota: 1    1st Qu.:  5.00  
##  Wyoming     : 1    Median : 12.00  
##  Rhode Island: 1    Mean   : 20.00  
##  Alaska      : 1    3rd Qu.: 26.25  
##  Montana     : 1    Max.   :100.00  
##  (Other)     :44