# Load the customer data set and print a per-column overview.
#
# NOTE(review): setwd() is kept because later code may rely on the working
# directory, but the data itself is read through an absolute path; consider
# dropping setwd() and using project-relative paths instead.
setwd("~/OneDrive - Montivory Co., Ltd/R")

# Tab-separated file with a header row. stringsAsFactors = TRUE keeps text
# columns as factors so that summary() reports per-level counts, as in the
# recorded output below (strings are read as character by default in R >= 4.0).
custdata <- read.table(
  "/Users/pittaya/OneDrive - Montivory Co., Ltd/R/Custdata/custdata.tsv",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = TRUE
)
summary(custdata)
## custid sex is.employed income
## Min. : 2068 F:440 Mode :logical Min. : -8700
## 1st Qu.: 345667 M:560 FALSE:73 1st Qu.: 14600
## Median : 693403 TRUE :599 Median : 35000
## Mean : 698500 NA's :328 Mean : 53505
## 3rd Qu.:1044606 3rd Qu.: 67000
## Max. :1414286 Max. :615000
##
## marital.stat health.ins housing.type
## Divorced/Separated:155 Mode :logical Homeowner free and clear :157
## Married :516 FALSE:159 Homeowner with mortgage/loan:412
## Never Married :233 TRUE :841 Occupied with no rent : 11
## Widowed : 96 Rented :364
## NA's : 56
##
##
## recent.move num.vehicles age state.of.res
## Mode :logical Min. :0.000 Min. : 0.0 California :100
## FALSE:820 1st Qu.:1.000 1st Qu.: 38.0 New York : 71
## TRUE :124 Median :2.000 Median : 50.0 Pennsylvania: 70
## NA's :56 Mean :1.916 Mean : 51.7 Texas : 56
## 3rd Qu.:2.000 3rd Qu.: 64.0 Michigan : 52
## Max. :6.000 Max. :146.7 Ohio : 51
## NA's :56 (Other) :600
# Summaries of the two numeric columns examined below. Each summary is printed
# once; the recorded output shows a negative minimum income and ages of 0 and
# 146.7, which are worth checking before modelling.
summary(custdata$income)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8700 14600 35000 53505 67000 615000
summary(custdata$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 38.0 50.0 51.7 64.0 146.7

# Income divided by 1000, so the summary reads in thousands.
Income <- custdata$income / 1000
summary(Income)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.7 14.6 35.0 53.5 67.0 615.0

# install.packages("ggplot2")
library(ggplot2)
library(ggplot2)
library(scales)

# Histogram of customer age in bins of width 5.
ggplot(custdata) +
  geom_histogram(aes(x = age), binwidth = 5, fill = "gray")

# Density of income on a linear axis with dollar-formatted labels.
ggplot(custdata) +
  geom_density(aes(x = income)) +
  scale_x_continuous(labels = dollar)

# Density of income on a log10 axis with tick marks on the bottom and top.
# Zero and negative incomes have no finite log10 value; ggplot2 used to drop
# them with NaN / non-finite warnings (79 rows in the recorded run), so they
# are now excluded explicitly and the plot is produced without warnings.
ggplot(subset(custdata, income > 0)) +
  geom_density(aes(x = income)) +
  scale_x_log10(breaks = c(100, 1000, 10000, 100000), labels = dollar) +
  annotation_logticks(sides = "bt")
# install.packages("scales")
library(ggplot2)
library(scales)

# Histogram of customer age in bins of width 5.
ggplot(custdata) +
  geom_histogram(aes(x = age), binwidth = 5, fill = "gray")

# Density of income on a linear axis with dollar-formatted labels.
ggplot(custdata) +
  geom_density(aes(x = income)) +
  scale_x_continuous(labels = dollar)

# Density of income on a log10 axis with tick marks on the bottom and top.
# Zero and negative incomes cannot be log-transformed; ggplot2 used to drop
# them with NaN / non-finite warnings (79 rows in the recorded run), so they
# are now excluded explicitly and the plot is produced without warnings.
ggplot(subset(custdata, income > 0)) +
  geom_density(aes(x = income)) +
  scale_x_log10(breaks = c(100, 1000, 10000, 100000), labels = dollar) +
  annotation_logticks(sides = "bt")
# Horizontal bar chart of the number of customers per state of residence.
ggplot(custdata, aes(x = state.of.res)) +
  geom_bar(fill = "gray") +
  # Flip the axes so the state names run down the y axis.
  coord_flip() +
  # Shrink the state labels to 80% of the default size.
  theme(axis.text.y = element_text(size = rel(0.8)))
# Tabulate customers per state and turn the table into a two-column data
# frame: the state name and its count.
statesums <- table(custdata[["state.of.res"]])
statef <- as.data.frame(statesums, responseName = "count")
colnames(statef)[1] <- "state.of.res"
summary(statef)
## state.of.res count
## Alabama : 1 Min. : 1.00
## Alaska : 1 1st Qu.: 5.00
## Arizona : 1 Median : 12.00
## Arkansas : 1 Mean : 20.00
## California: 1 3rd Qu.: 26.25
## Colorado : 1 Max. :100.00
## (Other) :44
# Re-level the state factor so its levels are ordered by count rather than
# alphabetically; the rows and counts themselves are unchanged.
statef$state.of.res <- reorder(statef$state.of.res, statef$count)
summary(statef)
## state.of.res count
## Delaware : 1 Min. : 1.00
## North Dakota: 1 1st Qu.: 5.00
## Wyoming : 1 Median : 12.00
## Rhode Island: 1 Mean : 20.00
## Alaska : 1 3rd Qu.: 26.25
## Montana : 1 Max. :100.00
## (Other) :44