Scatter Chart

download.file('https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/cdc.Rdata', 'cdc.Rdata')

load('cdc.Rdata')

head(cdc)
##     genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1      good       0        1        0     70    175      175  77      m
## 2      good       0        1        1     64    125      115  33      f
## 3      good       1        1        1     60    105      105  49      f
## 4      good       1        1        0     66    132      124  42      f
## 5 very good       0        1        0     61    150      130  55      f
## 6 very good       1        1        0     64    114      114  55      f
cor(cdc[,c('height', 'weight', 'wtdesire')])
##             height    weight  wtdesire
## height   1.0000000 0.5553222 0.7581195
## weight   0.5553222 1.0000000 0.8000521
## wtdesire 0.7581195 0.8000521 1.0000000
plot(cdc$weight, cdc$height, type= 'p')

plot(height ~ weight, cdc, type = 'p')

plot(height ~ weight, cdc, type = 'p', col = 'red')

#cdc$gender

plot(height ~ weight, cdc, type = 'p', col = cdc$gender)

plot(height ~ weight, cdc, type = 'p', col = ifelse(cdc$weight> 300, 'red', 'blue'), xlim = c(0,500), ylim = c(0,100) )

male_df <- cdc[cdc$gender == 'm',]
female_df <- cdc[cdc$gender == 'f',]

plot(height ~ weight, male_df, type = 'p', col = 'blue' )
points(height ~ weight , female_df, col = 'red')

#?lm
fit <- lm(height ~ weight, cdc)
plot(height ~ weight, cdc, type = 'p', col = 'red')
abline(v  = 300, col = 'orange')
abline(h  = 80, col = 'blue')

fit <- lm(height ~ weight, cdc)
plot(height ~ weight, cdc, type = 'p', col = 'red')
abline(fit, col = 'blue')

plot(height ~ weight, male_df, type = 'p', col = 'blue' )
points(height ~ weight , female_df, col = 'red')

fit1 <- lm(height ~ weight, male_df)
fit2 <- lm(height ~ weight, female_df)

fit1
## 
## Call:
## lm(formula = height ~ weight, data = male_df)
## 
## Coefficients:
## (Intercept)       weight  
##    63.48202      0.03576
fit2
## 
## Call:
## lm(formula = height ~ weight, data = female_df)
## 
## Coefficients:
## (Intercept)       weight  
##    60.98341      0.02231
plot(height ~ weight, male_df, type = 'p', col = 'blue' )
points(height ~ weight , female_df, col = 'red')
abline(fit1, col ='orange')
abline(fit2, col ='green')

#head(cdc)

plot(wtdesire ~ weight, cdc)

#cdc[((cdc$wtdesire < 500) & (cdc$weight < 450)),]
plot(wtdesire ~ weight,cdc[((cdc$wtdesire < 500) & (cdc$weight < 450)),], type = 'p', col = 'blue' )

barplot(c(80,80,84,88), ylim = c(70,100))

plot(height~ age, cdc)

## Mosaic Plot

#head(cdc)

mosaicplot(smoke100 ~ gender, cdc,col = c('blue', 'red'))

#?rainbow()


smokers_gender <- table(cdc$gender, cdc$smoke100)
smokers_gender
##    
##        0    1
##   m 4547 5022
##   f 6012 4419
table(cdc$smoke100)
## 
##     0     1 
## 10559  9441
mosaicplot(smoke100 ~ gender, cdc         ,col=rainbow(length(colnames(smokers_gender))))

Boxplot

boxplot(cdc$height)

boxplot(runif(30))

hist(runif(30))

rnorm(1) 
## [1] -0.9234485
hist(rnorm(30))

#rnorm(30)


rand_data<- rnorm(100,mean = 25, sd = 10 )
hist(rand_data)

boxplot(rand_data)

rand_data <- c(rand_data,999)
hist(rand_data)

boxplot(rand_data)

boxplot(height~gender, cdc, col=cdc$gender)

boxplot(weight~gender, cdc, col=cdc$gender)

## Legend

library(readr)
covid19 <- read_csv('https://raw.githubusercontent.com/ywchiu/cdc_course/master/2020Exam/covid19.csv')
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character(),
##   Lat = col_double(),
##   Long = col_double(),
##   Case = col_character(),
##   Date = col_date(format = ""),
##   Case_Number = col_double()
## )
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
us_confirmed <- covid19 %>%
  filter(`Case` == 'confirmed', `Country/Region` == 'US') %>%
  select(Date, Case_Number)

us_deaths <- covid19 %>%
  filter(`Case` == 'deaths', `Country/Region` == 'US') %>%
  select(Date, Case_Number)
head(us_confirmed,3)
## # A tibble: 3 x 2
##   Date       Case_Number
##   <date>           <dbl>
## 1 2020-01-22           1
## 2 2020-01-23           1
## 3 2020-01-24           2
plot(Case_Number ~ Date, us_confirmed, type = 'l', col = 'red')
legend(x = as.Date('2020-02-01'), y = 6000000, 'US Confirmed', lwd = 1, col ='red')

plot(Case_Number ~ Date, us_confirmed, type = 'l', col = 'red')
lines(Case_Number ~ Date, us_deaths, type = 'l', col = 'blue')
legend(x = as.Date('2020-02-01'), y = 6000000, c('US Confirmed', 'US Deaths'), lwd = 1, col =c('red', 'blue') )

plot(Case_Number ~ Date, us_confirmed, type = 'l', col = 'red', yaxt = 'n')
lines(Case_Number ~ Date, us_deaths, type = 'l', col = 'blue')
axis(2, at=seq(0,7000000, 1000000), labels=sprintf("%.f", seq(0,7000000, 1000000)))
legend('topleft', c('US Confirmed', 'US Deaths'), lwd = 1, col =c('red', 'blue'), title = 'Case Type' )

#sort(table(cdc$genhlth))

sorted_data <- sort(table(cdc$genhlth), decreasing = TRUE)
sorted_data
## 
## very good      good excellent      fair      poor 
##      6972      5675      4657      2019       677
pie(sorted_data, init.angle = 90, clockwise = TRUE, col = 1:length(names(sorted_data)))
legend('bottomright', names(sorted_data), fill = 1:length(names(sorted_data)))

?rainbow
pie(sorted_data, init.angle = 90, clockwise = TRUE, col = 1:length(names(sorted_data)))
legend('bottomright', names(sorted_data), fill = 1:length(names(sorted_data)))

pie(sorted_data, init.angle = 90, clockwise = TRUE, col = rainbow(length(names(sorted_data))))
legend('bottomright', names(sorted_data), fill = rainbow(length(names(sorted_data))), cex = 1.2)

各種參數

rainbow(3)
## [1] "#FF0000" "#00FF00" "#0000FF"
plot(Case_Number ~ Date, us_confirmed, type = 'l', col = c('#002FA7'), yaxt = 'n', cex = 3, xlab = 'Datetime', ylab = 'Case Number')
legend(x = as.Date('2020-02-01'), y = 6000000, 'US Confirmed', lwd = 1, col ='#002FA7')

plot(Case_Number ~ Date, us_confirmed, type = 'o', col = 'red', yaxt = 'n')
lines(Case_Number ~ Date, us_deaths, type = 'o', col = 'blue', lty = 2, pch = 17)
axis(2, at=seq(0,7000000, 1000000), labels=sprintf("%.f", seq(0,7000000, 1000000)))
legend('topleft', c('US Confirmed', 'US Deaths'), lwd = 1, col =c('red', 'blue'), title = 'Case Type' )

## par

showLayout <- function(n){
  for(i in 1:n){
    plot(1, type="n", xaxt="n", yaxt="n", xlab="", ylab="")
    text(1, 1, labels=i, cex=10)
    }
}

par(mfrow = c(3,2))
showLayout(6)


par(mfrow = c(2,3))
showLayout(6)

par(mfcol = c(3,2))
showLayout(6)


par(mar = c(1,1,1,1),mfcol = c(3,2))
showLayout(6)

par(mar = c(3,3,3,3),mfcol = c(3,2))
showLayout(6)

us_confirmed$daily_new <- c(0, diff(us_confirmed$Case_Number))
par(mfrow=c(2,1))
plot(Case_Number ~ Date, us_confirmed, type = 'o', col = 'red', xaxt = 'n', xlab = '')
barplot(daily_new ~ Date, us_confirmed, border = 'blue', col = 'blue')

## Save Picture

jpeg("covid19_us.jpg")
par(mfrow=c(2,1))
plot(Case_Number ~ Date, us_confirmed, type = 'o', col = 'red', xaxt = 'n', xlab = '')
barplot(daily_new ~ Date, us_confirmed, border = 'blue', col = 'blue')
dev.off() 
## quartz_off_screen 
##                 2

Plotly

library(ggplot2)

g <- ggplot(us_confirmed)

fig <- g + 
  aes(x = Date, y = Case_Number) +
  geom_line()

fig

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
ggplotly(fig)

Donut Chart

ds <- data.frame(labels = c("A", "B", "C"),
        values = c(10, 20, 30))
head(ds)
##   labels values
## 1      A     10
## 2      B     20
## 3      C     30
library(plotly)
plot_ly(ds, labels = ds$labels, 
            values = ds$values, 
        type = "pie", hole = 0.6) %>%
layout(title = "Donut Chart Example")
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Area Chart

#us_confirmed
plot_ly(x = us_confirmed$Date, y = us_confirmed$Case_Number, fill = "tozeroy", name="Confirmed", type='scatter', mode= 'markers') 
# us_deaths
plot_ly(x = us_confirmed$Date, y = us_confirmed$Case_Number, fill = "tozeroy", name="Confirmed", type='scatter', mode= 'markers') %>%
add_trace(x= us_deaths$Date, y = us_deaths$Case_Number, fill = 'tozeroy', name = 'Deaths', type ='scatter', mode = 'markers')
library(plotly)
month<- c(1,2,3,4,5)
taipei<- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
total <- taipei + tainan
y <- list(title="Rainfall")
plot_ly(x = month, y = taipei, fill = "tozeroy", name="taipei",type='scatter', mode= 'markers') %>%add_trace(x = month, y = total, fill = "tonexty", name="tainan") %>%
layout(yaxis = y)

## Bubble Chart

data("diamonds")
head(diamonds)
## # A tibble: 6 x 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23  Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2 0.21  Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3 0.23  Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4 0.290 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5 0.31  Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6 0.24  Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
d <- diamonds[sample(nrow(diamonds), 1000), ]

plot_ly(x = d$carat, y = d$price, type = 'scatter', mode = 'markers', color = d$clarity, size = d$carat)
## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Multiple Charts

p <- subplot(
  plot_ly(us_confirmed, x = us_confirmed$Date, y = us_confirmed$Case_Number, type='scatter', mode = 'lines'),
  plot_ly(us_deaths, x = us_deaths$Date, y = us_deaths$Case_Number, type='scatter', mode = 'lines'),
margin=0.05
)
p %>% layout(showlegend=FALSE)
p <- subplot(
  plot_ly(us_confirmed, x = us_confirmed$Date, y = us_confirmed$Case_Number, type='scatter', mode = 'lines'),
  plot_ly(us_deaths, x = us_deaths$Date, y = us_deaths$Case_Number, type='scatter', mode = 'lines'),
margin=0.05, nrows = 2
)
p %>% layout(showlegend=FALSE)