download.file('https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/cdc.Rdata', 'cdc.Rdata')
load('cdc.Rdata')
head(cdc)
## genhlth exerany hlthplan smoke100 height weight wtdesire age gender
## 1 good 0 1 0 70 175 175 77 m
## 2 good 0 1 1 64 125 115 33 f
## 3 good 1 1 1 60 105 105 49 f
## 4 good 1 1 0 66 132 124 42 f
## 5 very good 0 1 0 61 150 130 55 f
## 6 very good 1 1 0 64 114 114 55 f
cor(cdc[,c('height', 'weight', 'wtdesire')])
## height weight wtdesire
## height 1.0000000 0.5553222 0.7581195
## weight 0.5553222 1.0000000 0.8000521
## wtdesire 0.7581195 0.8000521 1.0000000
plot(cdc$weight, cdc$height, type= 'p')
plot(height ~ weight, cdc, type = 'p')
plot(height ~ weight, cdc, type = 'p', col = 'red')
#cdc$gender
plot(height ~ weight, cdc, type = 'p', col = cdc$gender)
plot(height ~ weight, cdc, type = 'p', col = ifelse(cdc$weight> 300, 'red', 'blue'), xlim = c(0,500), ylim = c(0,100) )
male_df <- cdc[cdc$gender == 'm',]
female_df <- cdc[cdc$gender == 'f',]
plot(height ~ weight, male_df, type = 'p', col = 'blue' )
points(height ~ weight , female_df, col = 'red')
#?lm
fit <- lm(height ~ weight, cdc)
plot(height ~ weight, cdc, type = 'p', col = 'red')
abline(v = 300, col = 'orange')
abline(h = 80, col = 'blue')
fit <- lm(height ~ weight, cdc)
plot(height ~ weight, cdc, type = 'p', col = 'red')
abline(fit, col = 'blue')
plot(height ~ weight, male_df, type = 'p', col = 'blue' )
points(height ~ weight , female_df, col = 'red')
fit1 <- lm(height ~ weight, male_df)
fit2 <- lm(height ~ weight, female_df)
fit1
##
## Call:
## lm(formula = height ~ weight, data = male_df)
##
## Coefficients:
## (Intercept) weight
## 63.48202 0.03576
fit2
##
## Call:
## lm(formula = height ~ weight, data = female_df)
##
## Coefficients:
## (Intercept) weight
## 60.98341 0.02231
plot(height ~ weight, male_df, type = 'p', col = 'blue' )
points(height ~ weight , female_df, col = 'red')
abline(fit1, col ='orange')
abline(fit2, col ='green')
#head(cdc)
plot(wtdesire ~ weight, cdc)
#cdc[((cdc$wtdesire < 500) & (cdc$weight < 450)),]
plot(wtdesire ~ weight,cdc[((cdc$wtdesire < 500) & (cdc$weight < 450)),], type = 'p', col = 'blue' )
barplot(c(80,80,84,88), ylim = c(70,100))
plot(height~ age, cdc)
## Mosaic Plot
#head(cdc)
mosaicplot(smoke100 ~ gender, cdc,col = c('blue', 'red'))
#?rainbow()
smokers_gender <- table(cdc$gender, cdc$smoke100)
smokers_gender
##
## 0 1
## m 4547 5022
## f 6012 4419
table(cdc$smoke100)
##
## 0 1
## 10559 9441
mosaicplot(smoke100 ~ gender, cdc ,col=rainbow(length(colnames(smokers_gender))))
boxplot(cdc$height)
boxplot(runif(30))
hist(runif(30))
rnorm(1)
## [1] -0.9234485
hist(rnorm(30))
#rnorm(30)
rand_data<- rnorm(100,mean = 25, sd = 10 )
hist(rand_data)
boxplot(rand_data)
rand_data <- c(rand_data,999)
hist(rand_data)
boxplot(rand_data)
boxplot(height~gender, cdc, col=cdc$gender)
boxplot(weight~gender, cdc, col=cdc$gender)
## Legend
library(readr)
covid19 <- read_csv('https://raw.githubusercontent.com/ywchiu/cdc_course/master/2020Exam/covid19.csv')
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character(),
## Lat = col_double(),
## Long = col_double(),
## Case = col_character(),
## Date = col_date(format = ""),
## Case_Number = col_double()
## )
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
us_confirmed <- covid19 %>%
filter(`Case` == 'confirmed', `Country/Region` == 'US') %>%
select(Date, Case_Number)
us_deaths <- covid19 %>%
filter(`Case` == 'deaths', `Country/Region` == 'US') %>%
select(Date, Case_Number)
head(us_confirmed,3)
## # A tibble: 3 x 2
## Date Case_Number
## <date> <dbl>
## 1 2020-01-22 1
## 2 2020-01-23 1
## 3 2020-01-24 2
plot(Case_Number ~ Date, us_confirmed, type = 'l', col = 'red')
legend(x = as.Date('2020-02-01'), y = 6000000, 'US Confirmed', lwd = 1, col ='red')
plot(Case_Number ~ Date, us_confirmed, type = 'l', col = 'red')
lines(Case_Number ~ Date, us_deaths, type = 'l', col = 'blue')
legend(x = as.Date('2020-02-01'), y = 6000000, c('US Confirmed', 'US Deaths'), lwd = 1, col =c('red', 'blue') )
plot(Case_Number ~ Date, us_confirmed, type = 'l', col = 'red', yaxt = 'n')
lines(Case_Number ~ Date, us_deaths, type = 'l', col = 'blue')
axis(2, at=seq(0,7000000, 1000000), labels=sprintf("%.f", seq(0,7000000, 1000000)))
legend('topleft', c('US Confirmed', 'US Deaths'), lwd = 1, col =c('red', 'blue'), title = 'Case Type' )
#sort(table(cdc$genhlth))
sorted_data <- sort(table(cdc$genhlth), decreasing = TRUE)
sorted_data
##
## very good good excellent fair poor
## 6972 5675 4657 2019 677
pie(sorted_data, init.angle = 90, clockwise = TRUE, col = 1:length(names(sorted_data)))
legend('bottomright', names(sorted_data), fill = 1:length(names(sorted_data)))
?rainbow
pie(sorted_data, init.angle = 90, clockwise = TRUE, col = 1:length(names(sorted_data)))
legend('bottomright', names(sorted_data), fill = 1:length(names(sorted_data)))
pie(sorted_data, init.angle = 90, clockwise = TRUE, col = rainbow(length(names(sorted_data))))
legend('bottomright', names(sorted_data), fill = rainbow(length(names(sorted_data))), cex = 1.2)
rainbow(3)
## [1] "#FF0000" "#00FF00" "#0000FF"
plot(Case_Number ~ Date, us_confirmed, type = 'l', col = c('#002FA7'), yaxt = 'n', cex = 3, xlab = 'Datetime', ylab = 'Case Number')
legend(x = as.Date('2020-02-01'), y = 6000000, 'US Confirmed', lwd = 1, col ='#002FA7')
plot(Case_Number ~ Date, us_confirmed, type = 'o', col = 'red', yaxt = 'n')
lines(Case_Number ~ Date, us_deaths, type = 'o', col = 'blue', lty = 2, pch = 17)
axis(2, at=seq(0,7000000, 1000000), labels=sprintf("%.f", seq(0,7000000, 1000000)))
legend('topleft', c('US Confirmed', 'US Deaths'), lwd = 1, col =c('red', 'blue'), title = 'Case Type' )
## par
showLayout <- function(n){
for(i in 1:n){
plot(1, type="n", xaxt="n", yaxt="n", xlab="", ylab="")
text(1, 1, labels=i, cex=10)
}
}
par(mfrow = c(3,2))
showLayout(6)
par(mfrow = c(2,3))
showLayout(6)
par(mfcol = c(3,2))
showLayout(6)
par(mar = c(1,1,1,1),mfcol = c(3,2))
showLayout(6)
par(mar = c(3,3,3,3),mfcol = c(3,2))
showLayout(6)
us_confirmed$daily_new <- c(0, diff(us_confirmed$Case_Number))
par(mfrow=c(2,1))
plot(Case_Number ~ Date, us_confirmed, type = 'o', col = 'red', xaxt = 'n', xlab = '')
barplot(daily_new ~ Date, us_confirmed, border = 'blue', col = 'blue')
## Save Picture
jpeg("covid19_us.jpg")
par(mfrow=c(2,1))
plot(Case_Number ~ Date, us_confirmed, type = 'o', col = 'red', xaxt = 'n', xlab = '')
barplot(daily_new ~ Date, us_confirmed, border = 'blue', col = 'blue')
dev.off()
## quartz_off_screen
## 2
library(ggplot2)
g <- ggplot(us_confirmed)
fig <- g +
aes(x = Date, y = Case_Number) +
geom_line()
fig
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ggplotly(fig)
ds <- data.frame(labels = c("A", "B", "C"),
values = c(10, 20, 30))
head(ds)
## labels values
## 1 A 10
## 2 B 20
## 3 C 30
library(plotly)
plot_ly(ds, labels = ds$labels,
values = ds$values,
type = "pie", hole = 0.6) %>%
layout(title = "Donut Chart Example")
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
#us_confirmed
plot_ly(x = us_confirmed$Date, y = us_confirmed$Case_Number, fill = "tozeroy", name="Confirmed", type='scatter', mode= 'markers')
# us_deaths
plot_ly(x = us_confirmed$Date, y = us_confirmed$Case_Number, fill = "tozeroy", name="Confirmed", type='scatter', mode= 'markers') %>%
add_trace(x= us_deaths$Date, y = us_deaths$Case_Number, fill = 'tozeroy', name = 'Deaths', type ='scatter', mode = 'markers')
library(plotly)
month<- c(1,2,3,4,5)
taipei<- c(92.5,132.6,168.8,159.1,218.7)
tainan <- c(21.2, 30.6, 37.3, 84.6, 184.3)
total <- taipei + tainan
y <- list(title="Rainfall")
plot_ly(x = month, y = taipei, fill = "tozeroy", name="taipei",type='scatter', mode= 'markers') %>%add_trace(x = month, y = total, fill = "tonexty", name="tainan") %>%
layout(yaxis = y)
## Bubble Chart
data("diamonds")
head(diamonds)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
d <- diamonds[sample(nrow(diamonds), 1000), ]
plot_ly(x = d$carat, y = d$price, type = 'scatter', mode = 'markers', color = d$clarity, size = d$carat)
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Multiple Charts
p <- subplot(
plot_ly(us_confirmed, x = us_confirmed$Date, y = us_confirmed$Case_Number, type='scatter', mode = 'lines'),
plot_ly(us_deaths, x = us_deaths$Date, y = us_deaths$Case_Number, type='scatter', mode = 'lines'),
margin=0.05
)
p %>% layout(showlegend=FALSE)
p <- subplot(
plot_ly(us_confirmed, x = us_confirmed$Date, y = us_confirmed$Case_Number, type='scatter', mode = 'lines'),
plot_ly(us_deaths, x = us_deaths$Date, y = us_deaths$Case_Number, type='scatter', mode = 'lines'),
margin=0.05, nrows = 2
)
p %>% layout(showlegend=FALSE)