Instructor: Dr. Bob Batzinger
Academic year: 2021/2022
Semester: 1
Begins June 2021
dat = data.frame(rbind(
c(14, 2963,6.6),
c(14,10110,6.5),
c(11.1,1402112,7.0),
c(13,3214,2.2),
c(17.4,1380004,3.7),
c(6,51780,2.7)))
rownames(dat) = c("AR","AZ","CN","GE","IN","KR")
colnames(dat) = c('birthrate','population', 'diff.100')
unmatched = round((dat[,1] * dat[,2] * dat[,3]) / 10000, 3)
dat = cbind(dat,unmatched)
birthrate | population | diff.100 | unmatched | |
---|---|---|---|---|
AR | 14.0 | 2963 | 6.6 | 27.378 |
AZ | 14.0 | 10110 | 6.5 | 92.001 |
CN | 11.1 | 1402112 | 7.0 | 10894.410 |
GE | 13.0 | 3214 | 2.2 | 9.192 |
IN | 17.4 | 1380004 | 3.7 | 8884.466 |
KR | 6.0 | 51780 | 2.7 | 83.884 |
## dim(dat) ========
## [1] 6 4
## colnames(dat) ====
## [1] "birthrate" "population" "diff.100" "unmatched"
## rownames(dat) ====
## [1] "AR" "AZ" "CN" "GE" "IN" "KR"
## head(dat,2) ======
## birthrate population diff.100 unmatched
## AR 14 2963 6.6 27.378
## AZ 14 10110 6.5 92.001
## tail(dat,2) ======
## birthrate population diff.100 unmatched
## IN 17.4 1380004 3.7 8884.466
## KR 6.0 51780 2.7 83.884
## str(data) ======
## 'data.frame': 6 obs. of 4 variables:
## $ birthrate : num 14 14 11.1 13 17.4 6
## $ population: num 2963 10110 1402112 3214 1380004 ...
## $ diff.100 : num 6.6 6.5 7 2.2 3.7 2.7
## $ unmatched : num 27.38 92 10894.41 9.19 8884.47 ...
##
## summary(dat) =====
## birthrate population diff.100 unmatched
## Min. : 6.00 Min. : 2963 Min. :2.200 Min. : 9.192
## 1st Qu.:11.57 1st Qu.: 4938 1st Qu.:2.950 1st Qu.: 41.505
## Median :13.50 Median : 30945 Median :5.100 Median : 87.942
## Mean :12.58 Mean : 475030 Mean :4.783 Mean : 3331.889
## 3rd Qu.:14.00 3rd Qu.:1047948 3rd Qu.:6.575 3rd Qu.: 6686.350
## Max. :17.40 Max. :1402112 Max. :7.000 Max. :10894.410
## [1] 14.0 14.0 11.1 13.0 17.4 6.0
## [1] 2963 10110 1402112 3214 1380004 51780
## [1] 6.6 6.5 7.0 2.2 3.7 2.7
## [1] 27.378 92.001 10894.410 9.192 8884.466 83.884
Finding the mean and std dev of an array
doStat <- function(n=10000) {
total = 0; sq = 0; cnt = 0; x = 1:n
for (i in x) {total = total + i
sq = sq + i * i; cnt = cnt + 1
}
mn = round(total / n,5); msq = mn * mn
doStat = paste("mean=",mn,", std.dev=",
round(sqrt((sq - msq*cnt)/(cnt-1)),5),"\n")
}
doStat2 <- function(n=10000) { x = 1:n
doStat2 = paste("mean=",mean(x),", std.dev=",sd(x),"\n")
}
doStat(): user-defined
## time= 0.03311 +/- 0.0019
## mean= 5000.5 , std.dev= 2886.89568
##
Same analysis using mean() and sd()
## time= 0.00278 +/- 0.00109
## mean= 5000.5 , std.dev= 2886.89567990717
##
Big Mac Menu Signage in Hong Kong1
## 'data.frame': 341 obs. of 6 variables:
## $ name : Factor w/ 42 levels "Argentina","Australia",..: 39 2 4 5 6 7 13 16 19 22 ...
## $ iso_a3 : Factor w/ 41 levels "ARG","AUS","AUT",..: 38 2 4 5 16 6 15 18 21 24 ...
## $ currency_code: Factor w/ 46 levels "ARS","ATS","AUD",..: 42 3 4 5 19 9 18 21 24 27 ...
## $ local_price : num 1.6 1.75 90 2.5 1.1 1.89 16.4 7.6 1.18 370 ...
## $ dollar_ex : num 1 1.64 42 13.8 0.67 1.39 6.65 7.8 0.74 154 ...
## $ date : Factor w/ 14 levels "1986-09-01","1987-01-01",..: 1 1 1 1 1 1 1 1 1 1 ...
## 'data.frame': 1730 obs. of 7 variables:
## $ name : Factor w/ 74 levels "Argentina","Australia",..: 1 2 7 8 9 10 11 15 16 19 ...
## $ iso_a3 : Factor w/ 73 levels "ARE","ARG","AUS",..: 2 3 8 24 9 11 12 15 17 21 ...
## $ currency_code: Factor w/ 57 levels "AED","ARS","AUD",..: 2 3 6 17 7 9 10 13 14 16 ...
## $ local_price : num 2.5 2.59 2.95 1.9 2.85 ...
## $ dollar_ex : num 1 1.68 1.79 0.633 1.47 ...
## $ GDP_dollar : num NA NA NA NA NA NA NA NA NA NA ...
## $ date : Factor w/ 35 levels "2000-04-01","2001-04-01",..: 1 1 1 1 1 1 1 1 1 1 ...
Compare the lists of attributes
## macdat1 :
## name iso_a3 currency_code local_price dollar_ex date
##
## macdat2 :
## name iso_a3 currency_code local_price dollar_ex GDP_dollar date
## name iso_a3 currency_code local_price
## Britain : 48 GBR : 48 EUR : 301 Min. : 0
## United States: 48 USA : 48 GBP : 48 1st Qu.: 4
## Australia : 47 AUS : 47 USD : 48 Median : 15
## Canada : 47 CAN : 47 AUD : 47 Mean : 6963
## Denmark : 47 DNK : 47 CAD : 47 3rd Qu.: 89
## Hong Kong : 47 HKG : 47 DKK : 47 Max. :4000000
## (Other) :1787 (Other):1787 (Other):1533
## dollar_ex date
## Min. : 0.0 2018-07-01: 72
## 1st Qu.: 1.3 2019-01-01: 72
## Median : 5.8 2019-07-09: 72
## Mean : 2662.7 2020-01-14: 72
## 3rd Qu.: 33.7 2020-07-01: 72
## Max. :1600500.0 2021-01-01: 71
## (Other) :1640
plot(as.Date(macdat$date),macdat$local_price,main="Cost of a Big Mac",
ylab="Cost (in local currency)",xlab="Year",log="y",
col=rainbow(50,)[macdat$iso_a3],pch=19,cex=0.5)
## Warning in xy.coords(x, y, xlabel, ylabel, log): 1 y value <= 0 omitted from
## logarithmic plot
## Warning in xy.coords(x, y, xlabel, ylabel, log): 1 y value <= 0 omitted from
## logarithmic plot
Finding the bad data point
## name iso_a3 currency_code local_price dollar_ex date
## 1638 Venezuela VEN VEF 0 0 2018-01-01
Removing the bad data point
mcost = macdat$local_price / macdat$dollar_ex
plot(as.Date(macdat$date), mcost,
ylab= "cost in US Dollars", xlab="Year",
pch=19,cex=0.5,col=rainbow(50)[macdat$iso_a3])
m = mean(mcost)
sd = sd(mcost)
macdat[mcost < m - 2*sd, c("iso_a3","date","local_price","dollar_ex")]
## iso_a3 date local_price dollar_ex
## 4 BRA 1986-09-01 2.5 13.8000
## 114 RUS 1992-04-01 58.0 98.9500
## 489 SAU 2004-05-01 2.4 3.7502
## 1358 VEN 2015-07-01 132.0 197.0000
## 1414 VEN 2016-01-01 132.0 198.6986
m = mean(mcost)
sd = sd(mcost)
macdat[mcost > m + 3.5*sd, c("iso_a3","date","local_price","dollar_ex")]
## iso_a3 date local_price dollar_ex
## 719 NOR 2008-06-01 40.0 5.07915
## 894 NOR 2011-07-01 45.0 5.41405
## 908 CHE 2011-07-01 6.5 0.80615
## 1024 VEN 2012-07-01 34.0 4.29465
## 1057 NOR 2013-01-01 43.0 5.48310
## 1079 VEN 2013-01-01 39.0 4.29465
## 1168 NOR 2014-01-01 48.0 6.15745
## 1224 NOR 2014-07-01 48.0 6.18730
## function (x, y = NULL, legend, fill = NULL, col = par("col"),
## border = "black", lty, lwd, pch, angle = 45, density = NULL,
## bty = "o", bg = par("bg"), box.lwd = par("lwd"), box.lty = par("lty"),
## box.col = par("fg"), pt.bg = NA, cex = 1, pt.cex = cex, pt.lwd = lwd,
## xjust = 0, yjust = 1, x.intersp = 1, y.intersp = 1, adj = c(0,
## 0.5), text.width = NULL, text.col = par("col"), text.font = NULL,
## merge = do.lines && has.pch, trace = FALSE, plot = TRUE,
## ncol = 1, horiz = FALSE, title = NULL, inset = 0, xpd, title.col = text.col,
## title.adj = 0.5, seg.len = 2)
## {
## if (missing(legend) && !missing(y) && (is.character(y) ||
## is.expression(y))) {
## legend <- y
## y <- NULL
## }
## mfill <- !missing(fill) || !missing(density)
## if (!missing(xpd)) {
## op <- par("xpd")
## on.exit(par(xpd = op))
## par(xpd = xpd)
## }
## title <- as.graphicsAnnot(title)
## if (length(title) > 1)
## stop("invalid 'title'")
## legend <- as.graphicsAnnot(legend)
## n.leg <- if (is.call(legend))
## 1
## else length(legend)
## if (n.leg == 0)
## stop("'legend' is of length 0")
## auto <- if (is.character(x))
## match.arg(x, c("bottomright", "bottom", "bottomleft",
## "left", "topleft", "top", "topright", "right", "center"))
## else NA
## if (is.na(auto)) {
## xy <- xy.coords(x, y, setLab = FALSE)
## x <- xy$x
## y <- xy$y
## nx <- length(x)
## if (nx < 1 || nx > 2)
## stop("invalid coordinate lengths")
## }
## else nx <- 0
## xlog <- par("xlog")
## ylog <- par("ylog")
## rect2 <- function(left, top, dx, dy, density = NULL, angle,
## ...) {
## r <- left + dx
## if (xlog) {
## left <- 10^left
## r <- 10^r
## }
## b <- top - dy
## if (ylog) {
## top <- 10^top
## b <- 10^b
## }
## rect(left, top, r, b, angle = angle, density = density,
## ...)
## }
## segments2 <- function(x1, y1, dx, dy, ...) {
## x2 <- x1 + dx
## if (xlog) {
## x1 <- 10^x1
## x2 <- 10^x2
## }
## y2 <- y1 + dy
## if (ylog) {
## y1 <- 10^y1
## y2 <- 10^y2
## }
## segments(x1, y1, x2, y2, ...)
## }
## points2 <- function(x, y, ...) {
## if (xlog)
## x <- 10^x
## if (ylog)
## y <- 10^y
## points(x, y, ...)
## }
## text2 <- function(x, y, ...) {
## if (xlog)
## x <- 10^x
## if (ylog)
## y <- 10^y
## text(x, y, ...)
## }
## if (trace)
## catn <- function(...) do.call("cat", c(lapply(list(...),
## formatC), list("\n")))
## cin <- par("cin")
## Cex <- cex * par("cex")
## if (is.null(text.width))
## text.width <- max(abs(strwidth(legend, units = "user",
## cex = cex, font = text.font)))
## else if (!is.numeric(text.width) || text.width < 0)
## stop("'text.width' must be numeric, >= 0")
## xc <- Cex * xinch(cin[1L], warn.log = FALSE)
## yc <- Cex * yinch(cin[2L], warn.log = FALSE)
## if (xc < 0)
## text.width <- -text.width
## xchar <- xc
## xextra <- 0
## yextra <- yc * (y.intersp - 1)
## ymax <- yc * max(1, strheight(legend, units = "user", cex = cex)/yc)
## ychar <- yextra + ymax
## if (trace)
## catn(" xchar=", xchar, "; (yextra,ychar)=", c(yextra,
## ychar))
## if (mfill) {
## xbox <- xc * 0.8
## ybox <- yc * 0.5
## dx.fill <- xbox
## }
## do.lines <- (!missing(lty) && (is.character(lty) || any(lty >
## 0))) || !missing(lwd)
## n.legpercol <- if (horiz) {
## if (ncol != 1)
## warning(gettextf("horizontal specification overrides: Number of columns := %d",
## n.leg), domain = NA)
## ncol <- n.leg
## 1
## }
## else ceiling(n.leg/ncol)
## has.pch <- !missing(pch) && length(pch) > 0
## if (do.lines) {
## x.off <- if (merge)
## -0.7
## else 0
## }
## else if (merge)
## warning("'merge = TRUE' has no effect when no line segments are drawn")
## if (has.pch) {
## if (is.character(pch) && !is.na(pch[1L]) && nchar(pch[1L],
## type = "c") > 1) {
## if (length(pch) > 1)
## warning("not using pch[2..] since pch[1L] has multiple chars")
## np <- nchar(pch[1L], type = "c")
## pch <- substr(rep.int(pch[1L], np), 1L:np, 1L:np)
## }
## if (!is.character(pch))
## pch <- as.integer(pch)
## }
## if (is.na(auto)) {
## if (xlog)
## x <- log10(x)
## if (ylog)
## y <- log10(y)
## }
## if (nx == 2) {
## x <- sort(x)
## y <- sort(y)
## left <- x[1L]
## top <- y[2L]
## w <- diff(x)
## h <- diff(y)
## w0 <- w/ncol
## x <- mean(x)
## y <- mean(y)
## if (missing(xjust))
## xjust <- 0.5
## if (missing(yjust))
## yjust <- 0.5
## }
## else {
## h <- (n.legpercol + (!is.null(title))) * ychar + yc
## w0 <- text.width + (x.intersp + 1) * xchar
## if (mfill)
## w0 <- w0 + dx.fill
## if (do.lines)
## w0 <- w0 + (seg.len + x.off) * xchar
## w <- ncol * w0 + 0.5 * xchar
## if (!is.null(title) && (abs(tw <- strwidth(title, units = "user",
## cex = cex) + 0.5 * xchar)) > abs(w)) {
## xextra <- (tw - w)/2
## w <- tw
## }
## if (is.na(auto)) {
## left <- x - xjust * w
## top <- y + (1 - yjust) * h
## }
## else {
## usr <- par("usr")
## inset <- rep_len(inset, 2)
## insetx <- inset[1L] * (usr[2L] - usr[1L])
## left <- switch(auto, bottomright = , topright = ,
## right = usr[2L] - w - insetx, bottomleft = ,
## left = , topleft = usr[1L] + insetx, bottom = ,
## top = , center = (usr[1L] + usr[2L] - w)/2)
## insety <- inset[2L] * (usr[4L] - usr[3L])
## top <- switch(auto, bottomright = , bottom = , bottomleft = usr[3L] +
## h + insety, topleft = , top = , topright = usr[4L] -
## insety, left = , right = , center = (usr[3L] +
## usr[4L] + h)/2)
## }
## }
## if (plot && bty != "n") {
## if (trace)
## catn(" rect2(", left, ",", top, ", w=", w, ", h=",
## h, ", ...)", sep = "")
## rect2(left, top, dx = w, dy = h, col = bg, density = NULL,
## lwd = box.lwd, lty = box.lty, border = box.col)
## }
## xt <- left + xchar + xextra + (w0 * rep.int(0:(ncol - 1),
## rep.int(n.legpercol, ncol)))[1L:n.leg]
## yt <- top - 0.5 * yextra - ymax - (rep.int(1L:n.legpercol,
## ncol)[1L:n.leg] - 1 + (!is.null(title))) * ychar
## if (mfill) {
## if (plot) {
## if (!is.null(fill))
## fill <- rep_len(fill, n.leg)
## rect2(left = xt, top = yt + ybox/2, dx = xbox, dy = ybox,
## col = fill, density = density, angle = angle,
## border = border)
## }
## xt <- xt + dx.fill
## }
## if (plot && (has.pch || do.lines))
## col <- rep_len(col, n.leg)
## if (missing(lwd) || is.null(lwd))
## lwd <- par("lwd")
## if (do.lines) {
## if (missing(lty) || is.null(lty))
## lty <- 1
## lty <- rep_len(lty, n.leg)
## lwd <- rep_len(lwd, n.leg)
## ok.l <- !is.na(lty) & (is.character(lty) | lty > 0) &
## !is.na(lwd)
## if (trace)
## catn(" segments2(", xt[ok.l] + x.off * xchar, ",",
## yt[ok.l], ", dx=", seg.len * xchar, ", dy=0, ...)")
## if (plot)
## segments2(xt[ok.l] + x.off * xchar, yt[ok.l], dx = seg.len *
## xchar, dy = 0, lty = lty[ok.l], lwd = lwd[ok.l],
## col = col[ok.l])
## xt <- xt + (seg.len + x.off) * xchar
## }
## if (has.pch) {
## pch <- rep_len(pch, n.leg)
## pt.bg <- rep_len(pt.bg, n.leg)
## pt.cex <- rep_len(pt.cex, n.leg)
## pt.lwd <- rep_len(pt.lwd, n.leg)
## ok <- !is.na(pch)
## if (!is.character(pch)) {
## ok <- ok & (pch >= 0 | pch <= -32)
## }
## else {
## ok <- ok & nzchar(pch)
## }
## x1 <- (if (merge && do.lines)
## xt - (seg.len/2) * xchar
## else xt)[ok]
## y1 <- yt[ok]
## if (trace)
## catn(" points2(", x1, ",", y1, ", pch=", pch[ok],
## ", ...)")
## if (plot)
## points2(x1, y1, pch = pch[ok], col = col[ok], cex = pt.cex[ok],
## bg = pt.bg[ok], lwd = pt.lwd[ok])
## }
## xt <- xt + x.intersp * xchar
## if (plot) {
## if (!is.null(title))
## text2(left + w * title.adj, top - ymax, labels = title,
## adj = c(title.adj, 0), cex = cex, col = title.col)
## text2(xt, yt, labels = legend, adj = adj, cex = cex,
## col = text.col, font = text.font)
## }
## invisible(list(rect = list(w = w, h = h, left = left, top = top),
## text = list(x = xt, y = yt)))
## }
## <bytecode: 0x555b45df3868>
## <environment: namespace:graphics>
##
## Call:
## lm(formula = gdp ~ bigmac)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23080 -9408 -1116 8913 41524
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -28609 6003 -4.766 1.01e-05 ***
## bigmac 14005 1470 9.527 3.30e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14260 on 69 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5681, Adjusted R-squared: 0.5619
## F-statistic: 90.77 on 1 and 69 DF, p-value: 3.302e-14
Artist rendering3
---
title: "Title????"
author: "??Author??"
date: "?? July 2021"
output: html_notebook
---
# Introduction
## Background
## Research Question
# Methodology
## Data source
## Dictionary of the Dataset
# Results
# Summary and Conclusion
# References
LastName | FirstName | Age | Position | Type |
---|---|---|---|---|
Allen* | Miss Elizabeth Walton | 29 | Pass | C1 |
Allen* | Mr. Ernest Frederick | 24 | Staff | Engineering |
Allison | Mr. Hudson Joshua Creighton | 30 | Pass | C2 |
Allsop | Mr. Alfred Samuel | 34 | Staff | Housekeeping |
Avery* | Mr. James Frank | 22 | Staff | Restuarant |
Brown* | Miss Amelia Mary | 18 | Pass | C1 |
Swane | Mr. George Swane | 19 | Pass | C2 |
The Economist, 2021. Burgernomics: The Big Mac index. An interactive currency comparison tool available online at https://www.economist.com/big-mac-index JAN 12TH 2021↩︎
The Economist, 2021. Burgernomics: The Big Mac index. An interactive currency comparison tool available online at https://www.economist.com/big-mac-index JAN 12TH 2021↩︎
Picture drawn by Willy Stöwer in 1912 from eye-witness description. Published in Magazine Die Gartenlaube, en:Die Gartenlaube and de:Die Gartenlaube. Artwork in Public Domain, available online at https://commons.wikimedia.org/w/index.php?curid=97646↩︎