setwd("~/career_da-pffs/udacity-eda/eda-course-materials/lesson6")

# Print the compact structure of gDat (column names, types, leading values).
str(gDat)
'data.frame':   1680 obs. of  6 variables:
 $ country  : Factor w/ 140 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
 $ pop      : num  8425333 9240934 10267083 11537966 13079460 ...
 $ continent: Factor w/ 4 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
 $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
 $ gdpPercap: num  779 821 853 836 740 ...
# summary(gDat)  # whole-table summary, left disabled
# Pearson product-moment correlation test between lifeExp and gdpPercap.
cor.test(x = gDat$lifeExp, y = gDat$gdpPercap, method = "pearson")

    Pearson's product-moment correlation

data:  gDat$lifeExp and gDat$gdpPercap
t = 28.799, df = 1678, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5422206 0.6062796
sample estimates:
      cor 
0.5751311 

Observation - a moderate positive correlation (r ≈ 0.58), significantly different from 0.

summary(gDat$lifeExp)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
  23.60   48.08   60.34   59.26   70.75   82.60

  1. A scatter plot of GDPPercap vs LifeExp
# Scatter plot of gdpPercap against lifeExp, coloured by continent.
# Breaks and limits are set in a single scale_x_continuous() call. A separate
# xlim() adds a second x scale that replaces the first one (discarding the
# breaks); that is what produced the "Scale for 'x' is already present"
# message recorded below. The upper limit is the 99th percentile of lifeExp,
# so points above it are dropped from the plot.
ggplot(gDat, aes(x = lifeExp, y = gdpPercap, color = continent)) +
  geom_point() +
  ggtitle("GdpPercap vs. LifeExp") +
  labs(x = "lifeExp", y = "gdpPercap") +
  scale_x_continuous(
    breaks = seq(20, 90, 4),
    limits = c(20, quantile(gDat$lifeExp, 0.99, names = FALSE))
  )
Scale for 'x' is already present. Adding another scale for 'x', which will replace the
existing scale.

Observation - A few outliers: a country with almost 0 gdpPercap and some with extremely high GDP. Most countries have a gdpPercap below 15000. To reduce overplotting, use alpha and a coordinate transformation.

# Smallest gdpPercap value in gDat (checks the near-zero outlier noted above).
min(gDat$gdpPercap)

# Same scatter plot with half-transparent points to reduce overplotting and
# both axes drawn on a square-root coordinate system.
ggplot(data = gDat, mapping = aes(lifeExp, gdpPercap, colour = continent)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "GdpPercap vs. LifeExp",
    x = "lifeExp",
    y = "gdpPercap"
  ) +
  scale_x_continuous(breaks = seq(from = 20, to = 90, by = 4)) +
  coord_trans(x = "sqrt", y = "sqrt")

Omitting the top 1% of values by cutting both axes at their 99th percentile (quantile method)

# Scatter plot with the top 1% of lifeExp and gdpPercap values trimmed,
# sqrt-transformed coordinates and a linear-model smoother.
# The x breaks and x limits live in one scale_x_continuous() call: adding
# xlim() after scale_x_continuous() replaces the earlier scale (losing the
# breaks) and triggers the "Scale for 'x' is already present" message
# recorded below.
# NOTE(review): color = continent in the plot-level mapping also groups the
# data, so geom_smooth() fits one line per continent (all drawn in red);
# add aes(group = 1) to geom_smooth() if a single overall fit was intended.
ggplot(gDat, aes(x = lifeExp, y = gdpPercap, color = continent)) +
  geom_point(alpha = 1 / 2) +
  ggtitle("GdpPercap vs. LifeExp") +
  labs(x = "lifeExp", y = "gdpPercap") +
  scale_x_continuous(
    breaks = seq(20, 90, 4),
    limits = c(20, quantile(gDat$lifeExp, 0.99, names = FALSE))
  ) +
  ylim(0, quantile(gDat$gdpPercap, 0.99)) +
  coord_trans(x = "sqrt", y = "sqrt") +
  geom_smooth(method = "lm", color = "red")
Scale for 'x' is already present. Adding another scale for 'x', which will replace the
existing scale.

geom_smooth() omits most of the values and therefore does not seem to be the proper choice for this data.

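Making the plot more readable by creating and using conditional summaries.
Error below - I do not understand it?? (Likely cause: plyr is attached after dplyr, so summarise() resolves to plyr::summarise(), and dplyr's n() cannot be called outside a dplyr verb.)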

# Per-country summary of life expectancy: mean, median and row count,
# sorted by country.
# The dplyr verbs are namespace-qualified on purpose: when plyr is attached
# after dplyr, plyr::summarise() and plyr::arrange() mask the dplyr versions,
# and dplyr's n() then fails with "This function should not be called
# directly" (the error recorded below).
gDat_by_country <- gDat %>%
  dplyr::group_by(country) %>%
  dplyr::summarise(
    lifeExp_mean = mean(lifeExp),
    lifeExp_median = median(lifeExp),
    n = dplyr::n()
  ) %>%
  dplyr::arrange(country)
Error: This function should not be called directly
Call `rlang::last_error()` to see a backtrace
# Box plot of lifeExp per continent (rows with a missing continent removed),
# with each group's mean overlaid as a cross (shape 4).
# NOTE(review): fun.y is deprecated in favour of fun from ggplot2 3.3.0 on;
# kept as-is because the installed ggplot2 version is not visible here.
gDat_with_continent <- subset(gDat, !is.na(continent))
ggplot(data = gDat_with_continent, mapping = aes(continent, lifeExp)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 4)

# Box plot of gdpPercap per continent (rows with a missing continent removed),
# with each group's median overlaid as a cross (shape 4).
# The y axis is zoomed with coord_cartesian() instead of scale limits:
# scale_y_continuous(limits = ...) discards every row above 30000 *before*
# the boxplot statistics and the median are computed, which biases them
# downwards. coord_cartesian() only changes the visible window.
ggplot(
  subset(gDat, !is.na(continent)),
  aes(x = continent, y = gdpPercap, color = continent)
) +
  geom_boxplot() +
  stat_summary(geom = "point", shape = 4, fun.y = median) +
  scale_y_continuous(breaks = seq(0, 30000, 1000)) +
  coord_cartesian(ylim = c(0, 30000))

NA

Write code to create a new data frame - why??

# Mean and median gdpPercap, plus row count, for each (year, lifeExp) group.
# dplyr verbs are namespace-qualified: with plyr attached after dplyr,
# summarise() resolves to plyr::summarise() and dplyr's n() fails with
# "This function should not be called directly" (the error recorded below).
# The result is ungrouped so later verbs do not silently operate per year.
# NOTE(review): lifeExp is continuous, so grouping on it presumably yields
# close to one row per group; confirm whether a categorical key such as
# continent was intended instead.
year_lifeExp_group <- gDat %>%
  dplyr::filter(!is.na(year)) %>%
  dplyr::group_by(year, lifeExp) %>%
  dplyr::summarise(
    mean_gdpPercap = mean(gdpPercap),
    median_gdpPercap = median(gdpPercap),
    n = dplyr::n()
  ) %>%
  dplyr::ungroup()
Error: This function should not be called directly
Call `rlang::last_error()` to see a backtrace

``` why?

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCg0KLS0tDQoNCnNldHdkKCJ+L2NhcmVlcl9kYS1wZmZzL3VkYWNpdHktZWRhL2VkYS1jb3Vyc2UtbWF0ZXJpYWxzL2xlc3NvbjYiKQ0KDQoNCmBgYHtyfQ0KaW5zdGFsbC5wYWNrYWdlcyhnZ3Bsb3QyKQ0KaW5zdGFsbC5wYWNrYWdlcygicmVzaGFwZSIpDQppbnN0YWxsLnBhY2thZ2VzKCJ0aWR5ciIpDQppbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeSh0aWR5cikNCmxpYnJhcnkoZ2dwbG90MikNCg0KYGBgDQoNCg0KYGBge3J9DQpnZFVSTCA8LSAiaHR0cDovL3d3dy5zdGF0LnViYy5jYS9+amVubnkvbm90T2N0by9TVEFUNTQ1QS9leGFtcGxlcy9nYXBtaW5kZXIvZGF0YS9nYXBtaW5kZXJEYXRhRml2ZVllYXIudHh0Ig0KZ0RhdCA8LSByZWFkLmRlbGltKGZpbGUgPSBnZFVSTCkNCmdEYXQgPC0gZHJvcGxldmVscyhzdWJzZXQoZ0RhdCwgY29udGluZW50ICE9ICJPY2VhbmlhIikpDQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KHBseXIpDQojIGh0dHA6Ly9ycHVicy5jb20vbmVpbF9zcGVuY2VyL3N0YXQ1NDVhLTIwMTMtaHcwNV9zcGVuY2VyLW5laQ0KYGBgDQoNCg0KYGBge3J9DQpzdHIoZ0RhdCkNCmBgYA0KDQpgYGB7cn0NCiNzdW1tYXJ5KGdEYXQpDQoNCmNvci50ZXN0KGdEYXQkbGlmZUV4cCwgZ0RhdCRnZHBQZXJjYXAsIG1ldGhvZCA9ICdwZWFyc29uJykgDQpgYGANCk9ic2VydmF0aW9uIC0gYSBzdHJvbmcgcG9zaXRpdmUgY29ycmVsYXRpb24uDQoNCg0Kc3VtbWFyeShnRGF0JGxpZmVFeHApDQogIE1pbi4gMXN0IFF1LiAgTWVkaWFuICAgIE1lYW4gM3JkIFF1LiAgICBNYXguIA0KICAyMy42MCAgIDQ4LjA4ICAgNjAuMzQgICA1OS4yNiAgIDcwLjc1ICAgODIuNjAgDQoNCg0KDQoxLiBBIHNjYXR0ZXIgcGxvdCBvZiBHRFBQZXJjYXAgdnMgTGlmZUV4cA0KYGBge3J9DQpnZ3Bsb3QoYWVzKHggPSBsaWZlRXhwLCB5ID0gZ2RwUGVyY2FwLCBjb2xvciA9IGNvbnRpbmVudCksIGRhdGEgPSBnRGF0KSArIA0KICBnZW9tX3BvaW50KCkgKw0KICBnZ3RpdGxlKCJHZHBQZXJjYXAgdnMuIExpZmVFeHAiKSArDQogIGxhYnMoeD0ibGlmZUV4cCIsIHk9ImdkcFBlcmNhcCIpICsNCiAgc2NhbGVfeF9jb250aW51b3VzKGJyZWFrcyA9IHNlcSgyMCw5MCw0KSkgDQpgYGANCk9ic2VydmF0aW9uIC0gQSBmZXcgb3V0bGllcnMgLSBhIGNvdW50cnkgd2l0aCBhbG1vc3QgMCBnZHBQZXJjYXAgYW5kIHNvbWUgd2l0aCBleHRyZW1lbHkgaGlnaCBHRFANCk1vc3QgY291bnRyaWVzIHdpdGggYSBnZHAgYmVsb3cgMTUwMDAuICBUbyByZWR1Y2Ugb3ZlcnBsb3Rpbmcgd2l0aCBhbHBoYSBhbmQgY29vcmRpbmF0ZSB0cmFuc29ybWF0aW9uIA0KDQptaW4oZ0RhdCRnZHBQZXJjYXApDQoNCmBgYHtyfQ0KZ2dwbG90KGFlcyh4ID0gbGlmZUV4cCwgeSA9IGdkcFBlcmNhcCwgY29sb3IgPSBjb250aW5lbnQpLCBkYXRhID0gZ0RhdCkg
KyANCiAgZ2VvbV9wb2ludChhbHBoYSA9IDEvMikgKw0KICBnZ3RpdGxlKCJHZHBQZXJjYXAgdnMuIExpZmVFeHAiKSArDQogIGxhYnMoeD0ibGlmZUV4cCIsIHk9ImdkcFBlcmNhcCIpICsNCiAgc2NhbGVfeF9jb250aW51b3VzKGJyZWFrcyA9IHNlcSgyMCw5MCw0KSkgKyANCiAgY29vcmRfdHJhbnMoeD0ic3FydCIsIHk9J3NxcnQnKQ0KYGBgDQojIE9taXRpbmcgdGhlIHRvcCAxJSBvZiB2YWx1ZXMgYnkgYXBwbHlpbmcgdGhlIHF1YXJ0aWxlIG1ldGhvZA0KDQoNCmBgYHtyIC0gT21pdGluZyB0aGUgdG9wIDElfQ0KZ2dwbG90KGFlcyh4ID0gbGlmZUV4cCwgeSA9IGdkcFBlcmNhcCwgY29sb3IgPSBjb250aW5lbnQpLA0KICAgICAgIGRhdGEgPSBnRGF0KSArIA0KICBnZW9tX3BvaW50KGFscGhhID0gMS8yKSArDQogIGdndGl0bGUoIkdkcFBlcmNhcCB2cy4gTGlmZUV4cCIpICsNCiAgbGFicyh4PSJsaWZlRXhwIiwgeT0iZ2RwUGVyY2FwIikgKw0KICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzID0gc2VxKDIwLDkwLDQpKSArIA0KICBjb29yZF90cmFucyh4PSJzcXJ0IiwgeT0nc3FydCcpICsgDQogICB4bGltKDIwLCBxdWFudGlsZShnRGF0JGxpZmVFeHAsIDAuOTkpKSArDQogIHlsaW0oMCwgcXVhbnRpbGUoZ0RhdCRnZHBQZXJjYXAsIDAuOTkpKSArDQogIGdlb21fc21vb3RoKG1ldGhvZD0ibG0iLCBjb2xvcj0icmVkIikNCmBgYA0KR2VvbS1zbW9vdGggb21pdHMgbW9zdCBvZiB0aGUgdmFsdWVzIHRoZXJlZm9yZSBkb2VzIG5vdCBzZWVtIHRoZSBwcm9wZXIgY2hvaWNlIGZvciB0aGlzIGRhdGENCg0KDQpNYWtpbmcgdGhlIHBsb3QgbW9yZSByZWFkYWJsZSBieSBjcmVhdGluZyBhbmQgdXNpbmcgY29uZGl0b25pbmcgc3VtbWFyaWVzLiAgDQpFcnJvciBiZWxvdyAtIEkgZG8gbm90IHVuZGVyc3RhbmQgaXQ/Pw0KDQpgYGB7cn0NCmdEYXRfYnlfY291bnRyeTwtIGdEYXQgJT4lIA0KICAgICAgICAgICAgICAgIGdyb3VwX2J5KGNvdW50cnkpICU+JQ0KICAgICAgICAgICAgICAgIHN1bW1hcmlzZShsaWZlRXhwX21lYW49bWVhbihsaWZlRXhwKSwNCiAgICAgICAgICAgICAgICBsaWZlRXhwX21lZGlhbj1tZWRpYW4obGlmZUV4cCksDQogICAgICAgICAgICAgICAgIG49bigpKSAlPiUNCiAgICAgICAgICAgICAgICBhcnJhbmdlKGNvdW50cnkpDQogICAgICAgICAgaGVhZChnRGF0X2J5X2NvdW50cnkpDQpgYGANCg0KDQoNCmBgYHtyIFBsb3R0aW5nIENvbmRpdGlvbmFsIFN1bW1hcmllc30NCg0KZ2dwbG90KGFlcyh4ID0gY29udGluZW50LCB5ID0gbGlmZUV4cCksDQogICAgICAgZGF0YSA9IHN1YnNldChnRGF0LCAhaXMubmEoY29udGluZW50KSkpICsgDQogIGdlb21fYm94cGxvdCgpICsNCiAgc3RhdF9zdW1tYXJ5KGdlb20gPSAncG9pbnQnLCBzaGFwZSA9IDQsIGZ1bi55ID0gbWVhbikNCg0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KGFlcyh4ID0gY29udGluZW50LCB5ID0gZ2RwUGVyY2FwLCBjb2xvciA9IGNvbnRpbmVudCksDQog
ICAgICAgZGF0YSA9IHN1YnNldChnRGF0LCAhaXMubmEoY29udGluZW50KSkpICsgDQogIGdlb21fYm94cGxvdCgpICsNCiAgc3RhdF9zdW1tYXJ5KGdlb20gPSAncG9pbnQnLCBzaGFwZSA9IDQsIGZ1bi55ID0gbWVkaWFuKSArDQogIHNjYWxlX3lfY29udGludW91cyhsaW1pdHMgPSBjKDAsIDMwMDAwKSxicmVha3M9c2VxKDAsIDMwMDAwLCAxMDAwKSkNCiAgDQpgYGANCg0KDQoNCldyaXRlIGNvZGUgdG8gY3JlYXRlIGEgbmV3IGRhdGEgZnJhbWUgLSB3aHk/Pw0KDQpgYGB7cn0NCg0KeWVhcl9saWZlRXhwX2dyb3VwIDwtIGdyb3VwX2J5KGdEYXQsIHllYXIsIGxpZmVFeHApICU+JSANCiAgICAgICAgICAgICAgICBmaWx0ZXIoIWlzLm5hKHllYXIpKSAlPiUgDQogICAgICAgICAgICAgICAgc3VtbWFyaXNlKG1lYW5fZ2RwUGVyY2FwID0gbWVhbihnZHBQZXJjYXApLA0KICAgICAgICAgICAgICAgICAgICAgICAgICBtZWRpYW5fZ2RwUGVyY2FwID0gbWVkaWFuKGdkcFBlcmNhcCksDQogICAgICAgICAgICAgICAgICAgICAgICAgIG4gPSBuKCkpDQp1bmdyb3VwKCkgJT4lDQphcnJhbmdlKGdEYXRfeWVhcl9saWZlRXhwX2dyb3VwLCB5ZWFyKQ0KYGBgDQpgYGANCndoeT8NCg0KDQoNCg0KDQoNCg0KDQoNCg0K