Task 1

First, we load the data

Task 2.1

First, let’s load the data.

# Download the Netflix dataset to a local CSV and read it into `mydata`.
# mode = "wb" writes the bytes as-is, so the file is not altered by
# text-mode newline translation on Windows.
download.file(
  "https://raw.githubusercontent.com/kflisikowski/ds/master/netflix-dataset.csv?raw=true",
  destfile = "dane.csv",
  mode = "wb"
)
mydata <- read.csv(
  file = "dane.csv",
  encoding = "UTF-8",
  header = TRUE,
  sep = ","
)
# NOTE: attach(mydata) was removed on purpose. Every later step passes
# `mydata` explicitly (pipes / ggplot data argument), so attaching the columns
# to the search path only risks masking other objects.

We will consider a movie or series to be Polish if it is available in Poland and has Polish subtitles. Now for some data wrangling.

# Titles that are available in Poland and list Polish among their languages.
#
# `Languages` holds a comma-separated list per title, so an exact
# `== "Polish"` comparison would only keep titles whose *sole* language is
# Polish. The patterns below match the value as one whole entry of the list
# (start-or-comma before it, comma-or-end after it, optional spaces around).
# NOTE(review): `Country.Availability` is assumed to be a comma-separated list
# as well; the pattern also matches a plain single value, so it is safe
# either way.
# grepl() yields FALSE for NA, so rows with missing values are dropped, just
# as they were with `==` inside filter().
movies <- mydata %>%
  filter(
    grepl("(^|,)\\s*Polish\\s*(,|$)", Languages),
    grepl("(^|,)\\s*Poland\\s*(,|$)", Country.Availability)
  ) %>%
  select(Country.Availability, Languages, Title, IMDb.Score)
# Histogram of IMDb scores over the full `mydata` table, one panel per
# value of Series.or.Movie.
ggplot(data = mydata, mapping = aes(x = IMDb.Score)) +
  geom_histogram(
    binwidth = 0.1,
    fill = "cornsilk",
    color = "navyblue"
  ) +
  facet_wrap(~ Series.or.Movie)

Task 2.2

Now let’s take a look at the density.

# Kernel density estimate of IMDb scores, one panel per value of
# Series.or.Movie.
#
# The bandwidth has to be given to geom_density(): arguments passed to
# ggplot() outside aes() land in its unused `...` and are silently ignored,
# so the previous `bw = 0.5` never took effect. Likewise the y mapping
# belongs inside aes().
ggplot(mydata, aes(x = IMDb.Score, y = after_stat(density))) +
  geom_density(bw = 0.5, fill = "cornsilk", color = "navyblue") +
  facet_wrap(vars(Series.or.Movie))

Task 3.3

# Count how many titles list each language and keep the 10 most common.
#
# `Languages` is a comma-separated list per title, so it is first expanded to
# one row per (title, language). The separator is a regex that tolerates a
# missing space after the comma. Rows with a missing or blank language are
# removed before counting so they cannot show up as an unlabelled "language".
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties.
new <- mydata %>%
  select(Languages) %>%
  separate_rows(Languages, sep = ",\\s*") %>%
  filter(!is.na(Languages), Languages != "") %>%
  count(Languages, sort = TRUE) %>%
  slice_max(order_by = n, n = 10)


# Horizontal bar chart of title counts per language; languages are ordered
# by their count `n` via fct_reorder().
ggplot(data = new, mapping = aes(x = n, y = fct_reorder(Languages, n))) +
  geom_col() +
  labs(
    x = "How many titles available",
    y = "Language"
  )

LS0tCnRpdGxlOiAiRGF0YSBWaXN1YWxpemF0aW9uIgphdXRob3I6ICJHcm91cDogTWFjaWVqIEthcndpaywgTmF0YWxpYSBTYXJiaWV3c2thLCBNYXJjaW4gU3VjaHRvLCBaaGFuZyBIYW9rYW5nIgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDogCiAgICB0aGVtZTogY2VydWxlYW4KICAgIGhpZ2hsaWdodDogdGV4dG1hdGUKICAgIGZvbnRzaXplOiA4cHQKICAgIHRvYzogeWVzCiAgICBjb2RlX2Rvd25sb2FkOiB5ZXMKICAgIHRvY19mbG9hdDoKICAgICAgY29sbGFwc2VkOiBubwogICAgZGZfcHJpbnQ6IGRlZmF1bHQKICAgIHRvY19kZXB0aDogNQplZGl0b3Jfb3B0aW9uczogCiAgbWFya2Rvd246IAogICAgd3JhcDogNzIKICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lCi0tLQoKYGBge3IsIGluY2x1ZGU9RkFMU0V9CiNsb2FkIGxpYnJhcnkgYW5kIHNldCBzZWVkCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoa2FibGVFeHRyYSkKbGlicmFyeShzdHJpbmdyKQpzZXQuc2VlZCgyMSkKCmBgYAoKIyMgVGFzayAxCgpGaXJzdCwgd2UgbG9hZCB0aGUgZGF0YQoKYGBge3IgTG9hZH0KYGBgCgojIyBUYXNrIDIuMQoKRmlyc3QgbGV0J3MgbG9hZCB0aGUgZGF0YQoKYGBge3IgZXgyLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCBpbmNsdWRlPVRSVUV9CmRvd25sb2FkLmZpbGUoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9rZmxpc2lrb3dza2kvZHMvbWFzdGVyL25ldGZsaXgtZGF0YXNldC5jc3Y/cmF3PXRydWUiLCBkZXN0ZmlsZSA9ImRhbmUuY3N2Iixtb2RlPSJ3YiIpCm15ZGF0YTwtcmVhZC5jc3YoZmlsZT0iZGFuZS5jc3YiLGVuY29kaW5nID0iVVRGLTgiLGhlYWRlcj1UUlVFLHNlcCA9ICIsIikKYXR0YWNoKG15ZGF0YSkKYGBgCgpXZSB3aWxsIGNvbnNpZGVyIGEgbW92aWUgb3Igc2VyaWVzIHRvIGJlIHBvbGlzaCBpZiBpdCBpcyBhdmFpbGFibGUgaW4KUG9sYW5kIGFuZCBoYXMgUG9saXNoIHN1YnRpdGxlcy4gTm93IHNvbWUgZGF0YSB3cmFuZ2xpbmcuCgpgYGB7ciwgIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9Cm1vdmllcyA8LSBteWRhdGEgJT4lIAogIGZpbHRlcihMYW5ndWFnZXMgPT0gIlBvbGlzaCIpICU+JQogIGZpbHRlciAoQ291bnRyeS5BdmFpbGFiaWxpdHkgPT0gIlBvbGFuZCIpICU+JQogIHNlbGVjdCggQ291bnRyeS5BdmFpbGFiaWxpdHksTGFuZ3VhZ2VzLCBUaXRsZSwgSU1EYi5TY29yZSApCmBgYAoKYGBge3IsIHdhcm5pbmc9RkFMU0V9CmdncGxvdChteWRhdGEsIGFlcyhJTURiLlNjb3JlKSkrCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9MC4xLCBmaWxsID0gImNvcm5zaWxrIiwgY29sb3I9Im5hdnlibHVlIikrCiAgZmFjZXRfd3JhcCh2YXJzKFNlcmllcy5vci5Nb3ZpZSkpCmBgYAoKIyMgVGFzayAyLjIKCk5vdyBsZXQncyB0YWtlIGEgbG9vayBhdCBkZW5zaXR5LgoKYGBge3IsIHdhcm5pbmc9RkFMU0V9CmdncGxv
dChteWRhdGEsIGFlcyhJTURiLlNjb3JlKSwgeT1hZnRlcl9zdGF0KGRlbnNpdHkpLCBidz0wLjUpKwogIGdlb21fZGVuc2l0eSggZmlsbCA9ICJjb3Juc2lsayIsIGNvbG9yPSJuYXZ5Ymx1ZSIpKwogIGZhY2V0X3dyYXAodmFycyhTZXJpZXMub3IuTW92aWUpKQoKYGBgCgojIyBUYXNrIDMuMwoKYGBgICAgICAgICAgCmBgYAoKYGBge3J9Cm5ldyA8LSBteWRhdGEgJT4lCiAgc2VsZWN0KExhbmd1YWdlcykgJT4lCiAgc2VwYXJhdGVfcm93cyhMYW5ndWFnZXMsIHNlcD0iLCAiKSAlPiUKICBjb3VudChMYW5ndWFnZXMsIHNvcnQ9VFJVRSkgJT4lCiAgdG9wX24oMTAsIG4pCgoKZ2dwbG90KG5ldywgYWVzKHg9biwgeT1mY3RfcmVvcmRlcihMYW5ndWFnZXMsIG4pKSkrCiAgZ2VvbV9jb2woKSsKICB5bGFiKCJMYW5ndWFnZSIpKwogIHhsYWIoIkhvdyBtYW55IHRpdGxlcyBhdmFpbGFibGUiKQoKCgpgYGAK