#Number of PhD graduates in Serbia from 1990 until now
#Source: public and open data of the Republic of Serbia http://opendata.stat.gov.rs/

library(readxl)

# Locate the workbook without tying the script to a single machine.
# Candidates are tried in order:
#   1. the path in the PHD_DATA_XLSX environment variable (empty when unset),
#   2. a copy of the workbook in the current working directory,
#   3. the original author's absolute path (kept so existing setups still run).
data_candidates <- c(
  Sys.getenv("PHD_DATA_XLSX"),
  "broj_doktora_nauka.xlsx",
  "C:/Users/vjovanovic/Desktop/R Udemy/Vezbe sa javnim podacima/broj_doktora_nauka.xlsx"
)

# file.exists("") is FALSE, so an unset override is skipped automatically;
# indexing an empty result with [1] yields NA, which is checked below.
data_path <- data_candidates[file.exists(data_candidates)][1]
if (is.na(data_path)) {
  stop(
    "Cannot find 'broj_doktora_nauka.xlsx'. Put it in the working directory ",
    "or set the PHD_DATA_XLSX environment variable to its full path.",
    call. = FALSE
  )
}

# Read the first sheet of the workbook into a tibble.
df <- read_xlsx(data_path)

# Inspect the column names and types of the imported data.
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame':    780 obs. of  8 variables:
##  $ idindikator: chr  "110102IND01" "110102IND01" "110102IND01" "110102IND01" ...
##  $ IDTer      : chr  "70017" "70017" "70017" "70025" ...
##  $ nTer       : chr  "Aleksandrovac" "Aleksandrovac" "Aleksandrovac" "Aleksinac" ...
##  $ mes        : chr  "00" "00" "00" "00" ...
##  $ god        : chr  "2016" "2016" "2016" "2016" ...
##  $ IDPol      : chr  "0" "1" "2" "0" ...
##  $ nPol       : chr  "Ukupno" "Muško" "Žensko" "Ukupno" ...
##  $ vrednost   : chr  "605" "307" "298" "829" ...
# Every column was imported as character, so convert the ones we need to factor or numeric.

# Territory name, year and sex are categorical: recode all three as factors
# in a single vectorised step.
df[c("nTer", "god", "nPol")] <- lapply(
  df[c("nTer", "god", "nPol")],
  as.factor
)

# The measured value is stored as text in the workbook; make it numeric.
df[["vrednost"]] <- as.numeric(df[["vrednost"]])

# Preview the first rows to confirm the new column types.
head(df)
## # A tibble: 6 x 8
##   idindikator IDTer nTer          mes   god   IDPol nPol   vrednost
##   <chr>       <chr> <fct>         <chr> <fct> <chr> <fct>     <dbl>
## 1 110102IND01 70017 Aleksandrovac 00    2016  0     Ukupno      605
## 2 110102IND01 70017 Aleksandrovac 00    2016  1     Muško       307
## 3 110102IND01 70017 Aleksandrovac 00    2016  2     Žensko      298
## 4 110102IND01 70025 Aleksinac     00    2016  0     Ukupno      829
## 5 110102IND01 70025 Aleksinac     00    2016  1     Muško       421
## 6 110102IND01 70025 Aleksinac     00    2016  2     Žensko      408
# Now we will keep only the rows for the Republic of Serbia as a whole, not the rows for particular cities.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the national-level rows, i.e. those whose territory name is
# exactly "REPUBLIKA SRBIJA".
df1 <- df %>%
  filter(nTer == "REPUBLIKA SRBIJA")

# Preview the filtered data.
df1 %>%
  head()
## # A tibble: 6 x 8
##   idindikator IDTer nTer             mes   god   IDPol nPol   vrednost
##   <chr>       <chr> <fct>            <chr> <fct> <chr> <fct>     <dbl>
## 1 110102IND01 RS    REPUBLIKA SRBIJA 00    1990  0     Ukupno   181064
## 2 110102IND01 RS    REPUBLIKA SRBIJA 00    1990  2     Žensko    87664
## 3 110102IND01 RS    REPUBLIKA SRBIJA 00    1995  0     Ukupno   167746
## 4 110102IND01 RS    REPUBLIKA SRBIJA 00    1995  2     Žensko    81311
## 5 110102IND01 RS    REPUBLIKA SRBIJA 00    2000  0     Ukupno   167745
## 6 110102IND01 RS    REPUBLIKA SRBIJA 00    2000  2     Žensko    81371
#Now we will plot a graph

library(ggplot2)

# Line-and-point chart of the yearly values, one series per sex category.
#
# `god` is stored as a factor, which would give a discrete x axis where
# unevenly spaced years (e.g. 1990, 1995, 2000) are drawn at equal distances.
# Converting through character recovers the actual year numbers (a bare
# as.numeric() on a factor would return level codes instead), so the axis is
# continuous and the spacing between observations reflects real elapsed time.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here so the script still runs on older installations.
p <- ggplot(
  df1,
  aes(
    as.numeric(as.character(god)),
    vrednost,
    group = nPol,
    colour = nPol
  )
) +
  geom_line(alpha = 0.5, size = 1) +
  geom_point(alpha = 0.5, size = 4) +
  theme_bw() +
  xlab("Year") +
  ylab("Number of PhD graduates") +
  ggtitle("The Growth of number of PhDs in last 30 years in Serbia")

# Render the plot.
p