#Number of PhD graduates in Serbia from 1990 until now
#Source: public and open data of the Republic of Serbia http://opendata.stat.gov.rs/
library(readxl)
# Read the workbook with the published indicator data into `df`.
# NOTE(review): the path is absolute and specific to one machine; adjust it
# (or switch to a project-relative path) before running elsewhere.
df <- readxl::read_xlsx(
  path = "C:/Users/vjovanovic/Desktop/R Udemy/Vezbe sa javnim podacima/broj_doktora_nauka.xlsx"
)
# Show the structure of the imported table (column names and types).
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame': 780 obs. of 8 variables:
## $ idindikator: chr "110102IND01" "110102IND01" "110102IND01" "110102IND01" ...
## $ IDTer : chr "70017" "70017" "70017" "70025" ...
## $ nTer : chr "Aleksandrovac" "Aleksandrovac" "Aleksandrovac" "Aleksinac" ...
## $ mes : chr "00" "00" "00" "00" ...
## $ god : chr "2016" "2016" "2016" "2016" ...
## $ IDPol : chr "0" "1" "2" "0" ...
## $ nPol : chr "Ukupno" "Muško" "Žensko" "Ukupno" ...
## $ vrednost : chr "605" "307" "298" "829" ...
# Every column was imported as character, so convert the ones used below:
# territory name (nTer), year (god) and sex (nPol) become factors, and the
# measured value (vrednost) becomes numeric.
df[c("nTer", "god", "nPol")] <- lapply(
  df[c("nTer", "god", "nPol")],
  as.factor
)
df$vrednost <- as.numeric(df$vrednost)
# Preview the first rows to confirm the new column types.
head(df)
## # A tibble: 6 x 8
## idindikator IDTer nTer mes god IDPol nPol vrednost
## <chr> <chr> <fct> <chr> <fct> <chr> <fct> <dbl>
## 1 110102IND01 70017 Aleksandrovac 00 2016 0 Ukupno 605
## 2 110102IND01 70017 Aleksandrovac 00 2016 1 Muško 307
## 3 110102IND01 70017 Aleksandrovac 00 2016 2 Žensko 298
## 4 110102IND01 70025 Aleksinac 00 2016 0 Ukupno 829
## 5 110102IND01 70025 Aleksinac 00 2016 1 Muško 421
## 6 110102IND01 70025 Aleksinac 00 2016 2 Žensko 408
# Next, keep only the rows for the Republic of Serbia as a whole, not those for individual cities.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Country-level subset: rows whose territory name is "REPUBLIKA SRBIJA".
df1 <- df %>%
  filter(nTer == "REPUBLIKA SRBIJA")
# Preview the first rows of the subset.
head(df1)
## # A tibble: 6 x 8
## idindikator IDTer nTer mes god IDPol nPol vrednost
## <chr> <chr> <fct> <chr> <fct> <chr> <fct> <dbl>
## 1 110102IND01 RS REPUBLIKA SRBIJA 00 1990 0 Ukupno 181064
## 2 110102IND01 RS REPUBLIKA SRBIJA 00 1990 2 Žensko 87664
## 3 110102IND01 RS REPUBLIKA SRBIJA 00 1995 0 Ukupno 167746
## 4 110102IND01 RS REPUBLIKA SRBIJA 00 1995 2 Žensko 81311
## 5 110102IND01 RS REPUBLIKA SRBIJA 00 2000 0 Ukupno 167745
## 6 110102IND01 RS REPUBLIKA SRBIJA 00 2000 2 Žensko 81371
#Now we will plot a graph
library(ggplot2)
# Line-and-point chart of `vrednost` over time, one coloured series per level
# of `nPol`.
#
# `god` is stored as a factor. Mapping a factor to x gives a categorical axis
# on which all years are equally spaced, although the observations are not
# evenly spaced in time (the data contain 1990, 1995 and 2000 at five-year
# steps as well as 2016), which distorts the slope of the trend. The year is
# therefore converted back to a number inside aes() so that each point sits at
# its true position on a continuous time axis. `df1` itself is not modified.
#
# as.character() comes first on purpose: as.integer() applied directly to a
# factor returns the internal level codes (1, 2, 3, ...), not the years.
#
# NOTE(review): `size` is kept for geom_line() for compatibility with older
# ggplot2 releases; ggplot2 >= 3.4.0 prefers `linewidth` for lines.
p <- ggplot(
  df1,
  aes(
    as.integer(as.character(god)),
    vrednost,
    group = nPol,
    colour = nPol
  )
) +
  geom_line(alpha = 0.5, size = 1) +
  geom_point(alpha = 0.5, size = 4) +
  theme_bw() +
  xlab("Year") +
  ylab("Number of PhD graduates") +
  ggtitle("The Growth of number of PhDs in last 30 years in Serbia")
# Render the plot.
p
