#Nedostajuci podaci
##Missing values rjesavamo na sljedeci nacin:
#uzmimo opet bazu WHO
# Load the WHO data set; the file name has to be given as a quoted string.
WHO <- read.csv(file = "WHO.csv")
# Count the cells of the whole data frame that are NA (TRUE) versus present (FALSE).
table(is.na(WHO))
##
## FALSE TRUE
## 2192 330
# Another way to see how many NAs each variable has: summary() reports an
# "NA's" count for every column that contains missing values.
summary(object = WHO)
## Country Region Population Under15
## Length:194 Length:194 Min. : 1 Min. :13.12
## Class :character Class :character 1st Qu.: 1696 1st Qu.:18.72
## Mode :character Mode :character Median : 7790 Median :28.65
## Mean : 36360 Mean :28.73
## 3rd Qu.: 24535 3rd Qu.:37.75
## Max. :1390000 Max. :49.99
##
## Over60 FertilityRate LifeExpectancy ChildMortality
## Min. : 0.81 Min. :1.260 Min. :47.00 Min. : 2.200
## 1st Qu.: 5.20 1st Qu.:1.835 1st Qu.:64.00 1st Qu.: 8.425
## Median : 8.53 Median :2.400 Median :72.50 Median : 18.600
## Mean :11.16 Mean :2.941 Mean :70.01 Mean : 36.149
## 3rd Qu.:16.69 3rd Qu.:3.905 3rd Qu.:76.00 3rd Qu.: 55.975
## Max. :31.92 Max. :7.580 Max. :83.00 Max. :181.600
## NA's :11
## CellularSubscribers LiteracyRate GNI
## Min. : 2.57 Min. :31.10 Min. : 340
## 1st Qu.: 63.57 1st Qu.:71.60 1st Qu.: 2335
## Median : 97.75 Median :91.80 Median : 7870
## Mean : 93.64 Mean :83.71 Mean :13321
## 3rd Qu.:120.81 3rd Qu.:97.85 3rd Qu.:17558
## Max. :196.41 Max. :99.80 Max. :86440
## NA's :10 NA's :91 NA's :32
## PrimarySchoolEnrollmentMale PrimarySchoolEnrollmentFemale
## Min. : 37.20 Min. : 32.50
## 1st Qu.: 87.70 1st Qu.: 87.30
## Median : 94.70 Median : 95.10
## Mean : 90.85 Mean : 89.63
## 3rd Qu.: 98.10 3rd Qu.: 97.90
## Max. :100.00 Max. :100.00
## NA's :93 NA's :93
##zasto moramo "znati" kako rukovati sa NA?----
# Some functions cannot produce a result when NA values are present and return
# NA (or sometimes an error) instead; mean() is one example.
mean(WHO$LiteracyRate)
## [1] NA
# na.rm = TRUE drops the NA entries before averaging, so countries without a
# literacy-rate figure are left out of the mean. TRUE is spelled out because
# T is an ordinary variable that can be reassigned, whereas TRUE is reserved.
mean(WHO$LiteracyRate, na.rm = TRUE)
## [1] 83.71068
## To remove from the whole data set every observation that has an NA in any variable:
# na.omit() returns a new data frame, stored here as WHObezNA, with no NA values left.
WHObezNA <- na.omit(object = WHO)
WHObezNA
## Country Region Population
## 5 Angola Africa 20821
## 6 Antigua and Barbuda Americas 89
## 22 Bosnia and Herzegovina Europe 3834
## 32 Cape Verde Africa 494
## 33 Central African Republic Africa 4525
## 37 Colombia Americas 47704
## 43 Croatia Europe 4307
## 52 Dominican Republic Americas 10277
## 55 El Salvador Americas 6297
## 56 Equatorial Guinea Africa 736
## 57 Eritrea Africa 6131
## 58 Estonia Europe 1291
## 64 Gambia Africa 1791
## 68 Greece Europe 11125
## 70 Guatemala Americas 15083
## 71 Guinea Africa 11451
## 72 Guinea-Bissau Africa 1664
## 75 Honduras Americas 7936
## 76 Hungary Europe 9976
## 84 Italy Europe 60885
## 87 Jordan Eastern Mediterranean 7009
## 94 Latvia Europe 2060
## 96 Lesotho Africa 2052
## 99 Lithuania Europe 3028
## 105 Mali Africa 14854
## 108 Mauritania Africa 3796
## 110 Mexico Americas 121000
## 113 Mongolia Western Pacific 2796
## 116 Mozambique Africa 25203
## 118 Namibia Africa 2259
## 125 Nigeria Africa 169000
## 131 Panama Americas 3802
## 133 Paraguay Americas 6687
## 136 Poland Europe 38211
## 137 Portugal Europe 10604
## 138 Qatar Eastern Mediterranean 2051
## 140 Republic of Moldova Europe 3514
## 141 Romania Europe 21755
## 150 Saudi Arabia Eastern Mediterranean 28288
## 152 Serbia Europe 9553
## 157 Slovenia Europe 2068
## 162 Spain Europe 46755
## 163 Sri Lanka South-East Asia 21098
## 170 Tajikistan Europe 8009
## 172 The former Yugoslav Republic of Macedonia Europe 2106
## 181 Uganda Africa 36346
## 182 Ukraine Europe 45530
## 188 Uzbekistan Europe 28541
## 192 Yemen Eastern Mediterranean 23852
## 193 Zambia Africa 14075
## Under15 Over60 FertilityRate LifeExpectancy ChildMortality
## 5 47.58 3.84 6.10 51 163.5
## 6 25.96 12.35 2.12 75 9.9
## 22 16.35 20.52 1.26 76 6.7
## 32 30.17 7.05 2.38 72 22.2
## 33 40.07 5.74 4.54 48 128.6
## 37 28.03 9.19 2.35 78 17.6
## 43 14.98 24.69 1.48 77 4.7
## 52 30.53 8.97 2.55 73 27.1
## 55 30.62 9.64 2.24 72 15.9
## 56 38.95 4.53 5.04 54 100.3
## 57 43.10 3.73 4.88 61 51.8
## 58 15.69 23.92 1.62 76 3.6
## 64 45.90 3.72 5.79 58 72.9
## 68 14.60 25.41 1.51 81 4.8
## 70 40.80 6.56 3.91 69 32.0
## 71 42.46 5.03 5.09 55 101.2
## 72 41.55 5.06 5.05 50 129.1
## 75 35.72 6.41 3.10 74 22.9
## 76 14.62 23.41 1.38 75 6.2
## 84 14.04 26.97 1.45 82 3.8
## 87 34.13 5.30 3.39 74 19.1
## 94 14.57 24.24 1.57 74 8.7
## 96 36.75 6.31 3.15 50 99.6
## 99 15.13 20.57 1.49 74 5.4
## 105 47.14 4.29 6.85 51 128.0
## 108 40.22 4.94 4.78 59 84.0
## 110 29.02 9.18 2.25 75 16.2
## 113 27.05 5.80 2.45 68 27.5
## 116 45.38 5.01 5.34 53 89.7
## 118 36.59 5.38 3.17 65 38.7
## 125 44.23 4.49 6.02 53 123.7
## 131 28.65 10.13 2.52 77 18.5
## 133 32.78 8.01 2.93 75 22.0
## 136 14.91 20.48 1.39 76 5.0
## 137 14.92 24.39 1.33 80 3.6
## 138 13.28 1.73 2.06 82 7.4
## 140 16.52 16.72 1.47 71 17.6
## 141 15.05 20.66 1.39 74 12.2
## 150 29.69 4.59 2.76 76 8.6
## 152 16.45 20.52 1.37 74 6.6
## 157 14.16 23.16 1.49 80 3.1
## 162 15.20 22.86 1.47 82 4.5
## 163 25.15 12.40 2.35 75 9.6
## 170 35.75 4.80 3.81 68 58.3
## 172 16.89 17.56 1.44 75 7.4
## 181 48.54 3.72 6.06 56 68.9
## 182 14.18 20.76 1.45 71 10.7
## 188 28.90 6.38 2.38 68 39.6
## 192 40.72 4.54 4.35 64 60.0
## 193 46.73 3.95 5.77 55 88.5
## CellularSubscribers LiteracyRate GNI PrimarySchoolEnrollmentMale
## 5 48.38 70.1 5230 93.1
## 6 196.41 99.0 17900 91.1
## 22 84.52 97.9 9190 86.5
## 32 79.19 84.3 3980 94.6
## 33 40.65 56.0 810 81.3
## 37 98.45 93.4 9560 91.7
## 43 116.37 98.8 18760 94.8
## 52 87.22 89.5 9420 95.5
## 55 133.54 84.5 6640 95.2
## 56 59.15 93.9 25620 56.5
## 57 4.47 67.8 580 37.2
## 58 138.98 99.8 20850 97.7
## 64 78.89 50.0 1750 68.2
## 68 106.48 97.2 25100 98.8
## 70 140.38 75.2 4760 98.6
## 71 44.02 41.0 1020 85.2
## 72 56.18 54.2 1240 76.7
## 75 103.97 84.8 3820 94.8
## 76 117.30 99.0 20310 97.8
## 84 157.93 98.9 32400 99.6
## 87 118.20 92.6 5930 90.8
## 94 102.94 99.8 17700 95.0
## 96 56.17 89.6 2050 72.2
## 99 151.30 99.7 19640 95.6
## 105 68.32 31.1 1040 70.6
## 108 93.60 58.0 2400 72.8
## 110 82.38 93.1 15390 99.2
## 113 105.08 97.4 4290 99.6
## 116 32.83 56.1 970 94.6
## 118 96.39 88.8 6560 83.8
## 125 58.58 61.3 2290 60.1
## 131 188.60 94.1 14510 99.1
## 133 99.40 93.9 5390 84.4
## 136 130.97 99.5 20430 96.9
## 137 115.39 95.2 24440 99.1
## 138 123.11 96.3 86440 95.7
## 140 104.80 98.5 3640 90.1
## 141 109.16 97.7 15120 87.9
## 150 191.24 86.6 24700 96.7
## 152 125.39 97.9 11540 94.7
## 157 106.56 99.7 26510 97.7
## 162 113.22 97.7 31400 99.7
## 163 87.05 91.2 5520 93.9
## 170 90.64 99.7 2300 99.5
## 172 107.24 97.3 11090 97.3
## 181 48.38 73.2 1310 89.7
## 182 122.98 99.7 7040 90.8
## 188 91.65 99.4 3420 93.3
## 192 47.05 63.9 2170 85.5
## 193 60.59 71.2 1490 91.4
## PrimarySchoolEnrollmentFemale
## 5 78.2
## 6 84.5
## 22 88.4
## 32 92.4
## 33 60.6
## 37 91.3
## 43 97.0
## 52 90.4
## 55 95.5
## 56 56.0
## 57 32.5
## 58 97.0
## 64 70.4
## 68 99.3
## 70 97.5
## 71 72.1
## 72 73.3
## 75 97.0
## 76 98.3
## 84 98.5
## 87 90.7
## 94 96.8
## 96 75.3
## 99 95.8
## 105 60.8
## 108 76.0
## 110 99.9
## 113 98.5
## 116 89.4
## 118 88.5
## 125 54.8
## 131 98.2
## 133 83.9
## 136 96.7
## 137 99.7
## 138 96.6
## 140 90.1
## 141 87.3
## 150 96.5
## 152 94.4
## 157 97.3
## 162 99.8
## 163 94.4
## 170 96.0
## 172 99.2
## 181 92.3
## 182 91.5
## 188 91.0
## 192 70.5
## 193 93.9
# Confirm that no NA cells remain in the filtered data frame.
table(is.na(WHObezNA))
##
## FALSE
## 650
# Excluding NAs for one specific variable
# First check how many NAs the variable contains.
table(is.na(WHO[["PrimarySchoolEnrollmentFemale"]]))
##
## FALSE TRUE
## 101 93
summary(object = WHO$PrimarySchoolEnrollmentFemale)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 32.50 87.30 95.10 89.63 97.90 100.00 93
# Keep only the rows where PrimarySchoolEnrollmentFemale is present. Unlike
# na.omit(WHO), rows that are NA only in other variables are retained.
WHOPSEFnona <- WHO[!is.na(WHO$PrimarySchoolEnrollmentFemale), ]
summary(object = WHOPSEFnona$PrimarySchoolEnrollmentFemale)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 32.50 87.30 95.10 89.63 97.90 100.00
# The NAs in this variable are gone.
table(is.na(WHOPSEFnona$PrimarySchoolEnrollmentFemale))
##
## FALSE
## 101
## What happened to the NAs?
## Those observations are no longer in the data set.
length(WHO[["PrimarySchoolEnrollmentFemale"]])
## [1] 194
# 93 fewer observations than in the original data.
length(WHOPSEFnona[["PrimarySchoolEnrollmentFemale"]])
## [1] 101