Cleaning data

#install.packages
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
mydata_excel2 <- read_excel("~/Desktop/NLB projekt/Logistična regresija/anketa_končni podatki.xlsx")

mydata_excel2 <- mydata_excel2[-1, ] #Delete first row in which the questions are written

mydata_excel2$ID <- seq(1,nrow(mydata_excel2))

head(mydata_excel2)
## # A tibble: 6 × 129
##   Q1    Q2    Q3    Q4    Q5    Q6a   Q6b   Q6c   Q6d   Q6e   Q6f   Q6g   Q6h  
##   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2     1     1     1     1     2     1     1     2     1     2     1     1    
## 2 2     1     1     2     -2    -2    -2    -2    -2    -2    -2    -2    -2   
## 3 2     1     1     1     2     4     5     5     4     3     5     5     4    
## 4 2     1     1     1     2     -3    -3    -3    -3    -3    -3    -3    -3   
## 5 2     1     1     1     2     2     3     4     4     1     2     2     3    
## 6 2     1     1     1     2     3     2     5     1     1     1     1     1    
## # ℹ 116 more variables: Q6i <chr>, Q6j <chr>, Q6j_text <chr>, Q7 <chr>,
## #   `Podpora in svetovanje` <chr>,
## #   `Zagotavljanje varnosti in zaščita vaših finančnih podatkov ter transakcij` <chr>,
## #   `Transparentnost delovanja banke in njenih storitev` <chr>,
## #   Priročnost <chr>, `Hitrost in zanesljivost opravljene storitve` <chr>,
## #   Q8f <chr>, Q8f_text <chr>, `Poslovalnica_Podpora in svetovanje` <chr>,
## #   `Mobilna banka_Podpora in svetovanje` <chr>, …
mydata2 <- mydata_excel2[!(apply(mydata_excel2 == -3, 1, any)), ]

mydata2 <- subset(mydata2, select = -c(Q21:Q40))

mydata2$ID <- seq(1,nrow(mydata2))

head(mydata2)
## # A tibble: 6 × 83
##   Q1    Q2    Q3    Q4    Q5    Q6a   Q6b   Q6c   Q6d   Q6e   Q6f   Q6g   Q6h  
##   <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2     1     1     1     1     2     1     1     2     1     2     1     1    
## 2 2     1     1     2     -2    -2    -2    -2    -2    -2    -2    -2    -2   
## 3 2     1     1     1     2     4     5     5     4     3     5     5     4    
## 4 2     1     1     1     2     2     3     4     4     1     2     2     3    
## 5 2     1     1     1     2     3     2     5     1     1     1     1     1    
## 6 2     2     2     1     2     5     4     4     5     4     3     5     5    
## # ℹ 70 more variables: Q6i <chr>, Q6j <chr>, Q6j_text <chr>, Q7 <chr>,
## #   `Podpora in svetovanje` <chr>,
## #   `Zagotavljanje varnosti in zaščita vaših finančnih podatkov ter transakcij` <chr>,
## #   `Transparentnost delovanja banke in njenih storitev` <chr>,
## #   Priročnost <chr>, `Hitrost in zanesljivost opravljene storitve` <chr>,
## #   Q8f <chr>, Q8f_text <chr>, `Poslovalnica_Podpora in svetovanje` <chr>,
## #   `Mobilna banka_Podpora in svetovanje` <chr>, …
mydata2$Q2 <- factor(mydata2$Q2, 
                         levels = c(1, 2), 
                         labels = c("Yes","No"))

mydata2$Q3 <- factor(mydata2$Q3, 
                         levels = c(1, 2), 
                         labels = c("Yes","No"))

mydata2$Q4 <- factor(mydata2$Q4, 
                         levels = c(1, 2, 3), 
                         labels = c("Yes","No", "I don't know"))

mydata2$Q5 <- factor(mydata2$Q5, 
                         levels = c(1, 2), 
                         labels = c("Yes","No"))

mydata2$Q7 <- factor(mydata2$Q7, 
                         levels = c(1, 2), 
                         labels = c("Yes","No"))

mydata2$Q16a <- factor(mydata2$Q16a, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q16b <- factor(mydata2$Q16b, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q16c <- factor(mydata2$Q16c, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q16d <- factor(mydata2$Q16d, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q16e <- factor(mydata2$Q16e, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q16f <- factor(mydata2$Q16f, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q16g <- factor(mydata2$Q16g, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q41 <- factor(mydata2$Q41, 
                         levels = c(1, 2, 3), 
                         labels = c("Female","Male", "I don't want to answer"))

mydata2$Q43a <- factor(mydata2$Q43a, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q43b <- factor(mydata2$Q43b, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q43c <- factor(mydata2$Q43c, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q43d <- factor(mydata2$Q43d, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q43e <- factor(mydata2$Q43e, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q43f <- factor(mydata2$Q43f, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q43g <- factor(mydata2$Q43g, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q43h <- factor(mydata2$Q43h, 
                         levels = c(1, 0), 
                         labels = c("Selected","Not selected"))

mydata2$Q44 <- factor(mydata2$Q44, 
                         levels = c(2, 6, 1, 4, 3, 5), 
                         labels = c("1.000 – 5.000 habitants","More than 100.000 habitants", "Less than 1.000 habitants", "20.001 – 50.000 habitants", "5.001 – 20.000 habitants", "50.001 – 100.000 habitants"))

mydata2$Q45 <- factor(mydata2$Q45, 
                         levels = c(3, 5, 1, 9, 12, 7, 10, 4, 11, 6), 
                         labels = c("OTP banka d.d.","Banka Intesa Sanpaolo d.d.", "Nova Ljubljanska Banka d.d. (NLB)", "Gorenjska Banka d.d.", "Delavska Hranilnica d.d.", "Revolut", "Deželna Banka Slovenije d.d.", "Banka Sparkasse d.d.", "Addiko Bank d.d.", "UniCredit Banka Slovenija d.d."))

mydata2$Q46 <- factor(mydata2$Q46, 
                         levels = c(1, 2, 3, 5, 6, 4), 
                         labels = c("Študent/-ka","Redno zaposlen/-a", "Upokojen/-a", "Samozaposlen/-a", "Delno zaposlen/-a", "Brezposeln/-a"))

mydata2$Q47 <- factor(mydata2$Q47, 
                         levels = c(1, 5, 3, 8, 2, 4, 6, 7), 
                         labels = c("Pod 1.000€","3.001€ - 5.000€", "1.501€ - 2.000€", "I don't want to answer", "1.000€ - 1.500€", "2.001€ - 3.000€", "5.001€ - 10.000€", "Above 10.000€"))

mydata2$Q48 <- factor(mydata2$Q48, 
                         levels = c(2, 6, 3, 5, 7, 4), 
                         labels = c("Dokončana osnovna šola","Dokončana visokošolska strokovna univerzitetna izobrazba (tudi 2. bolonjska stopnja)", "Dokončana nižja ali srednja poklicna izobrazba", "Dokončana višješolska strokovna ali visokošolska strokovna izobrazba (tudi 1. bolonjska stopnja)", "Dokončana specializacija, znanstveni magisterij, doktorat", "Dokončana srednja strokovna ali splošna izobrazba"))

mydata2$Q49 <- factor(mydata2$Q49, 
                         levels = c(1, 2, 3), 
                         labels = c("Preko linka","Na tablici", "Na listu papirja"))

mydata2[c(6:14, 18:22, 25:39, 48:55, 58:62)] <- mydata2[c(6:14, 18:22, 25:39, 48:55, 58:62)] %>% mutate_all(as.numeric)

mydata2 <- mydata2 %>% mutate(across(where(is.numeric), ~ replace(., . == -2, mean(.[. != -2], na.rm = TRUE))))

mydata2 <- mydata2 %>% mutate(across(where(is.numeric), ~ replace(., is.na(.), mean(., na.rm = TRUE))))

mydata2 <- mydata2 %>%
  filter(!ID %in% c(2, 16, 17)) 

mydata2$ID <- seq(1, nrow(mydata2)) 

summary(mydata2[c(-1, -15, -16, -23, -24, -47, -56, -57, -63, -64, -66, -75, -78, -83)])
##     Q2         Q3                 Q4         Q5           Q6a       
##  Yes :121   Yes :131   Yes         :103   Yes : 25   Min.   :1.000  
##  No  : 36   No  : 26   No          : 28   No  :132   1st Qu.:3.000  
##  NA's:  1   NA's:  1   I don't know: 26   NA's:  1   Median :3.000  
##                        NA's        :  1              Mean   :3.433  
##                                                      3rd Qu.:4.000  
##                                                      Max.   :5.000  
##                                                                     
##       Q6b             Q6c             Q6d             Q6e       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:1.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :2.000  
##  Mean   :3.535   Mean   :3.847   Mean   :3.854   Mean   :2.688  
##  3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##                                                                 
##       Q6f             Q6g             Q6h            Q6i           Q7    
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000   Yes :82  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.00   1st Qu.:3.000   No  :75  
##  Median :4.000   Median :3.086   Median :3.00   Median :4.000   NA's: 1  
##  Mean   :3.497   Mean   :3.172   Mean   :2.93   Mean   :3.465            
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:4.000            
##  Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000            
##                                                                          
##  Podpora in svetovanje
##  Min.   :1.00         
##  1st Qu.:3.00         
##  Median :4.00         
##  Mean   :3.79         
##  3rd Qu.:4.00         
##  Max.   :5.00         
##                       
##  Zagotavljanje varnosti in zaščita vaših finančnih podatkov ter transakcij
##  Min.   :1.000                                                            
##  1st Qu.:4.000                                                            
##  Median :5.000                                                            
##  Mean   :4.382                                                            
##  3rd Qu.:5.000                                                            
##  Max.   :5.000                                                            
##                                                                           
##  Transparentnost delovanja banke in njenih storitev   Priročnost   
##  Min.   :1.000                                      Min.   :1.000  
##  1st Qu.:4.000                                      1st Qu.:4.000  
##  Median :4.000                                      Median :4.000  
##  Mean   :4.108                                      Mean   :3.975  
##  3rd Qu.:5.000                                      3rd Qu.:5.000  
##  Max.   :5.000                                      Max.   :5.000  
##                                                                    
##  Hitrost in zanesljivost opravljene storitve Poslovalnica_Podpora in svetovanje
##  Min.   :1.000                               Min.   :2.000                     
##  1st Qu.:4.000                               1st Qu.:4.000                     
##  Median :4.000                               Median :4.000                     
##  Mean   :4.185                               Mean   :4.006                     
##  3rd Qu.:5.000                               3rd Qu.:5.000                     
##  Max.   :5.000                               Max.   :5.000                     
##                                                                                
##  Mobilna banka_Podpora in svetovanje
##  Min.   :1.00                       
##  1st Qu.:3.00                       
##  Median :3.00                       
##  Mean   :3.14                       
##  3rd Qu.:4.00                       
##  Max.   :5.00                       
##                                     
##  Poslovalnica_Zagotavljanje varnosti in zaščita vaših finančnih podatkov ter transakcij
##  Min.   :2.000                                                                         
##  1st Qu.:3.986                                                                         
##  Median :4.000                                                                         
##  Mean   :3.981                                                                         
##  3rd Qu.:5.000                                                                         
##  Max.   :5.000                                                                         
##                                                                                        
##  Mobilna banka_Zagotavljanje varnosti in zaščita vaših finančnih podatkov ter transakcij
##  Min.   :1.000                                                                          
##  1st Qu.:2.000                                                                          
##  Median :3.000                                                                          
##  Mean   :3.032                                                                          
##  3rd Qu.:4.000                                                                          
##  Max.   :5.000                                                                          
##                                                                                         
##  Poslovalnica_Transparentnost delovanja banke in njenih storitev
##  Min.   :1.000                                                  
##  1st Qu.:3.000                                                  
##  Median :4.000                                                  
##  Mean   :3.898                                                  
##  3rd Qu.:4.000                                                  
##  Max.   :5.000                                                  
##                                                                 
##  Mobilna banka_Transparentnost delovanja banke in njenih storitev
##  Min.   :1.000                                                   
##  1st Qu.:3.000                                                   
##  Median :3.000                                                   
##  Mean   :3.248                                                   
##  3rd Qu.:4.000                                                   
##  Max.   :5.000                                                   
##                                                                  
##  Poslovalnica_Priročnost Mobilna banka_Priročnost
##  Min.   :1.000           Min.   :1.000           
##  1st Qu.:3.000           1st Qu.:3.000           
##  Median :4.000           Median :3.000           
##  Mean   :3.752           Mean   :3.389           
##  3rd Qu.:4.000           3rd Qu.:4.000           
##  Max.   :5.000           Max.   :5.000           
##                                                  
##  Poslovalnica_Hitrost in zanesljivost opravljene storitve
##  Min.   :1.000                                           
##  1st Qu.:3.000                                           
##  Median :4.000                                           
##  Mean   :3.701                                           
##  3rd Qu.:4.000                                           
##  Max.   :5.000                                           
##                                                          
##  Mobilna banka_Hitrost in zanesljivost opravljene storitve      Q15a  
##  Min.   :1.000                                             Min.   :1  
##  1st Qu.:3.000                                             1st Qu.:4  
##  Median :4.000                                             Median :4  
##  Mean   :3.586                                             Mean   :4  
##  3rd Qu.:4.000                                             3rd Qu.:5  
##  Max.   :5.000                                             Max.   :5  
##                                                                       
##       Q15b            Q15c            Q15d            Q15e      
##  Min.   :2.000   Min.   :1.000   Min.   :1.000   Min.   :2.000  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:4.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :4.000  
##  Mean   :4.134   Mean   :4.115   Mean   :3.809   Mean   :4.127  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##                                                                 
##            Q16a              Q16b              Q16c              Q16d   
##  Selected    :98   Selected    :66   Selected    :68   Selected    :79  
##  Not selected:59   Not selected:91   Not selected:89   Not selected:78  
##  NA's        : 1   NA's        : 1   NA's        : 1   NA's        : 1  
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##            Q16e               Q16f               Q16g          Q17a      
##  Selected    : 49   Selected    : 36   Selected    :  9   Min.   :0.000  
##  Not selected:108   Not selected:121   Not selected:148   1st Qu.:4.000  
##  NA's        :  1   NA's        :  1   NA's        :  1   Median :4.000  
##                                                           Mean   :3.662  
##                                                           3rd Qu.:4.000  
##                                                           Max.   :5.000  
##                                                                          
##       Q17b            Q17c            Q17d            Q17e            Q18a     
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :1.00  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:2.000   1st Qu.:4.000   1st Qu.:3.00  
##  Median :4.000   Median :4.000   Median :4.000   Median :4.000   Median :4.00  
##  Mean   :3.694   Mean   :3.758   Mean   :3.223   Mean   :3.809   Mean   :3.35  
##  3rd Qu.:4.000   3rd Qu.:4.750   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.00  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.00  
##                                                                                
##       Q18b            Q18c            Q19a            Q19b      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :4.000   Median :4.000   Median :3.000   Median :4.000  
##  Mean   :3.732   Mean   :3.713   Mean   :3.217   Mean   :3.548  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##                                                                 
##       Q19c            Q19d            Q19e                           Q41    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Female                :78  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:2.000   Male                  :79  
##  Median :4.000   Median :4.000   Median :3.000   I don't want to answer: 0  
##  Mean   :3.331   Mean   :3.618   Mean   :3.051   NA's                  : 1  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000                              
##  Max.   :5.000   Max.   :5.000   Max.   :5.000                              
##                                                                             
##            Q43a               Q43b               Q43c               Q43d   
##  Selected    : 32   Selected    : 12   Selected    :  5   Selected    :81  
##  Not selected:125   Not selected:145   Not selected:152   Not selected:76  
##  NA's        :  1   NA's        :  1   NA's        :  1   NA's        : 1  
##                                                                            
##                                                                            
##                                                                            
##                                                                            
##            Q43e               Q43f               Q43g               Q43h    
##  Selected    : 57   Selected    :  1   Selected    : 29   Selected    :  0  
##  Not selected:100   Not selected:156   Not selected:128   Not selected:157  
##  NA's        :  1   NA's        :  1   NA's        :  1   NA's        :  1  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##                           Q44                                    Q45    
##  1.000 – 5.000 habitants    :44   OTP banka d.d.                   :71  
##  More than 100.000 habitants:36   Nova Ljubljanska Banka d.d. (NLB):35  
##  Less than 1.000 habitants  :30   Delavska Hranilnica d.d.         :12  
##  20.001 – 50.000 habitants  :14   Banka Intesa Sanpaolo d.d.       :10  
##  5.001 – 20.000 habitants   :22   Deželna Banka Slovenije d.d.     : 8  
##  50.001 – 100.000 habitants :11   (Other)                          :18  
##  NA's                       : 1   NA's                             : 4  
##                 Q46                  Q47    
##  Študent/-ka      :22   Pod 1.000€     :46  
##  Redno zaposlen/-a:73   1.000€ - 1.500€:39  
##  Upokojen/-a      :43   2.001€ - 3.000€:29  
##  Samozaposlen/-a  :13   1.501€ - 2.000€:25  
##  Delno zaposlen/-a: 3   3.001€ - 5.000€: 6  
##  Brezposeln/-a    : 3   (Other)        : 4  
##  NA's             : 1   NA's           : 9  
##                                                                                                Q48    
##  Dokončana osnovna šola                                                                          : 8  
##  Dokončana visokošolska strokovna univerzitetna izobrazba (tudi 2. bolonjska stopnja)            :35  
##  Dokončana nižja ali srednja poklicna izobrazba                                                  :23  
##  Dokončana višješolska strokovna ali visokošolska strokovna izobrazba (tudi 1. bolonjska stopnja):34  
##  Dokončana specializacija, znanstveni magisterij, doktorat                                       : 5  
##  Dokončana srednja strokovna ali splošna izobrazba                                               :50  
##  NA's                                                                                            : 3  
##                Q49    
##  Preko linka     :84  
##  Na tablici      :37  
##  Na listu papirja:17  
##  NA's            :20  
##                       
##                       
## 
data2 <- mydata_excel2 %>% select(Q41, Q42, Q43a, Q43b, Q43c, Q43d, Q43e, Q43f, Q43g, Q43h, Q44, Q45, Q46, Q47, Q48)
# Rename columns using colnames()
colnames(data2) <- c("Spol","Leto rojstva","Živim s starši","Živim s sorojenci","Živim s sorodniki","Živim s partnerko/jem", "Živim z otroki", "Živim s skrbnikom", "Živim sam/a", "Drugo", "Št. prebivalcev v kraju bivanja", "Primarna banka", "Trenutna zaposlitev", "Mesečni neto prihodek", "Stopnja izobrazbe")

# View first few rows
head(data2)
## # A tibble: 6 × 15
##   Spol  `Leto rojstva` `Živim s starši` `Živim s sorojenci` `Živim s sorodniki`
##   <chr> <chr>          <chr>            <chr>               <chr>              
## 1 2     2006           1                0                   0                  
## 2 2     1988           0                0                   0                  
## 3 2     1963           0                0                   0                  
## 4 -3    -3             -3               -3                  -3                 
## 5 2     2004           1                0                   0                  
## 6 2     2000           1                1                   0                  
## # ℹ 10 more variables: `Živim s partnerko/jem` <chr>, `Živim z otroki` <chr>,
## #   `Živim s skrbnikom` <chr>, `Živim sam/a` <chr>, Drugo <chr>,
## #   `Št. prebivalcev v kraju bivanja` <chr>, `Primarna banka` <chr>,
## #   `Trenutna zaposlitev` <chr>, `Mesečni neto prihodek` <chr>,
## #   `Stopnja izobrazbe` <chr>
# Automatically calculate age and assign age groups

data2 <- data2 %>%
  mutate(
    `Leto rojstva` = as.numeric(as.character(`Leto rojstva`)),  # Convert to numeric (handles factors & characters)
    age = as.numeric(format(Sys.Date(), "%Y")) - `Leto rojstva`, # Calculate age
    age_group = case_when(
      age <= 40 ~ "Young",
      age > 40 & age <= 60 ~ "Professional",
      age > 60 ~ "Older"))

# View first few rows
head(data2)
## # A tibble: 6 × 17
##   Spol  `Leto rojstva` `Živim s starši` `Živim s sorojenci` `Živim s sorodniki`
##   <chr>          <dbl> <chr>            <chr>               <chr>              
## 1 2               2006 1                0                   0                  
## 2 2               1988 0                0                   0                  
## 3 2               1963 0                0                   0                  
## 4 -3                -3 -3               -3                  -3                 
## 5 2               2004 1                0                   0                  
## 6 2               2000 1                1                   0                  
## # ℹ 12 more variables: `Živim s partnerko/jem` <chr>, `Živim z otroki` <chr>,
## #   `Živim s skrbnikom` <chr>, `Živim sam/a` <chr>, Drugo <chr>,
## #   `Št. prebivalcev v kraju bivanja` <chr>, `Primarna banka` <chr>,
## #   `Trenutna zaposlitev` <chr>, `Mesečni neto prihodek` <chr>,
## #   `Stopnja izobrazbe` <chr>, age <dbl>, age_group <chr>
data2clean <- data2 %>%
  filter(complete.cases(.))  # Removes rows with any NAs

head(data2)
## # A tibble: 6 × 17
##   Spol  `Leto rojstva` `Živim s starši` `Živim s sorojenci` `Živim s sorodniki`
##   <chr>          <dbl> <chr>            <chr>               <chr>              
## 1 2               2006 1                0                   0                  
## 2 2               1988 0                0                   0                  
## 3 2               1963 0                0                   0                  
## 4 -3                -3 -3               -3                  -3                 
## 5 2               2004 1                0                   0                  
## 6 2               2000 1                1                   0                  
## # ℹ 12 more variables: `Živim s partnerko/jem` <chr>, `Živim z otroki` <chr>,
## #   `Živim s skrbnikom` <chr>, `Živim sam/a` <chr>, Drugo <chr>,
## #   `Št. prebivalcev v kraju bivanja` <chr>, `Primarna banka` <chr>,
## #   `Trenutna zaposlitev` <chr>, `Mesečni neto prihodek` <chr>,
## #   `Stopnja izobrazbe` <chr>, age <dbl>, age_group <chr>
summary(data2)
##      Spol            Leto rojstva  Živim s starši     Živim s sorojenci 
##  Length:190         Min.   :  -3   Length:190         Length:190        
##  Class :character   1st Qu.:1951   Class :character   Class :character  
##  Mode  :character   Median :1967   Mode  :character   Mode  :character  
##                     Mean   :1662                                        
##                     3rd Qu.:1986                                        
##                     Max.   :2007                                        
##  Živim s sorodniki  Živim s partnerko/jem Živim z otroki     Živim s skrbnikom 
##  Length:190         Length:190            Length:190         Length:190        
##  Class :character   Class :character      Class :character   Class :character  
##  Mode  :character   Mode  :character      Mode  :character   Mode  :character  
##                                                                                
##                                                                                
##                                                                                
##  Živim sam/a           Drugo           Št. prebivalcev v kraju bivanja
##  Length:190         Length:190         Length:190                     
##  Class :character   Class :character   Class :character               
##  Mode  :character   Mode  :character   Mode  :character               
##                                                                       
##                                                                       
##                                                                       
##  Primarna banka     Trenutna zaposlitev Mesečni neto prihodek
##  Length:190         Length:190          Length:190           
##  Class :character   Class :character    Class :character     
##  Mode  :character   Mode  :character    Mode  :character     
##                                                              
##                                                              
##                                                              
##  Stopnja izobrazbe       age           age_group        
##  Length:190         Min.   :  18.00   Length:190        
##  Class :character   1st Qu.:  39.25   Class :character  
##  Mode  :character   Median :  58.00   Mode  :character  
##                     Mean   : 362.84                     
##                     3rd Qu.:  73.75                     
##                     Max.   :2028.00