Подготовка исходных данных
1. Очистить данные с использованием функции is.na()
# Remove missing values from a numeric vector.
x <- c(7, 2, NA, 8, NA, 9, 1)
bad <- is.na(x)  # TRUE at the positions that hold NA
x[which(!bad)]   # keep only the observed values
## [1] 7 2 8 9 1
2. Сгенерировать таблицу данных с числовыми и текстовыми столбцами.
Очистить данные с помощью функции complete.cases().
# Two parallel vectors, one character and one numeric, with NAs at partly
# different positions.
x <- c("a", "b", NA, "d", NA, "f", "r", NA, "ya")
y <- c(1, 2, NA, 5, NA, 7, NA, 29, 31)
# complete.cases() is TRUE only at positions where neither vector is NA.
good <- complete.cases(x, y)
x[which(good)]
## [1] "a" "b" "d" "f" "ya"
y[which(good)]
## [1] 1 2 5 7 31
3. Сгенерировать числовую таблицу данных с пропусками. С
использованием функции preProcess из пакета caret заполнить пропуски
предсказанными значениями (среднее, медиана).
# Preview the first six rows of airquality (6 is head()'s default, spelled out).
head(airquality, n = 6L)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
library(caret)
## Warning: пакет 'caret' был собран под R версии 4.2.2
## Загрузка требуемого пакета: ggplot2
## Загрузка требуемого пакета: lattice
# Fit a median-imputation model on airquality and apply it: every NA is
# replaced by the median of its column.
# The result is stored under its own name instead of being assigned back to
# `airquality`; overwriting that name would hide the built-in data set behind
# an NA-free copy for every later step that reads `airquality`.
pPmI <- preProcess(airquality, method = "medianImpute")
Imp.Med <- predict(pPmI, airquality)
Imp.Med
## Ozone Solar.R Wind Temp Month Day
## 1 41.0 190 7.4 67 5 1
## 2 36.0 118 8.0 72 5 2
## 3 12.0 149 12.6 74 5 3
## 4 18.0 313 11.5 62 5 4
## 5 31.5 205 14.3 56 5 5
## 6 28.0 205 14.9 66 5 6
## 7 23.0 299 8.6 65 5 7
## 8 19.0 99 13.8 59 5 8
## 9 8.0 19 20.1 61 5 9
## 10 31.5 194 8.6 69 5 10
## 11 7.0 205 6.9 74 5 11
## 12 16.0 256 9.7 69 5 12
## 13 11.0 290 9.2 66 5 13
## 14 14.0 274 10.9 68 5 14
## 15 18.0 65 13.2 58 5 15
## 16 14.0 334 11.5 64 5 16
## 17 34.0 307 12.0 66 5 17
## 18 6.0 78 18.4 57 5 18
## 19 30.0 322 11.5 68 5 19
## 20 11.0 44 9.7 62 5 20
## 21 1.0 8 9.7 59 5 21
## 22 11.0 320 16.6 73 5 22
## 23 4.0 25 9.7 61 5 23
## 24 32.0 92 12.0 61 5 24
## 25 31.5 66 16.6 57 5 25
## 26 31.5 266 14.9 58 5 26
## 27 31.5 205 8.0 57 5 27
## 28 23.0 13 12.0 67 5 28
## 29 45.0 252 14.9 81 5 29
## 30 115.0 223 5.7 79 5 30
## 31 37.0 279 7.4 76 5 31
## 32 31.5 286 8.6 78 6 1
## 33 31.5 287 9.7 74 6 2
## 34 31.5 242 16.1 67 6 3
## 35 31.5 186 9.2 84 6 4
## 36 31.5 220 8.6 85 6 5
## 37 31.5 264 14.3 79 6 6
## 38 29.0 127 9.7 82 6 7
## 39 31.5 273 6.9 87 6 8
## 40 71.0 291 13.8 90 6 9
## 41 39.0 323 11.5 87 6 10
## 42 31.5 259 10.9 93 6 11
## 43 31.5 250 9.2 92 6 12
## 44 23.0 148 8.0 82 6 13
## 45 31.5 332 13.8 80 6 14
## 46 31.5 322 11.5 79 6 15
## 47 21.0 191 14.9 77 6 16
## 48 37.0 284 20.7 72 6 17
## 49 20.0 37 9.2 65 6 18
## 50 12.0 120 11.5 73 6 19
## 51 13.0 137 10.3 76 6 20
## 52 31.5 150 6.3 77 6 21
## 53 31.5 59 1.7 76 6 22
## 54 31.5 91 4.6 76 6 23
## 55 31.5 250 6.3 76 6 24
## 56 31.5 135 8.0 75 6 25
## 57 31.5 127 8.0 78 6 26
## 58 31.5 47 10.3 73 6 27
## 59 31.5 98 11.5 80 6 28
## 60 31.5 31 14.9 77 6 29
## 61 31.5 138 8.0 83 6 30
## 62 135.0 269 4.1 84 7 1
## 63 49.0 248 9.2 85 7 2
## 64 32.0 236 9.2 81 7 3
## 65 31.5 101 10.9 84 7 4
## 66 64.0 175 4.6 83 7 5
## 67 40.0 314 10.9 83 7 6
## 68 77.0 276 5.1 88 7 7
## 69 97.0 267 6.3 92 7 8
## 70 97.0 272 5.7 92 7 9
## 71 85.0 175 7.4 89 7 10
## 72 31.5 139 8.6 82 7 11
## 73 10.0 264 14.3 73 7 12
## 74 27.0 175 14.9 81 7 13
## 75 31.5 291 14.9 91 7 14
## 76 7.0 48 14.3 80 7 15
## 77 48.0 260 6.9 81 7 16
## 78 35.0 274 10.3 82 7 17
## 79 61.0 285 6.3 84 7 18
## 80 79.0 187 5.1 87 7 19
## 81 63.0 220 11.5 85 7 20
## 82 16.0 7 6.9 74 7 21
## 83 31.5 258 9.7 81 7 22
## 84 31.5 295 11.5 82 7 23
## 85 80.0 294 8.6 86 7 24
## 86 108.0 223 8.0 85 7 25
## 87 20.0 81 8.6 82 7 26
## 88 52.0 82 12.0 86 7 27
## 89 82.0 213 7.4 88 7 28
## 90 50.0 275 7.4 86 7 29
## 91 64.0 253 7.4 83 7 30
## 92 59.0 254 9.2 81 7 31
## 93 39.0 83 6.9 81 8 1
## 94 9.0 24 13.8 81 8 2
## 95 16.0 77 7.4 82 8 3
## 96 78.0 205 6.9 86 8 4
## 97 35.0 205 7.4 85 8 5
## 98 66.0 205 4.6 87 8 6
## 99 122.0 255 4.0 89 8 7
## 100 89.0 229 10.3 90 8 8
## 101 110.0 207 8.0 90 8 9
## 102 31.5 222 8.6 92 8 10
## 103 31.5 137 11.5 86 8 11
## 104 44.0 192 11.5 86 8 12
## 105 28.0 273 11.5 82 8 13
## 106 65.0 157 9.7 80 8 14
## 107 31.5 64 11.5 79 8 15
## 108 22.0 71 10.3 77 8 16
## 109 59.0 51 6.3 79 8 17
## 110 23.0 115 7.4 76 8 18
## 111 31.0 244 10.9 78 8 19
## 112 44.0 190 10.3 78 8 20
## 113 21.0 259 15.5 77 8 21
## 114 9.0 36 14.3 72 8 22
## 115 31.5 255 12.6 75 8 23
## 116 45.0 212 9.7 79 8 24
## 117 168.0 238 3.4 81 8 25
## 118 73.0 215 8.0 86 8 26
## 119 31.5 153 5.7 88 8 27
## 120 76.0 203 9.7 97 8 28
## 121 118.0 225 2.3 94 8 29
## 122 84.0 237 6.3 96 8 30
## 123 85.0 188 6.3 94 8 31
## 124 96.0 167 6.9 91 9 1
## 125 78.0 197 5.1 92 9 2
## 126 73.0 183 2.8 93 9 3
## 127 91.0 189 4.6 93 9 4
## 128 47.0 95 7.4 87 9 5
## 129 32.0 92 15.5 84 9 6
## 130 20.0 252 10.9 80 9 7
## 131 23.0 220 10.3 78 9 8
## 132 21.0 230 10.9 75 9 9
## 133 24.0 259 9.7 73 9 10
## 134 44.0 236 14.9 81 9 11
## 135 21.0 259 15.5 76 9 12
## 136 28.0 238 6.3 77 9 13
## 137 9.0 24 10.9 71 9 14
## 138 13.0 112 11.5 71 9 15
## 139 46.0 237 6.9 78 9 16
## 140 18.0 224 13.8 67 9 17
## 141 13.0 27 10.3 76 9 18
## 142 24.0 238 10.3 68 9 19
## 143 16.0 201 8.0 82 9 20
## 144 13.0 238 12.6 64 9 21
## 145 23.0 14 9.2 71 9 22
## 146 36.0 139 10.3 81 9 23
## 147 7.0 49 10.3 69 9 24
## 148 14.0 20 16.6 63 9 25
## 149 30.0 193 6.9 70 9 26
## 150 31.5 145 13.2 77 9 27
## 151 14.0 191 14.3 75 9 28
## 152 18.0 131 8.0 76 9 29
## 153 20.0 223 11.5 68 9 30
4. Сгенерировать два числовых набора данных, добавить в них выбросы.
С использованием функции boxplot обнаружить выбросы и удалить их.
# Inject outliers into data, detect them with boxplot(), and remove them.
cars1 <- cars[1:30, ]  # original data
cars_outliers <- data.frame(
  speed = c(19, 19, 20, 20, 20),
  dist = c(190, 186, 210, 220, 218)
)  # introduce outliers.
cars2 <- rbind(cars1, cars_outliers)  # data with outliers.

# Three panels side by side; the previous layout is restored at the end so
# that later plots are not affected by this block.
old_par <- par(mfrow = c(1, 3))

# boxplot() draws the plot and returns, in $out, the values lying beyond the
# whiskers (further than 1.5 * IQR from the box with the default `range`).
dist_outliers <- boxplot(
  cars2$dist,
  main = "Boxplot of dist",
  ylab = "dist"
)$out
# Keep only the rows whose dist was not flagged. The rule is data-driven, so
# it may flag extreme genuine observations as well as the injected ones.
cars_clean <- cars2[!(cars2$dist %in% dist_outliers), ]

# Plot of data with outliers.
plot(
  cars2$speed, cars2$dist,
  xlim = c(0, 28), ylim = c(0, 230),
  main = "With Outliers", xlab = "speed", ylab = "dist",
  pch = "*", col = "red", cex = 2
)
abline(lm(dist ~ speed, data = cars2), col = "blue", lwd = 3, lty = 2)

# Plot of the data after removing the detected outliers. Note the change in
# slope (angle) of the best fit line.
plot(
  cars_clean$speed, cars_clean$dist,
  xlim = c(0, 28), ylim = c(0, 230),
  main = "Outliers removed \n A much better fit!",
  xlab = "speed", ylab = "dist",
  pch = "*", col = "red", cex = 2
)
abline(lm(dist ~ speed, data = cars_clean), col = "blue", lwd = 3, lty = 2)

par(old_par)

5. Сгенерируйте таблицу данных, в которой дублируются строки.
Удалите дублирующиеся строки с использованием функций unique(), duplicated(). Сравните
результаты.
# Build a small data frame in which some rows repeat.
a <- c(rep("A", 3), rep("B", 3), rep("C", 2))
b <- c(1, 1, 2, 4, 1, 1, 2, 2)
df <- data.frame(a, b)
# duplicated() marks every row that repeats an earlier row.
duplicated(df)
## [1] FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE
# The repeated rows themselves.
df[duplicated(df), ]
## a b
## 2 A 1
## 6 B 1
## 8 C 2
# unique() drops the repeated rows in a single call. Compare its result with
# the !duplicated() filter: identical() prints TRUE when both approaches
# return the same data frame.
df_unique <- unique(df)
identical(df_unique, df[!duplicated(df), ])
# Rows left after dropping duplicates via duplicated().
df[!duplicated(df), ]
## a b
## 1 A 1
## 3 A 2
## 4 B 4
## 5 B 1
## 7 C 2
6. Обработать пропуски в данных с использованием пакета mice.
# Start from the data set as shipped with R. The datasets:: prefix bypasses
# any modified object named `airquality` that may exist in the workspace, so
# the missing values that the imputation below is meant to fill are present.
dataset <- datasets::airquality
summary(dataset)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 21.00 1st Qu.:120.0 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 39.56 Mean :186.8 Mean : 9.958 Mean :77.88
## 3rd Qu.: 46.00 3rd Qu.:256.0 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
library(mice)
## Warning: пакет 'mice' был собран под R версии 4.2.2
##
## Присоединяю пакет: 'mice'
## Следующий объект скрыт от 'package:stats':
##
## filter
## Следующие объекты скрыты от 'package:base':
##
## cbind, rbind
# Fix the RNG state so that the imputation run is reproducible.
set.seed(1)
# Run mice with its default settings. The imputation object is kept under its
# own name rather than being overwritten by the completed data.
imputation <- mice(dataset)
##
## iter imp variable
## 1 1
## 1 2
## 1 3
## 1 4
## 1 5
## 2 1
## 2 2
## 2 3
## 2 4
## 2 5
## 3 1
## 3 2
## 3 3
## 3 4
## 3 5
## 4 1
## 4 2
## 4 3
## 4 4
## 4 5
## 5 1
## 5 2
## 5 3
## 5 4
## 5 5
# Extract a completed data set from the imputation object and summarise it.
dataset2 <- complete(imputation)
summary(dataset2)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 21.00 1st Qu.:120.0 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 39.56 Mean :186.8 Mean : 9.958 Mean :77.88
## 3rd Qu.: 46.00 3rd Qu.:256.0 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
7. Разобрать пример с мультиколлинеарностью.
library(readxl)
## Warning: пакет 'readxl' был собран под R версии 4.2.2
# Load the wage data from CPS1985.csv (semicolon-separated, with a header).
wagesmicrodata <- read.csv(
  "CPS1985.csv",
  header = TRUE,
  sep = ";",
  quote = "\""
)
# View(wagesmicrodata)
# NOTE(review): attach() is retained only because code further down the
# script may still refer to the columns by bare name; prefer passing
# `data =` explicitly and dropping this call once nothing depends on it.
attach(wagesmicrodata)
# Full log-wage model. With `data =` the variables are looked up in the data
# frame first, so same-named objects in the workspace cannot shadow them.
fit1 <- lm(
  log(WAGE) ~ OCCUPATION + SECTOR + UNION + EDUCATION + EXPERIENCE + AGE +
    SEX + MARR + RACE + SOUTH,
  data = wagesmicrodata
)
summary(fit1)
##
## Call:
## lm(formula = log(WAGE) ~ OCCUPATION + SECTOR + UNION + EDUCATION +
## EXPERIENCE + AGE + SEX + MARR + RACE + SOUTH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.16246 -0.29163 -0.00469 0.29981 1.98248
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.078596 0.687514 1.569 0.117291
## OCCUPATION -0.007417 0.013109 -0.566 0.571761
## SECTOR 0.091458 0.038736 2.361 0.018589 *
## UNION 0.200483 0.052475 3.821 0.000149 ***
## EDUCATION 0.179366 0.110756 1.619 0.105949
## EXPERIENCE 0.095822 0.110799 0.865 0.387531
## AGE -0.085444 0.110730 -0.772 0.440671
## SEX -0.221997 0.039907 -5.563 4.24e-08 ***
## MARR 0.076611 0.041931 1.827 0.068259 .
## RACE 0.050406 0.028531 1.767 0.077865 .
## SOUTH -0.102360 0.042823 -2.390 0.017187 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4398 on 523 degrees of freedom
## Multiple R-squared: 0.3185, Adjusted R-squared: 0.3054
## F-statistic: 24.44 on 10 and 523 DF, p-value: < 2.2e-16
# Draw the diagnostic plots for fit1 on a 2 x 2 grid, then restore the
# previous graphics parameters so that later plots keep their own layout.
old_par <- par(mfrow = c(2, 2))
plot(fit1)
par(old_par)
## Warning: наблюдения с единичной трансляцией не рисую:
## 444

# Variables inspected for pairwise relationships, selected by name instead of
# by position (originally columns 3:11) so that a change in the CSV column
# order cannot silently change the selection.
# NOTE(review): this set contains the response WAGE and leaves out EDUCATION
# and SOUTH, which are predictors in fit1 - confirm that this is intended.
X <- wagesmicrodata[, c(
  "SEX", "EXPERIENCE", "UNION", "WAGE", "AGE",
  "RACE", "OCCUPATION", "SECTOR", "MARR"
)]
library(GGally)
## Warning: пакет 'GGally' был собран под R версии 4.2.2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Matrix of pairwise plots for the selected variables.
ggpairs(X)

library(corpcor)
# Partial correlations derived from the covariance matrix of X.
cor2pcor(cov(X))
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.00000000 -0.051814221 -0.084663752 -0.22350867 0.07430981
## [2,] -0.05181422 1.000000000 0.007228653 -0.42063263 0.98119549
## [3,] -0.08466375 0.007228653 1.000000000 0.13197044 0.01237206
## [4,] -0.22350867 -0.420632633 0.131970436 1.00000000 0.43989938
## [5,] 0.07430981 0.981195495 0.012372055 0.43989938 1.00000000
## [6,] 0.04234732 -0.062072080 -0.107715633 0.08227380 0.05620822
## [7,] -0.14914626 0.146410013 0.217594609 -0.04968818 -0.16017158
## [8,] -0.09152223 0.135368994 -0.021950358 0.08096922 -0.11407350
## [9,] 0.00993958 0.010488351 0.060656681 0.03975582 0.03863893
## [,6] [,7] [,8] [,9]
## [1,] 0.042347322 -0.14914626 -0.091522229 0.00993958
## [2,] -0.062072080 0.14641001 0.135368994 0.01048835
## [3,] -0.107715633 0.21759461 -0.021950358 0.06065668
## [4,] 0.082273797 -0.04968818 0.080969219 0.03975582
## [5,] 0.056208218 -0.16017158 -0.114073496 0.03863893
## [6,] 1.000000000 0.06142611 0.001717449 0.04831882
## [7,] 0.061426112 1.00000000 0.316853644 -0.01754927
## [8,] 0.001717449 0.31685364 1.000000000 0.03338670
## [9,] 0.048318821 -0.01754927 0.033386698 1.00000000
library(mctest)
# Overall multicollinearity diagnostics for the full model.
omcdiag(mod = fit1)
##
## Call:
## omcdiag(mod = fit1)
##
##
## Overall Multicollinearity Diagnostics
##
## MC Results detection
## Determinant |X'X|: 0.0001 1
## Farrar Chi-Square: 4818.3895 1
## Red Indicator: 0.1983 0
## Sum of Lambda Inverse: 10068.8439 1
## Theil's Method: 0.8845 1
## Condition Number: 739.7337 1
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
# Per-regressor multicollinearity diagnostics (VIF, tolerance, ...) for the
# full model.
imcdiag(mod = fit1)
##
## Call:
## imcdiag(mod = fit1)
##
##
## All Individual Multicollinearity Diagnostics Result
##
## VIF TOL Wi Fi Leamer CVIF Klein
## OCCUPATION 1.2982 0.7703 17.3637 19.5715 0.8777 1.3620 0
## SECTOR 1.1987 0.8343 11.5670 13.0378 0.9134 1.2576 0
## UNION 1.1209 0.8922 7.0368 7.9315 0.9445 1.1759 0
## EDUCATION 231.1956 0.0043 13402.4982 15106.5849 0.0658 242.5527 1
## EXPERIENCE 5184.0939 0.0002 301771.2445 340140.5368 0.0139 5438.7545 1
## AGE 4645.6650 0.0002 270422.7164 304806.1391 0.0147 4873.8761 1
## SEX 1.0916 0.9161 5.3351 6.0135 0.9571 1.1453 0
## MARR 1.0961 0.9123 5.5969 6.3085 0.9551 1.1500 0
## RACE 1.0371 0.9642 2.1622 2.4372 0.9819 1.0881 0
## SOUTH 1.0468 0.9553 2.7264 3.0731 0.9774 1.0983 0
## IND1 IND2
## OCCUPATION 0.0132 0.6125
## SECTOR 0.0143 0.4419
## UNION 0.0153 0.2875
## EDUCATION 0.0001 2.6546
## EXPERIENCE 0.0000 2.6656
## AGE 0.0000 2.6656
## SEX 0.0157 0.2238
## MARR 0.0157 0.2338
## RACE 0.0166 0.0955
## SOUTH 0.0164 0.1193
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
##
## OCCUPATION , EDUCATION , EXPERIENCE , AGE , MARR , RACE , coefficient(s) are non-significant may be due to multicollinearity
##
## R-square of y on all x: 0.3185
##
## * use method argument to check which regressors may be the reason of collinearity
## ===================================
library(ppcor)
## Warning: пакет 'ppcor' был собран под R версии 4.2.2
## Загрузка требуемого пакета: MASS
# Pearson partial correlations between the columns of X, together with their
# p-values and test statistics.
pcor(x = X, method = "pearson")
## $estimate
## SEX EXPERIENCE UNION WAGE AGE
## SEX 1.00000000 -0.051814221 -0.084663752 -0.22350867 0.07430981
## EXPERIENCE -0.05181422 1.000000000 0.007228653 -0.42063263 0.98119549
## UNION -0.08466375 0.007228653 1.000000000 0.13197044 0.01237206
## WAGE -0.22350867 -0.420632633 0.131970436 1.00000000 0.43989938
## AGE 0.07430981 0.981195495 0.012372055 0.43989938 1.00000000
## RACE 0.04234732 -0.062072080 -0.107715633 0.08227380 0.05620822
## OCCUPATION -0.14914626 0.146410013 0.217594609 -0.04968818 -0.16017158
## SECTOR -0.09152223 0.135368994 -0.021950358 0.08096922 -0.11407350
## MARR 0.00993958 0.010488351 0.060656681 0.03975582 0.03863893
## RACE OCCUPATION SECTOR MARR
## SEX 0.042347322 -0.14914626 -0.091522229 0.00993958
## EXPERIENCE -0.062072080 0.14641001 0.135368994 0.01048835
## UNION -0.107715633 0.21759461 -0.021950358 0.06065668
## WAGE 0.082273797 -0.04968818 0.080969219 0.03975582
## AGE 0.056208218 -0.16017158 -0.114073496 0.03863893
## RACE 1.000000000 0.06142611 0.001717449 0.04831882
## OCCUPATION 0.061426112 1.00000000 0.316853644 -0.01754927
## SECTOR 0.001717449 0.31685364 1.000000000 0.03338670
## MARR 0.048318821 -0.01754927 0.033386698 1.00000000
##
## $p.value
## SEX EXPERIENCE UNION WAGE AGE
## SEX 0.000000e+00 2.350519e-01 5.208199e-02 2.165737e-07 8.834415e-02
## EXPERIENCE 2.350519e-01 0.000000e+00 8.685091e-01 5.198759e-24 0.000000e+00
## UNION 5.208199e-02 8.685091e-01 0.000000e+00 2.400016e-03 7.769045e-01
## WAGE 2.165737e-07 5.198759e-24 2.400016e-03 0.000000e+00 2.382090e-26
## AGE 8.834415e-02 0.000000e+00 7.769045e-01 2.382090e-26 0.000000e+00
## RACE 3.319107e-01 1.547520e-01 1.335714e-02 5.910213e-02 1.976424e-01
## OCCUPATION 5.926105e-04 7.482482e-04 4.561261e-07 2.548447e-01 2.224027e-04
## SECTOR 3.568933e-02 1.842168e-03 6.151281e-01 6.325352e-02 8.765462e-03
## MARR 8.199249e-01 8.101671e-01 1.643959e-01 3.623762e-01 3.760282e-01
## RACE OCCUPATION SECTOR MARR
## SEX 0.33191073 5.926105e-04 3.568933e-02 0.8199249
## EXPERIENCE 0.15475201 7.482482e-04 1.842168e-03 0.8101671
## UNION 0.01335714 4.561261e-07 6.151281e-01 0.1643959
## WAGE 0.05910213 2.548447e-01 6.325352e-02 0.3623762
## AGE 0.19764239 2.224027e-04 8.765462e-03 0.3760282
## RACE 0.00000000 1.590988e-01 9.686249e-01 0.2681889
## OCCUPATION 0.15909882 0.000000e+00 9.379012e-14 0.6877254
## SECTOR 0.96862489 9.379012e-14 0.000000e+00 0.4443702
## MARR 0.26818887 6.877254e-01 4.443702e-01 0.0000000
##
## $statistic
## SEX EXPERIENCE UNION WAGE AGE RACE
## SEX 0.0000000 -1.1888098 -1.9468804 -5.254147 1.7073721 0.97117024
## EXPERIENCE -1.1888098 0.0000000 0.1656336 -10.623429 116.4771067 -1.42499788
## UNION -1.9468804 0.1656336 0.0000000 3.050503 0.2835011 -2.48251914
## WAGE -5.2541467 -10.6234287 3.0505033 0.000000 11.2236444 1.89154228
## AGE 1.7073721 116.4771067 0.2835011 11.223644 0.0000000 1.28993136
## RACE 0.9711702 -1.4249979 -2.4825191 1.891542 1.2899314 0.00000000
## OCCUPATION -3.4560252 3.3912186 5.1081131 -1.139907 -3.7179942 1.41011184
## SECTOR -2.1058760 3.1305088 -0.5030671 1.861349 -2.6309260 0.03935177
## MARR 0.2277556 0.2403315 1.3923830 0.911641 0.8859907 1.10841795
## OCCUPATION SECTOR MARR
## SEX -3.4560252 -2.10587601 0.2277556
## EXPERIENCE 3.3912186 3.13050884 0.2403315
## UNION 5.1081131 -0.50306710 1.3923830
## WAGE -1.1399073 1.86134942 0.9116410
## AGE -3.7179942 -2.63092602 0.8859907
## RACE 1.4101118 0.03935177 1.1084179
## OCCUPATION 0.0000000 7.65442770 -0.4021662
## SECTOR 7.6544277 0.00000000 0.7654121
## MARR -0.4021662 0.76541206 0.0000000
##
## $n
## [1] 534
##
## $gp
## [1] 7
##
## $method
## [1] "pearson"
# Same specification as fit1 with EXPERIENCE dropped, one of the regressors
# flagged by the collinearity diagnostics. `data =` makes the fit read its
# variables from the data frame instead of relying on attach().
fit2 <- lm(
  log(WAGE) ~ OCCUPATION + SECTOR + UNION + EDUCATION + AGE + SEX + MARR +
    RACE + SOUTH,
  data = wagesmicrodata
)
summary(fit2)
##
## Call:
## lm(formula = log(WAGE) ~ OCCUPATION + SECTOR + UNION + EDUCATION +
## AGE + SEX + MARR + RACE + SOUTH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.16018 -0.29085 -0.00513 0.29985 1.97932
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.501358 0.164794 3.042 0.002465 **
## OCCUPATION -0.006941 0.013095 -0.530 0.596309
## SECTOR 0.091013 0.038723 2.350 0.019125 *
## UNION 0.200018 0.052459 3.813 0.000154 ***
## EDUCATION 0.083815 0.007728 10.846 < 2e-16 ***
## AGE 0.010305 0.001745 5.905 6.34e-09 ***
## SEX -0.220100 0.039837 -5.525 5.20e-08 ***
## MARR 0.075125 0.041886 1.794 0.073458 .
## RACE 0.050674 0.028523 1.777 0.076210 .
## SOUTH -0.103186 0.042802 -2.411 0.016261 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4397 on 524 degrees of freedom
## Multiple R-squared: 0.3175, Adjusted R-squared: 0.3058
## F-statistic: 27.09 on 9 and 524 DF, p-value: < 2.2e-16