AUDITING: Obligatory Homework
Prepared by Jan Zakrzewski, 114601 (solo)

I was not able to work out how to format this as a proper R Markdown document.

I have only managed to copy the contents of the console and to add the .png images of the resulting graphs.

The last plot is at the bottom.

library(readr)
DF <- read_csv(file.choose( ))
Rows: 67 Columns: 13
.csv [] 261.96MB/s, eta: 0s
── Column specification ───────────────────────────────
Delimiter: ","
dbl (13): PublicationYears, BigSample, Method, Angl…

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
> DF<- as.data.frame(DF)
> rownames(DF)<-DF$Nr_Papieru
> head(DF)
  PublicationYears BigSample Method AngloSaxon USA
2               35         0      0          1   1
3               33         0      0          1   1
5               22         0      1          1   1
6               22         0      1          1   1
7               20         0      0          0   0
8               16         0      0          0   0
  TimeSpan Sample BusinessSupport TC    TCYear
2        0    243               0  0 0.0000000
3        0    410               0  4 0.1212121
5        1     98               0 49 2.2272727
6        3    860               0 35 1.5909091
7        0    287               0  6 0.3000000
8        0   2428               0  7 0.4375000
  EXCLUDED   PY Nr_Papieru
2        0 1980          2
3        0 1982          3
5        0 1993          5
6        0 1993          6
7        0 1995          7
8        0 1999          8
> library(psych)
>
> describe(DF)
                 vars  n    mean      sd median
PublicationYears    1 67    8.52    8.16    7.0
BigSample           2 67    0.27    0.45    0.0
Method              3 67    0.70    0.46    1.0
AngloSaxon          4 67    0.69    0.47    1.0
USA                 5 67    0.45    0.50    0.0
TimeSpan            6 67    3.87    7.57    1.0
Sample              7 67 1755.17 6348.16  371.0
BusinessSupport     8 67    0.04    0.21    0.0
TC                  9 67   22.52   46.55    3.0
TCYear             10 67    2.17    3.95    0.5
EXCLUDED           11 67    0.00    0.00    0.0
PY                 12 67 2006.48    8.16 2008.0
Nr_Papieru         13 67   64.72   44.95   55.0
                 trimmed    mad  min      max    range
PublicationYears    7.05   5.93    0    36.00    36.00
BigSample           0.22   0.00    0     1.00     1.00
Method              0.75   0.00    0     1.00     1.00
AngloSaxon          0.73   0.00    0     1.00     1.00
USA                 0.44   0.00    0     1.00     1.00
TimeSpan            2.20   1.48    0    40.00    40.00
Sample            779.31 413.65    0 51755.00 51755.00
BusinessSupport     0.00   0.00    0     1.00     1.00
TC                 10.87   4.45    0   227.00   227.00
TCYear              1.19   0.74    0    18.92    18.92
EXCLUDED            0.00   0.00    0     0.00     0.00
PY               2007.95   5.93 1979  2015.00    36.00
Nr_Papieru         64.07  65.23    1   137.00   136.00
                  skew kurtosis     se
PublicationYears  1.80     3.10   1.00
BigSample         1.02    -0.97   0.05
Method           -0.86    -1.28   0.06
AngloSaxon       -0.79    -1.40   0.06
USA               0.21    -1.99   0.06
TimeSpan          3.67    13.88   0.92
Sample            7.29    54.45 775.55
BusinessSupport   4.30    16.78   0.03
TC                2.65     6.79   5.69
TCYear            2.48     5.70   0.48
EXCLUDED           NaN      NaN   0.00
PY               -1.80     3.10   1.00
Nr_Papieru        0.13    -1.49   5.49
> TCY<- lm(TCYear ~ PublicationYears +BigSample+ Method+AngloSaxon+TimeSpan +Sample+BusinessSupport,DF)
> summary(TCY)

Call:
lm(formula = TCYear ~ PublicationYears + BigSample + Method +
    AngloSaxon + TimeSpan + Sample + BusinessSupport, data = DF)

Residuals:
    Min      1Q  Median      3Q     Max
-4.6610 -1.4347 -0.4097  0.3778 13.8215

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)
(Intercept)      -1.085e+00  1.206e+00  -0.900   0.3718
PublicationYears  1.019e-01  5.645e-02   1.806   0.0760 .
BigSample         3.583e+00  1.129e+00   3.173   0.0024 **
Method            6.020e-01  1.069e+00   0.563   0.5755
AngloSaxon        8.287e-01  1.027e+00   0.807   0.4231
TimeSpan          1.106e-01  6.237e-02   1.773   0.0815 .
Sample           -5.256e-05  7.557e-05  -0.696   0.4895
BusinessSupport   2.185e+00  2.184e+00   1.001   0.3212
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.633 on 59 degrees of freedom
Multiple R-squared:  0.2427, Adjusted R-squared:  0.1529
F-statistic: 2.701 on 7 and 59 DF,  p-value: 0.01693

plot(TCY)
Hit <Return> to see next plot:
Hit <Return> to see next plot:
Hit <Return> to see next plot:
Hit <Return> to see next plot:

hats <- as.data.frame(hatvalues(TCY))
hats
    hatvalues(TCY)
2       0.21290798
3       0.19167812
5       0.07835187
6       0.07988768
7       0.10611081
8       0.09475865
9       0.04597844
10      0.04404193
11      0.07201387
12      0.07308525
15      0.13581008
19      0.07100794
20      0.06762549
21      0.04526655
22      0.37665168
23      0.09072711
24      0.08400364
29      0.04133848
30      0.06246831
35      0.34292671
36      0.08739606
37      0.04062334
38      0.18367009
43      0.07222597
44      0.39530021
45      0.06844399
46      0.04191017
51      0.04291890
52      0.08724704
53      0.04695298
54      0.06736653
55      0.06529267
62      0.04571295
63      0.04351669
64      0.97998610
67      0.04362519
68      0.07036614
77      0.09122794
78      0.06180910
88      0.34836347
90      0.07183608
91      0.04735230
101     0.07554022
102     0.37576933
103     0.10478660
104     0.05068387
105     0.13679124
106     0.07345583
107     0.13162536
108     0.05073110
117     0.06632617
118     0.12817979
119     0.09669277
120     0.08084825
121     0.07821484
122     0.09649332
123     0.07595300
124     0.07221547
127     0.05541486
128     0.09904922
129     0.06116892
130     0.07186645
131     0.06992124
136     0.06570245
137     0.11032187
1       0.25670894
4       0.19575441
hats[order(-hats['hatvalues(TCY)']), ]
Error in xtfrm.data.frame(x) : cannot xtfrm data frames

plot(hatvalues(TCY), type = 'h')
cutoff<-2*(8-1)/67
z<-hats> cutoff
cutoff<-2*(8-1)/67
z<-hats> cutoff
z
    hatvalues(TCY)
2             TRUE
3            FALSE
5            FALSE
6            FALSE
7            FALSE
8            FALSE
9            FALSE
10           FALSE
11           FALSE
12           FALSE
15           FALSE
19           FALSE
20           FALSE
21           FALSE
22            TRUE
23           FALSE
24           FALSE
29           FALSE
30           FALSE
35            TRUE
36           FALSE
37           FALSE
38           FALSE
43           FALSE
44            TRUE
45           FALSE
46           FALSE
51           FALSE
52           FALSE
53           FALSE
54           FALSE
55           FALSE
62           FALSE
63           FALSE
64            TRUE
67           FALSE
68           FALSE
77           FALSE
78           FALSE
88            TRUE
90           FALSE
91           FALSE
101          FALSE
102           TRUE
103          FALSE
104          FALSE
105          FALSE
106          FALSE
107          FALSE
108          FALSE
117          FALSE
118          FALSE
119          FALSE
120          FALSE
121          FALSE
122          FALSE
123          FALSE
124          FALSE
127          FALSE
128          FALSE
129          FALSE
130          FALSE
131          FALSE
136          FALSE
137          FALSE
1             TRUE
4            FALSE
sum(z, na.rm=FALSE)
[1] 8
knit()
Error in knit() : argument "input" is missing, with no default
knitr
Error: object 'knitr' not found
read.csv2()
Error in read.csv2() : argument "file" is missing, with no default
read.table
function (file, header = FALSE, sep = "", quote = "\"'", dec = ".",
    numerals = c("allow.loss", "warn.loss", "no.loss"), row.names,
    col.names, as.is = !stringsAsFactors, tryLogical = TRUE,
    na.strings = "NA", colClasses = NA, nrows = -1, skip = 0,
    check.names = TRUE, fill = !blank.lines.skip, strip.white = FALSE,
    blank.lines.skip = TRUE, comment.char = "#", allowEscapes = FALSE,
    flush = FALSE, stringsAsFactors = FALSE, fileEncoding = "",
    encoding = "unknown", text, skipNul = FALSE)
{
    if (missing(file) && !missing(text)) {
        file <- textConnection(text, encoding = "UTF-8")
        encoding <- "UTF-8"
        on.exit(close(file))
    }
    if (is.character(file)) {
        file <- if (nzchar(fileEncoding))
            file(file, "rt", encoding = fileEncoding)
        else file(file, "rt")
        on.exit(close(file))
    }
    if (!inherits(file, "connection"))
        stop("'file' must be a character string or connection")
    if (!isOpen(file, "rt")) {
        open(file, "rt")
        on.exit(close(file))
    }
    pbEncoding <- if (encoding %in% c("", "bytes", "UTF-8"))
        encoding
    else "bytes"
    numerals <- match.arg(numerals)
    if (skip > 0L)
        readLines(file, skip)
    nlines <- n0lines <- if (nrows < 0L)
        5
    else min(5L, (header + nrows))
    lines <- .External(C_readtablehead, file, nlines, comment.char,
        blank.lines.skip, quote, sep, skipNul)
    if (encoding %in% c("UTF-8", "latin1"))
        Encoding(lines) <- encoding
    nlines <- length(lines)
    if (!nlines) {
        if (missing(col.names))
            stop("no lines available in input")
        rlabp <- FALSE
        cols <- length(col.names)
    }
    else {
        if (all(!nzchar(lines)))
            stop("empty beginning of file")
        if (nlines < n0lines && file == 0L) {
            pushBack(c(lines, lines, ""), file, encoding = pbEncoding)
            on.exit((clearPushBack(stdin())))
        }
        else pushBack(c(lines, lines), file, encoding = pbEncoding)
        first <- scan(file, what = "", sep = sep, quote = quote,
            nlines = 1, quiet = TRUE, skip = 0, strip.white = TRUE,
            blank.lines.skip = blank.lines.skip, na.strings = character(0),
            comment.char = comment.char, allowEscapes = allowEscapes,
            encoding = encoding, skipNul = skipNul)
        col1 <- if (missing(col.names))
            length(first)
        else length(col.names)
        col <- numeric(nlines - 1L)
        if (nlines > 1L)
            for (i in seq_along(col)) col[i] <- length(scan(file,
                what = "", sep = sep, quote = quote, nlines = 1,
                quiet = TRUE, skip = 0, strip.white = strip.white,
                blank.lines.skip = blank.lines.skip, comment.char = comment.char,
                allowEscapes = allowEscapes, encoding = encoding,
                skipNul = skipNul))
        cols <- max(col1, col)
        rlabp <- (cols - col1) == 1L
        if (rlabp && missing(header))
            header <- TRUE
        if (!header)
            rlabp <- FALSE
        if (header) {
            .External(C_readtablehead, file, 1L, comment.char,
                blank.lines.skip, quote, sep, skipNul)
            if (missing(col.names))
                col.names <- first
            else if (length(first) != length(col.names))
                warning("header and 'col.names' are of different lengths")
        }
        else if (missing(col.names))
            col.names <- paste0("V", 1L:cols)
        if (length(col.names) + rlabp < cols)
            stop("more columns than column names")
        if (fill && length(col.names) > cols)
            cols <- length(col.names)
        if (!fill && cols > 0L && length(col.names) > cols)
            stop("more column names than columns")
        if (cols == 0L)
            stop("first five rows are empty: giving up")
    }
    if (check.names)
        col.names <- make.names(col.names, unique = TRUE)
    if (rlabp)
        col.names <- c("row.names", col.names)
    nmColClasses <- names(colClasses)
    if (is.null(nmColClasses)) {
        if (length(colClasses) < cols)
            colClasses <- rep_len(colClasses, cols)
    }
    else {
        tmp <- rep_len(NA_character_, cols)
        names(tmp) <- col.names
        i <- match(nmColClasses, col.names, 0L)
        if (any(i <= 0L))
            warning("not all columns named in 'colClasses' exist")
        tmp[i[i > 0L]] <- colClasses[i > 0L]
        colClasses <- tmp
    }
    what <- rep.int(list(""), cols)
    names(what) <- col.names
    colClasses[colClasses %in% c("real", "double")] <- "numeric"
    known <- colClasses %in% c("logical", "integer", "numeric",
        "complex", "character", "raw")
    what[known] <- lapply(colClasses[known], do.call, list(0))
    what[colClasses %in% "NULL"] <- list(NULL)
    keep <- !sapply(what, is.null)
    data <- scan(file = file, what = what, sep = sep, quote = quote,
        dec = dec, nmax = nrows, skip = 0, na.strings = na.strings,
        quiet = TRUE, fill = fill, strip.white = strip.white,
        blank.lines.skip = blank.lines.skip, multi.line = FALSE,
        comment.char = comment.char, allowEscapes = allowEscapes,
        flush = flush, encoding = encoding, skipNul = skipNul)
    nlines <- length(data[[which.max(keep)]])
    if (cols != length(data)) {
        warning("cols = ", cols, " != length(data) = ", length(data),
            domain = NA)
        cols <- length(data)
    }
    if (is.logical(as.is)) {
        as.is <- rep_len(as.is, cols)
    }
    else if (is.numeric(as.is)) {
        if (any(as.is < 1 | as.is > cols))
            stop("invalid numeric 'as.is' expression")
        i <- rep.int(FALSE, cols)
        i[as.is] <- TRUE
        as.is <- i
    }
    else if (is.character(as.is)) {
        i <- match(as.is, col.names, 0L)
        if (any(i <= 0L))
            warning("not all columns named in 'as.is' exist")
        i <- i[i > 0L]
        as.is <- rep.int(FALSE, cols)
        as.is[i] <- TRUE
    }
    else if (length(as.is) != cols)
        stop(gettextf("'as.is' has the wrong length %d != cols = %d",
            length(as.is), cols), domain = NA)
    do <- keep & !known
    if (rlabp)
        do[1L] <- FALSE
    for (i in (1L:cols)[do]) {
        data[[i]] <- if (is.na(colClasses[i]))
            type.convert(data[[i]], as.is = as.is[i], dec = dec,
                numerals = numerals, na.strings = character(0L),
                tryLogical = tryLogical)
        else if (colClasses[i] == "factor")
            as.factor(data[[i]])
        else if (colClasses[i] == "Date")
            as.Date(data[[i]])
        else if (colClasses[i] == "POSIXct")
            as.POSIXct(data[[i]])
        else methods::as(data[[i]], colClasses[i])
    }
    compactRN <- TRUE
    if (missing(row.names)) {
        if (rlabp) {
            row.names <- data[[1L]]
            data <- data[-1L]
            keep <- keep[-1L]
            compactRN <- FALSE
        }
        else row.names <- .set_row_names(as.integer(nlines))
    }
    else if (is.null(row.names)) {
        row.names <- .set_row_names(as.integer(nlines))
    }
    else if (is.character(row.names)) {
        compactRN <- FALSE
        if (length(row.names) == 1L) {
            rowvar <- (1L:cols)[match(col.names, row.names, 0L) == 1L]
            row.names <- data[[rowvar]]
            data <- data[-rowvar]
            keep <- keep[-rowvar]
        }
    }
    else if (is.numeric(row.names) && length(row.names) == 1L) {
        compactRN <- FALSE
        rlabp <- row.names
        row.names <- data[[rlabp]]
        data <- data[-rlabp]
        keep <- keep[-rlabp]
    }
    else stop("invalid 'row.names' specification")
    data <- data[keep]
    if (is.object(row.names) || !(is.integer(row.names)))
        row.names <- as.character(row.names)
    if (!compactRN) {
        if (length(row.names) != nlines)
            stop("invalid 'row.names' length")
        if (anyDuplicated(row.names))
            stop("duplicate 'row.names' are not allowed")
        if (anyNA(row.names))
            stop("missing values in 'row.names' are not allowed")
    }
    class(data) <- "data.frame"
    attr(data, "row.names") <- row.names
    data
}
<bytecode: 0x55aef7f16d80>
<environment: namespace:utils>