# 회귀분석 개요
# 회귀분석은 다음과 같은 목적으로 사용된다.
# 예측 : 독립변수를 기반으로 종속변수 값을 예측한다.
# 관계 분석 : 변수 간 관계의 강도와 방향 파악
# 모델링 : 데이터의 패턴을 수학적으로 표현
# 단순선형 회귀
# 하나의 독립변수(x)와
library(ggplot2)
library
## function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,
## logical.return = FALSE, warn.conflicts, quietly = FALSE,
## verbose = getOption("verbose"), mask.ok, exclude, include.only,
## attach.required = missing(include.only))
## {
## conf.ctrl <- getOption("conflicts.policy")
## if (is.character(conf.ctrl))
## conf.ctrl <- switch(conf.ctrl, strict = list(error = TRUE,
## warn = FALSE), depends.ok = list(error = TRUE, generics.ok = TRUE,
## can.mask = c("base", "methods", "utils", "grDevices",
## "graphics", "stats"), depends.ok = TRUE), warning(gettextf("unknown conflict policy: %s",
## sQuote(conf.ctrl)), call. = FALSE, domain = NA))
## if (!is.list(conf.ctrl))
## conf.ctrl <- NULL
## stopOnConflict <- isTRUE(conf.ctrl$error)
## if (missing(warn.conflicts))
## warn.conflicts <- !isFALSE(conf.ctrl$warn)
## if (!missing(include.only) && !missing(exclude))
## stop("only one of 'include.only' and 'exclude' can be used",
## call. = FALSE)
## testRversion <- function(pkgInfo, pkgname, pkgpath) {
## if (is.null(built <- pkgInfo$Built))
## stop(gettextf("package %s has not been installed properly\n",
## sQuote(pkgname)), call. = FALSE, domain = NA)
## R_version_built_under <- as.numeric_version(built$R)
## if (R_version_built_under < "3.0.0")
## stop(gettextf("package %s was built before R 3.0.0: please re-install it",
## sQuote(pkgname)), call. = FALSE, domain = NA)
## current <- getRversion()
## if (length(Rdeps <- pkgInfo$Rdepends2)) {
## for (dep in Rdeps) if (length(dep) > 1L) {
## target <- dep$version
## res <- do.call(dep$op, if (is.character(target))
## list(as.numeric(R.version[["svn rev"]]), as.numeric(sub("^r",
## "", target)))
## else list(current, as.numeric_version(target)))
## if (!res)
## stop(gettextf("This is R %s, package %s needs %s %s",
## current, sQuote(pkgname), dep$op, target),
## call. = FALSE, domain = NA)
## }
## }
## if (R_version_built_under > current)
## warning(gettextf("package %s was built under R version %s",
## sQuote(pkgname), as.character(built$R)), call. = FALSE,
## domain = NA)
## platform <- built$Platform
## r_arch <- .Platform$r_arch
## if (.Platform$OS.type == "unix") {
## }
## else {
## if (nzchar(platform) && !grepl("mingw", platform))
## stop(gettextf("package %s was built for %s",
## sQuote(pkgname), platform), call. = FALSE,
## domain = NA)
## }
## if (nzchar(r_arch) && file.exists(file.path(pkgpath,
## "libs")) && !file.exists(file.path(pkgpath, "libs",
## r_arch)))
## stop(gettextf("package %s is not installed for 'arch = %s'",
## sQuote(pkgname), r_arch), call. = FALSE, domain = NA)
## }
## checkNoGenerics <- function(env, pkg) {
## nenv <- env
## ns <- .getNamespace(as.name(pkg))
## if (!is.null(ns))
## nenv <- asNamespace(ns)
## if (exists(".noGenerics", envir = nenv, inherits = FALSE))
## TRUE
## else {
## !any(startsWith(names(env), ".__T"))
## }
## }
## checkConflicts <- function(package, pkgname, pkgpath, nogenerics,
## env) {
## dont.mind <- c("last.dump", "last.warning", ".Last.value",
## ".Random.seed", ".Last.lib", ".onDetach", ".packageName",
## ".noGenerics", ".required", ".no_S3_generics", ".Depends",
## ".requireCachedGenerics")
## sp <- search()
## lib.pos <- which(sp == pkgname)
## ob <- names(as.environment(lib.pos))
## if (!nogenerics) {
## these <- ob[startsWith(ob, ".__T__")]
## gen <- gsub(".__T__(.*):([^:]+)", "\\1", these)
## from <- gsub(".__T__(.*):([^:]+)", "\\2", these)
## gen <- gen[from != package]
## ob <- ob[!(ob %in% gen)]
## }
## ipos <- seq_along(sp)[-c(lib.pos, match(c("Autoloads",
## "CheckExEnv"), sp, 0L))]
## cpos <- NULL
## conflicts <- vector("list", 0)
## for (i in ipos) {
## obj.same <- match(names(as.environment(i)), ob, nomatch = 0L)
## if (any(obj.same > 0L)) {
## same <- ob[obj.same]
## same <- same[!(same %in% dont.mind)]
## Classobjs <- which(startsWith(same, ".__"))
## if (length(Classobjs))
## same <- same[-Classobjs]
## same.isFn <- function(where) vapply(same, exists,
## NA, where = where, mode = "function", inherits = FALSE)
## same <- same[same.isFn(i) == same.isFn(lib.pos)]
## not.Ident <- function(ch, TRAFO = identity, ...) vapply(ch,
## function(.) !identical(TRAFO(get(., i)), TRAFO(get(.,
## lib.pos)), ...), NA)
## if (length(same))
## same <- same[not.Ident(same)]
## if (length(same) && identical(sp[i], "package:base"))
## same <- same[not.Ident(same, ignore.environment = TRUE)]
## if (length(same)) {
## conflicts[[sp[i]]] <- same
## cpos[sp[i]] <- i
## }
## }
## }
## if (length(conflicts)) {
## if (stopOnConflict) {
## emsg <- ""
## pkg <- names(conflicts)
## notOK <- vector("list", 0)
## for (i in seq_along(conflicts)) {
## pkgname <- sub("^package:", "", pkg[i])
## if (pkgname %in% canMaskEnv$canMask)
## next
## same <- conflicts[[i]]
## if (is.list(mask.ok))
## myMaskOK <- mask.ok[[pkgname]]
## else myMaskOK <- mask.ok
## if (isTRUE(myMaskOK))
## same <- NULL
## else if (is.character(myMaskOK))
## same <- setdiff(same, myMaskOK)
## if (length(same)) {
## notOK[[pkg[i]]] <- same
## msg <- .maskedMsg(sort(same), pkg = sQuote(pkg[i]),
## by = cpos[i] < lib.pos)
## emsg <- paste(emsg, msg, sep = "\n")
## }
## }
## if (length(notOK)) {
## msg <- gettextf("Conflicts attaching package %s:\n%s",
## sQuote(package), emsg)
## stop(errorCondition(msg, package = package,
## conflicts = conflicts, class = "packageConflictError"))
## }
## }
## if (warn.conflicts) {
## packageStartupMessage(gettextf("\nAttaching package: %s\n",
## sQuote(package)), domain = NA)
## pkg <- names(conflicts)
## for (i in seq_along(conflicts)) {
## msg <- .maskedMsg(sort(conflicts[[i]]), pkg = sQuote(pkg[i]),
## by = cpos[i] < lib.pos)
## packageStartupMessage(msg, domain = NA)
## }
## }
## }
## }
## if (verbose && quietly)
## message("'verbose' and 'quietly' are both true; being verbose then ..")
## if (!missing(package)) {
## if (is.null(lib.loc))
## lib.loc <- .libPaths()
## lib.loc <- lib.loc[dir.exists(lib.loc)]
## if (!character.only)
## package <- as.character(substitute(package))
## if (length(package) != 1L)
## stop(gettextf("'%s' must be of length 1", "package"),
## domain = NA)
## if (is.na(package) || (package == ""))
## stop("invalid package name")
## pkgname <- paste0("package:", package)
## newpackage <- is.na(match(pkgname, search()))
## if (newpackage) {
## pkgpath <- find.package(package, lib.loc, quiet = TRUE,
## verbose = verbose)
## if (length(pkgpath) == 0L) {
## if (length(lib.loc) && !logical.return)
## stop(packageNotFoundError(package, lib.loc,
## sys.call()))
## txt <- if (length(lib.loc))
## gettextf("there is no package called %s", sQuote(package))
## else gettext("no library trees found in 'lib.loc'")
## if (logical.return) {
## if (!quietly)
## warning(txt, domain = NA)
## return(FALSE)
## }
## else stop(txt, domain = NA)
## }
## which.lib.loc <- normalizePath(dirname(pkgpath),
## "/", TRUE)
## pfile <- system.file("Meta", "package.rds", package = package,
## lib.loc = which.lib.loc)
## if (!nzchar(pfile))
## stop(gettextf("%s is not a valid installed package",
## sQuote(package)), domain = NA)
## pkgInfo <- readRDS(pfile)
## testRversion(pkgInfo, package, pkgpath)
## if (is.character(pos)) {
## npos <- match(pos, search())
## if (is.na(npos)) {
## warning(gettextf("%s not found on search path, using pos = 2",
## sQuote(pos)), domain = NA)
## pos <- 2
## }
## else pos <- npos
## }
## deps <- unique(names(pkgInfo$Depends))
## depsOK <- isTRUE(conf.ctrl$depends.ok)
## if (depsOK) {
## canMaskEnv <- dynGet("__library_can_mask__",
## NULL)
## if (is.null(canMaskEnv)) {
## canMaskEnv <- new.env()
## canMaskEnv$canMask <- union("base", conf.ctrl$can.mask)
## "__library_can_mask__" <- canMaskEnv
## }
## canMaskEnv$canMask <- unique(c(package, deps,
## canMaskEnv$canMask))
## }
## else canMaskEnv <- NULL
## if (attach.required)
## .getRequiredPackages2(pkgInfo, quietly = quietly,
## lib.loc = c(lib.loc, .libPaths()))
## cr <- conflictRules(package)
## if (missing(mask.ok))
## mask.ok <- cr$mask.ok
## if (missing(exclude))
## exclude <- cr$exclude
## if (isNamespaceLoaded(package)) {
## newversion <- as.numeric_version(pkgInfo$DESCRIPTION["Version"])
## oldversion <- as.numeric_version(getNamespaceVersion(package))
## if (newversion != oldversion) {
## tryCatch(unloadNamespace(package), error = function(e) {
## P <- if (!is.null(cc <- conditionCall(e)))
## paste("Error in", deparse(cc)[1L], ": ")
## else "Error : "
## stop(gettextf("Package %s version %s cannot be unloaded:\n %s",
## sQuote(package), oldversion, paste0(P,
## conditionMessage(e), "\n")), domain = NA)
## })
## }
## }
## tt <- tryCatch({
## attr(package, "LibPath") <- which.lib.loc
## ns <- loadNamespace(package, lib.loc)
## env <- attachNamespace(ns, pos = pos, deps, exclude,
## include.only)
## }, error = function(e) {
## P <- if (!is.null(cc <- conditionCall(e)))
## paste(" in", deparse(cc)[1L])
## else ""
## msg <- gettextf("package or namespace load failed for %s%s:\n %s",
## sQuote(package), P, conditionMessage(e))
## if (logical.return && !quietly)
## message(paste("Error:", msg), domain = NA)
## else stop(msg, call. = FALSE, domain = NA)
## })
## if (logical.return && is.null(tt))
## return(FALSE)
## attr(package, "LibPath") <- NULL
## {
## on.exit(detach(pos = pos))
## nogenerics <- !.isMethodsDispatchOn() || checkNoGenerics(env,
## package)
## if (isFALSE(conf.ctrl$generics.ok) || (stopOnConflict &&
## !isTRUE(conf.ctrl$generics.ok)))
## nogenerics <- TRUE
## if (stopOnConflict || (warn.conflicts && !exists(".conflicts.OK",
## envir = env, inherits = FALSE)))
## checkConflicts(package, pkgname, pkgpath, nogenerics,
## ns)
## on.exit()
## if (logical.return)
## return(TRUE)
## else return(invisible(.packages()))
## }
## }
## if (verbose && !newpackage)
## warning(gettextf("package %s already present in search()",
## sQuote(package)), domain = NA)
## }
## else if (!missing(help)) {
## if (!character.only)
## help <- as.character(substitute(help))
## pkgName <- help[1L]
## pkgPath <- find.package(pkgName, lib.loc, verbose = verbose)
## docFiles <- c(file.path(pkgPath, "Meta", "package.rds"),
## file.path(pkgPath, "INDEX"))
## if (file.exists(vignetteIndexRDS <- file.path(pkgPath,
## "Meta", "vignette.rds")))
## docFiles <- c(docFiles, vignetteIndexRDS)
## pkgInfo <- vector("list", 3L)
## readDocFile <- function(f) {
## if (basename(f) %in% "package.rds") {
## txt <- readRDS(f)$DESCRIPTION
## if ("Encoding" %in% names(txt)) {
## to <- if (Sys.getlocale("LC_CTYPE") == "C")
## "ASCII//TRANSLIT"
## else ""
## tmp <- try(iconv(txt, from = txt["Encoding"],
## to = to))
## if (!inherits(tmp, "try-error"))
## txt <- tmp
## else warning("'DESCRIPTION' has an 'Encoding' field and re-encoding is not possible",
## call. = FALSE)
## }
## nm <- paste0(names(txt), ":")
## formatDL(nm, txt, indent = max(nchar(nm, "w")) +
## 3L)
## }
## else if (basename(f) %in% "vignette.rds") {
## txt <- readRDS(f)
## if (is.data.frame(txt) && nrow(txt))
## cbind(basename(gsub("\\.[[:alpha:]]+$", "",
## txt$File)), paste(txt$Title, paste0(rep.int("(source",
## NROW(txt)), ifelse(nzchar(txt$PDF), ", pdf",
## ""), ")")))
## else NULL
## }
## else readLines(f)
## }
## for (i in which(file.exists(docFiles))) pkgInfo[[i]] <- readDocFile(docFiles[i])
## y <- list(name = pkgName, path = pkgPath, info = pkgInfo)
## class(y) <- "packageInfo"
## return(y)
## }
## else {
## if (is.null(lib.loc))
## lib.loc <- .libPaths()
## db <- matrix(character(), nrow = 0L, ncol = 3L)
## nopkgs <- character()
## for (lib in lib.loc) {
## a <- .packages(all.available = TRUE, lib.loc = lib)
## for (i in sort(a)) {
## file <- system.file("Meta", "package.rds", package = i,
## lib.loc = lib)
## title <- if (nzchar(file)) {
## txt <- readRDS(file)
## if (is.list(txt))
## txt <- txt$DESCRIPTION
## if ("Encoding" %in% names(txt)) {
## to <- if (Sys.getlocale("LC_CTYPE") == "C")
## "ASCII//TRANSLIT"
## else ""
## tmp <- try(iconv(txt, txt["Encoding"], to,
## "?"))
## if (!inherits(tmp, "try-error"))
## txt <- tmp
## else warning("'DESCRIPTION' has an 'Encoding' field and re-encoding is not possible",
## call. = FALSE)
## }
## txt["Title"]
## }
## else NA
## if (is.na(title))
## title <- " ** No title available ** "
## db <- rbind(db, cbind(i, lib, title))
## }
## if (length(a) == 0L)
## nopkgs <- c(nopkgs, lib)
## }
## dimnames(db) <- list(NULL, c("Package", "LibPath", "Title"))
## if (length(nopkgs) && !missing(lib.loc)) {
## pkglist <- paste(sQuote(nopkgs), collapse = ", ")
## msg <- sprintf(ngettext(length(nopkgs), "library %s contains no packages",
## "libraries %s contain no packages"), pkglist)
## warning(msg, domain = NA)
## }
## y <- list(header = NULL, results = db, footer = NULL)
## class(y) <- "libraryIQR"
## return(y)
## }
## if (logical.return)
## TRUE
## else invisible(.packages())
## }
## <bytecode: 0x00000233a4174c30>
## <environment: namespace:base>
data(mtcars)
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
model<-lm(mpg~hp, data=mtcars)
summary(model)
##
## Call:
## lm(formula = mpg ~ hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7121 -2.1122 -0.8854 1.5819 8.2360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.09886 1.63392 18.421 < 2e-16 ***
## hp -0.06823 0.01012 -6.742 1.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.863 on 30 degrees of freedom
## Multiple R-squared: 0.6024, Adjusted R-squared: 0.5892
## F-statistic: 45.46 on 1 and 30 DF, p-value: 1.788e-07
# Estimate (회귀계수)
# (Intercept) 절편입니다
# y=-0.06823+30.09886
# y=-0.06823+30.09886x
# 산점도와 회귀선 시각화
ggplot(mtcars, aes(x = hp, y = mpg)) +
geom_point() + # 산점도
geom_smooth(method = "lm", se = TRUE) + # 회귀선과 신뢰구간
theme_minimal() +
labs(title = "MPG vs Horsepower", x = "Horsepower", y = "Miles per Gallon")
## `geom_smooth()` using formula = 'y ~ x'

# 새로운 데이터로 예측
new_data <- data.frame(hp = c(100, 150, 200))
predict(model, newdata = new_data)
## 1 2 3
## 23.27603 19.86462 16.45320
# 다중 선형 회귀
multi_model <- lm(mpg ~ hp + wt + am, data = mtcars)
summary(multi_model)
##
## Call:
## lm(formula = mpg ~ hp + wt + am, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4221 -1.7924 -0.3788 1.2249 5.5317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.002875 2.642659 12.867 2.82e-13 ***
## hp -0.037479 0.009605 -3.902 0.000546 ***
## wt -2.878575 0.904971 -3.181 0.003574 **
## am 2.083710 1.376420 1.514 0.141268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared: 0.8399, Adjusted R-squared: 0.8227
## F-statistic: 48.96 on 3 and 28 DF, p-value: 2.908e-11
# 새로운 데이터로 예측
new_multi_data <- data.frame(hp = c(120, 180), wt = c(3.0, 3.5), am = c(0, 1))
predict(multi_model, newdata = new_multi_data)
## 1 2
## 20.8697 19.2654