# 회귀분석 개요
# 회귀분석은 다음과 같은 목적으로 사용된다.
# 예측 : 독립변수를 기반으로 종속변수 값을 예측한다.
# 관계 분석 : 변수 간 관계의 강도와 방향 파악
# 모델링 : 데이터의 패턴을 수학적으로 표현

# 단순선형 회귀
# 하나의 독립변수(x)와 하나의 종속변수(y) 사이의 선형 관계를 직선 y = a + bx 로 모델링한다.

# Attach ggplot2, which provides ggplot(), geom_point(), geom_smooth(),
# theme_minimal() and labs() used for the regression plot in this script.
# A stray bare `library` expression (which only printed the source of
# base::library) and its pasted console dump were removed here.
library(ggplot2)
# Load the built-in mtcars data set into the workspace and preview its
# first six rows.
data("mtcars")
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Simple linear regression: fit mpg as a linear function of hp on mtcars,
# then print the coefficient table and the fit statistics.
model <- lm(formula = mpg ~ hp, data = mtcars)
summary(object = model)
## 
## Call:
## lm(formula = mpg ~ hp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7121 -2.1122 -0.8854  1.5819  8.2360 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 30.09886    1.63392  18.421  < 2e-16 ***
## hp          -0.06823    0.01012  -6.742 1.79e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.863 on 30 degrees of freedom
## Multiple R-squared:  0.6024, Adjusted R-squared:  0.5892 
## F-statistic: 45.46 on 1 and 30 DF,  p-value: 1.788e-07
# Estimate (회귀계수)
# (Intercept)는 절편(30.09886)이고, hp의 Estimate는 기울기(-0.06823)입니다.
# 추정된 회귀식: mpg = 30.09886 - 0.06823 * hp
# 즉, y = 30.09886 - 0.06823x (hp가 1 증가할 때 mpg는 약 0.068 감소)


# Scatter plot of mpg against hp with the fitted linear-model line overlaid.
ggplot(data = mtcars, mapping = aes(x = hp, y = mpg)) +
  geom_point() +  # raw observations
  geom_smooth(method = "lm", se = TRUE) +  # fitted line with confidence band
  labs(
    title = "MPG vs Horsepower",
    x = "Horsepower",
    y = "Miles per Gallon"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Predict mpg from the simple regression fit for horsepower values of
# 100, 150 and 200.
new_data <- data.frame(hp = seq(from = 100, to = 200, by = 50))
predict(object = model, newdata = new_data)
##        1        2        3 
## 23.27603 19.86462 16.45320
# Multiple linear regression: fit mpg on hp, wt and am together, then
# print the coefficient table and the fit statistics.
multi_model <- lm(formula = mpg ~ hp + wt + am, data = mtcars)
summary(object = multi_model)
## 
## Call:
## lm(formula = mpg ~ hp + wt + am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4221 -1.7924 -0.3788  1.2249  5.5317 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.002875   2.642659  12.867 2.82e-13 ***
## hp          -0.037479   0.009605  -3.902 0.000546 ***
## wt          -2.878575   0.904971  -3.181 0.003574 ** 
## am           2.083710   1.376420   1.514 0.141268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared:  0.8399, Adjusted R-squared:  0.8227 
## F-statistic: 48.96 on 3 and 28 DF,  p-value: 2.908e-11
# Predict mpg from the multiple regression fit for two new cars, each
# described by its hp, wt and am values.
new_multi_data <- data.frame(
  hp = c(120, 180),
  wt = c(3.0, 3.5),
  am = c(0, 1)
)
predict(object = multi_model, newdata = new_multi_data)
##       1       2 
## 20.8697 19.2654