This is a basic introduction to the R programming language
# Some basic commands in R
## Build an integer vector x holding the values 1 through 10
x <- 1:10
## Build a second, shorter vector y holding the values 20 and 19
y <- 20:19
# Element-wise sum stored as c; y has only two elements, so it is recycled
# along the ten elements of x
c <- x + y
c
## [1] 21 21 23 23 25 25 27 27 29 29
# List every object currently defined in the R workspace
ls()
## [1] "c" "x" "y"
# Drop a single object, here c
rm(c)
# Confirm that c is no longer listed
ls()
## [1] "x" "y"
# Clear the whole workspace in one call (use with care: nothing is kept)
rm(list = ls())
# The workspace should now be empty
ls()
## character(0)
Creating a matrix in R
# Build a 2 x 2 matrix from four values
x <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
# Display x: without byrow = TRUE the values fill the matrix column by column
x
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
# Same values, filled row by row. TRUE is spelled out because T is an ordinary
# variable that can be reassigned, which would silently change the result.
x <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE)
x
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
Generating normally distributed values using rnorm
# Draw 1000 random values from a normal distribution with mean 10 and sd 10
x <- rnorm(n = 1000, mean = 10, sd = 10)
# Show their distribution as a histogram with rainbow-coloured bars
hist(
  x,
  col = rainbow(30),
  main = "Histogram",
  xlab = "Values"
)
Viewing the whole dataset
# Open the data set in a spreadsheet-style viewer. View() is used instead of
# fix() because fix() opens an editor and writes the (possibly edited) result
# back into the workspace, which is more than is needed just to look at data.
View(mtcars)
There are a few other functions in R
library(ISLR)
# Read the data set into R: the first line is a header, fields are separated
# by ";" and the string "?" marks a missing value
df <- read.csv(
  "https://raw.githubusercontent.com/tuyenhavan/Statistics/Dataset/LungCapData.csv",
  na.strings = "?",
  header = TRUE,
  sep = ";"
)
# Omit rows with missing data. na.omit() returns a new data frame, so its
# result must be assigned back for df to actually change.
df <- na.omit(df)
# Preview the first rows of the cleaned data
head(df)
Pair plot
# Scatter plots
# Pairwise scatter-plot matrix of three columns. Supplying the data frame via
# `data` keeps the panel labels as plain column names rather than "df$LungCap".
pairs(~ LungCap + Age + Height, data = df)
# Age (y axis) against lung capacity (x axis)
plot(Age ~ LungCap, data = df)
# Label points on the current plot with their Gender value; this needs an
# interactive graphics device to select points
identify(df$LungCap, df$Age, labels = df$Gender)
## integer(0)
# Switch df over to the College data set
df <- College
# Preview its first rows
head(df)
# Give the first column a name
# NOTE(review): no statement below actually names a column -- the code only
# attaches tidyverse and prints the first rows of df again, so the renaming
# step appears to be missing. TODO confirm the intended step.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.3.4 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## -- Conflicts ---------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Print the first rows of df (unchanged since the previous preview)
head(df)
Logistic Regression Model