Now, to read the data:
# Read the CSV file picked through an interactive file chooser; the first row
# supplies the column names. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
data <- read.csv(file.choose(), header = TRUE, sep = ",")
data
## Age income edu Gender
## 1 54 85000 18 1
## 2 24 20000 16 1
## 3 23 19000 16 1
## 4 16 12000 12 0
## 5 18 7000 12 0
## 6 23 15000 12 0
## 7 50 80000 18 1
## 8 23 10000 14 1
## 9 22 25000 16 1
## 10 29 30000 18 1
## 11 26 30000 16 0
## 12 25 20000 16 0
## 13 26 15000 16 0
## 14 24 10000 16 1
## 15 24 10000 16 0
## 16 24 10000 16 0
## 17 25 20000 16 0
## 18 23 50000 18 1
## 19 34 25000 16 1
## 20 24 40000 16 1
## 21 18 50000 14 1
## 22 21 25000 16 0
## 23 24 60000 16 1
## 24 26 20000 16 1
## 25 20 20000 12 1
## 26 21 20000 14 0
## 27 21 50000 16 1
## 28 23 40000 16 1
## 29 60 50000 18 1
## 30 53 50000 12 1
## 31 60 80000 16 1
## 32 36 150000 18 1
## 33 53 200000 18 1
## 34 32 50000 16 1
## 35 28 15000 12 1
## 36 23 15000 12 1
## 37 21 50000 16 1
## 38 26 30000 14 1
## 39 21 20000 12 1
## 40 22 50000 16 1
## 41 62 80000 16 1
## 42 31 40000 16 0
## 43 35 35000 14 1
## 44 35 40000 14 1
## 45 30 20000 16 1
## 46 25 14000 16 1
## 47 21 10000 14 1
## 48 20 15000 14 1
## 49 29 120000 18 1
## 50 26 12000 14 1
## 51 32 80000 16 1
## 52 27 15000 14 1
## 53 24 20000 12 1
## 54 36 30000 16 1
## 55 25 200000 16 1
## 56 18 15000 12 1
## 57 24 40000 16 1
## 58 21 12000 12 1
## 59 20 40000 14 1
## 60 22 30000 16 1
## 61 39 50000 18 1
## 62 22 12000 12 0
## 63 19 13000 14 0
## 64 26 40000 16 1
## 65 38 100000 16 1
## 66 26 20000 14 1
## 67 25 14000 16 1
## 68 22 15000 16 1
## 69 25 48000 16 1
## 70 25 25000 16 0
## 71 22 35000 16 1
## 72 24 20000 16 1
## 73 25 25000 16 1
## 74 22 25000 14 1
## 75 30 30000 16 1
## 76 24 250000 16 1
## 77 39 100000 16 1
## 78 28 25000 16 0
## 79 44 30000 16 1
## 80 24 15000 16 1
## 81 26 20000 16 0
## 82 25 20000 16 1
## 83 26 20000 16 0
## 84 23 25000 14 0
## 85 23 12500 16 1
## 86 23 10000 14 1
## 87 24 10000 14 1
## 88 25 15000 12 0
## 89 25 20000 18 1
## 90 21 10000 16 1
## 91 24 18000 16 0
## 92 22 15000 16 1
## 93 24 15000 16 0
## 94 23 12000 16 1
## 95 30 50000 14 1
## 96 24 70000 18 1
## 97 23 50000 16 1
## 98 24 50000 16 1
## 99 22 20000 16 1
## 100 32 150000 18 1
# View the 0/1 gender code as a factor and display it.
Gender <- as.factor(data[["Gender"]])
print(Gender)
## [1] 1 1 1 0 0 0 1 1 1 1 0 0 0 1 0 0 0 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0
## [71] 1 1 1 1 1 1 1 0 1 1 0 1 0 0 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1
## Levels: 0 1
# Same treatment for the education column.
edu <- as.factor(data[["edu"]])
print(edu)
## [1] 18 16 16 12 12 12 18 14 16 18 16 16 16 16 16 16 16 18 16 16 14 16 16
## [24] 16 12 14 16 16 18 12 16 18 18 16 12 12 16 14 12 16 16 16 14 14 16 16
## [47] 14 14 18 14 16 14 12 16 16 12 16 12 14 16 18 12 14 16 16 14 16 16 16
## [70] 16 16 16 16 14 16 16 16 16 16 16 16 16 16 14 16 14 14 12 18 16 16 16
## [93] 16 16 14 18 16 16 16 18
## Levels: 12 14 16 18
# The conversions above created separate vectors; the data frame itself is
# inspected unchanged here.
str(data)
## 'data.frame': 100 obs. of 4 variables:
## $ Age : int 54 24 23 16 18 23 50 23 22 29 ...
## $ income: int 85000 20000 19000 12000 7000 15000 80000 10000 25000 30000 ...
## $ edu : int 18 16 16 12 12 12 18 14 16 18 ...
## $ Gender: int 1 1 1 0 0 0 1 1 1 1 ...
# Pull the four columns out by position into stand-alone vectors, then
# reassemble them into a data frame with a fixed column order and show it.
Age <- data[[1]]
income <- data[[2]]
edu <- data[[3]]
Gender <- data[[4]]
data <- data.frame(Age, income, edu, Gender)
print(data)
## Age income edu Gender
## 1 54 85000 18 1
## 2 24 20000 16 1
## 3 23 19000 16 1
## 4 16 12000 12 0
## 5 18 7000 12 0
## 6 23 15000 12 0
## 7 50 80000 18 1
## 8 23 10000 14 1
## 9 22 25000 16 1
## 10 29 30000 18 1
## 11 26 30000 16 0
## 12 25 20000 16 0
## 13 26 15000 16 0
## 14 24 10000 16 1
## 15 24 10000 16 0
## 16 24 10000 16 0
## 17 25 20000 16 0
## 18 23 50000 18 1
## 19 34 25000 16 1
## 20 24 40000 16 1
## 21 18 50000 14 1
## 22 21 25000 16 0
## 23 24 60000 16 1
## 24 26 20000 16 1
## 25 20 20000 12 1
## 26 21 20000 14 0
## 27 21 50000 16 1
## 28 23 40000 16 1
## 29 60 50000 18 1
## 30 53 50000 12 1
## 31 60 80000 16 1
## 32 36 150000 18 1
## 33 53 200000 18 1
## 34 32 50000 16 1
## 35 28 15000 12 1
## 36 23 15000 12 1
## 37 21 50000 16 1
## 38 26 30000 14 1
## 39 21 20000 12 1
## 40 22 50000 16 1
## 41 62 80000 16 1
## 42 31 40000 16 0
## 43 35 35000 14 1
## 44 35 40000 14 1
## 45 30 20000 16 1
## 46 25 14000 16 1
## 47 21 10000 14 1
## 48 20 15000 14 1
## 49 29 120000 18 1
## 50 26 12000 14 1
## 51 32 80000 16 1
## 52 27 15000 14 1
## 53 24 20000 12 1
## 54 36 30000 16 1
## 55 25 200000 16 1
## 56 18 15000 12 1
## 57 24 40000 16 1
## 58 21 12000 12 1
## 59 20 40000 14 1
## 60 22 30000 16 1
## 61 39 50000 18 1
## 62 22 12000 12 0
## 63 19 13000 14 0
## 64 26 40000 16 1
## 65 38 100000 16 1
## 66 26 20000 14 1
## 67 25 14000 16 1
## 68 22 15000 16 1
## 69 25 48000 16 1
## 70 25 25000 16 0
## 71 22 35000 16 1
## 72 24 20000 16 1
## 73 25 25000 16 1
## 74 22 25000 14 1
## 75 30 30000 16 1
## 76 24 250000 16 1
## 77 39 100000 16 1
## 78 28 25000 16 0
## 79 44 30000 16 1
## 80 24 15000 16 1
## 81 26 20000 16 0
## 82 25 20000 16 1
## 83 26 20000 16 0
## 84 23 25000 14 0
## 85 23 12500 16 1
## 86 23 10000 14 1
## 87 24 10000 14 1
## 88 25 15000 12 0
## 89 25 20000 18 1
## 90 21 10000 16 1
## 91 24 18000 16 0
## 92 22 15000 16 1
## 93 24 15000 16 0
## 94 23 12000 16 1
## 95 30 50000 14 1
## 96 24 70000 18 1
## 97 23 50000 16 1
## 98 24 50000 16 1
## 99 22 20000 16 1
## 100 32 150000 18 1
# Structure of the rebuilt data frame.
str(object = data)
## 'data.frame': 100 obs. of 4 variables:
## $ Age : int 54 24 23 16 18 23 50 23 22 29 ...
## $ income: int 85000 20000 19000 12000 7000 15000 80000 10000 25000 30000 ...
## $ edu : int 18 16 16 12 12 12 18 14 16 18 ...
## $ Gender: int 1 1 1 0 0 0 1 1 1 1 ...
# Number of rows, then number of columns.
print(nrow(data))
## [1] 100
print(ncol(data))
## [1] 4
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Base-graphics scatter plot of income against age.
plot(x = data$Age, y = data$income)
The scatter plot above shows the relationship between age and income.
# The same age/income scatter plot drawn with ggplot2's qplot() shortcut.
qplot(x = data$Age, y = data$income)
The qplot above shows the same relationship in a more effective view, that is, age with circles and income with triangles.
# Age/income plot combining a line layer with a point layer.
qplot(
  x = Age,
  y = income,
  data = data,
  geom = c("line", "point")
)
Now, in this scatter plot, the age and income points are joined with lines.
# Age/income scatter plot built with the full ggplot() grammar.
ggplot(data = data, mapping = aes(x = Age, y = income)) +
  geom_point()
Here, just the comparison of age and income is shown through ggplot.
# Show only the two columns used in the plots.
data[c("Age", "income")]
## Age income
## 1 54 85000
## 2 24 20000
## 3 23 19000
## 4 16 12000
## 5 18 7000
## 6 23 15000
## 7 50 80000
## 8 23 10000
## 9 22 25000
## 10 29 30000
## 11 26 30000
## 12 25 20000
## 13 26 15000
## 14 24 10000
## 15 24 10000
## 16 24 10000
## 17 25 20000
## 18 23 50000
## 19 34 25000
## 20 24 40000
## 21 18 50000
## 22 21 25000
## 23 24 60000
## 24 26 20000
## 25 20 20000
## 26 21 20000
## 27 21 50000
## 28 23 40000
## 29 60 50000
## 30 53 50000
## 31 60 80000
## 32 36 150000
## 33 53 200000
## 34 32 50000
## 35 28 15000
## 36 23 15000
## 37 21 50000
## 38 26 30000
## 39 21 20000
## 40 22 50000
## 41 62 80000
## 42 31 40000
## 43 35 35000
## 44 35 40000
## 45 30 20000
## 46 25 14000
## 47 21 10000
## 48 20 15000
## 49 29 120000
## 50 26 12000
## 51 32 80000
## 52 27 15000
## 53 24 20000
## 54 36 30000
## 55 25 200000
## 56 18 15000
## 57 24 40000
## 58 21 12000
## 59 20 40000
## 60 22 30000
## 61 39 50000
## 62 22 12000
## 63 19 13000
## 64 26 40000
## 65 38 100000
## 66 26 20000
## 67 25 14000
## 68 22 15000
## 69 25 48000
## 70 25 25000
## 71 22 35000
## 72 24 20000
## 73 25 25000
## 74 22 25000
## 75 30 30000
## 76 24 250000
## 77 39 100000
## 78 28 25000
## 79 44 30000
## 80 24 15000
## 81 26 20000
## 82 25 20000
## 83 26 20000
## 84 23 25000
## 85 23 12500
## 86 23 10000
## 87 24 10000
## 88 25 15000
## 89 25 20000
## 90 21 10000
## 91 24 18000
## 92 22 15000
## 93 24 15000
## 94 23 12000
## 95 30 50000
## 96 24 70000
## 97 23 50000
## 98 24 50000
## 99 22 20000
## 100 32 150000
# Age/income scatter plot with default points.
ggplot(data = data, mapping = aes(x = Age, y = income)) +
  geom_point()
# Same plot with point shape 21.
ggplot(data = data, mapping = aes(x = Age, y = income)) +
  geom_point(shape = 21)
# Same plot with point size 1.5.
ggplot(data = data, mapping = aes(x = Age, y = income)) +
  geom_point(size = 1.5)
In the diagrams above, we are still comparing age and income, now with different point shapes and sizes in ggplot.
# Show gender next to the two plotted variables.
data[c("Gender", "Age", "income")]
## Gender Age income
## 1 1 54 85000
## 2 1 24 20000
## 3 1 23 19000
## 4 0 16 12000
## 5 0 18 7000
## 6 0 23 15000
## 7 1 50 80000
## 8 1 23 10000
## 9 1 22 25000
## 10 1 29 30000
## 11 0 26 30000
## 12 0 25 20000
## 13 0 26 15000
## 14 1 24 10000
## 15 0 24 10000
## 16 0 24 10000
## 17 0 25 20000
## 18 1 23 50000
## 19 1 34 25000
## 20 1 24 40000
## 21 1 18 50000
## 22 0 21 25000
## 23 1 24 60000
## 24 1 26 20000
## 25 1 20 20000
## 26 0 21 20000
## 27 1 21 50000
## 28 1 23 40000
## 29 1 60 50000
## 30 1 53 50000
## 31 1 60 80000
## 32 1 36 150000
## 33 1 53 200000
## 34 1 32 50000
## 35 1 28 15000
## 36 1 23 15000
## 37 1 21 50000
## 38 1 26 30000
## 39 1 21 20000
## 40 1 22 50000
## 41 1 62 80000
## 42 0 31 40000
## 43 1 35 35000
## 44 1 35 40000
## 45 1 30 20000
## 46 1 25 14000
## 47 1 21 10000
## 48 1 20 15000
## 49 1 29 120000
## 50 1 26 12000
## 51 1 32 80000
## 52 1 27 15000
## 53 1 24 20000
## 54 1 36 30000
## 55 1 25 200000
## 56 1 18 15000
## 57 1 24 40000
## 58 1 21 12000
## 59 1 20 40000
## 60 1 22 30000
## 61 1 39 50000
## 62 0 22 12000
## 63 0 19 13000
## 64 1 26 40000
## 65 1 38 100000
## 66 1 26 20000
## 67 1 25 14000
## 68 1 22 15000
## 69 1 25 48000
## 70 0 25 25000
## 71 1 22 35000
## 72 1 24 20000
## 73 1 25 25000
## 74 1 22 25000
## 75 1 30 30000
## 76 1 24 250000
## 77 1 39 100000
## 78 0 28 25000
## 79 1 44 30000
## 80 1 24 15000
## 81 0 26 20000
## 82 1 25 20000
## 83 0 26 20000
## 84 0 23 25000
## 85 1 23 12500
## 86 1 23 10000
## 87 1 24 10000
## 88 0 25 15000
## 89 1 25 20000
## 90 1 21 10000
## 91 0 24 18000
## 92 1 22 15000
## 93 0 24 15000
## 94 1 23 12000
## 95 1 30 50000
## 96 1 24 70000
## 97 1 23 50000
## 98 1 24 50000
## 99 1 22 20000
## 100 1 32 150000
# Column types before re-encoding.
str(object = data)
## 'data.frame': 100 obs. of 4 variables:
## $ Age : int 54 24 23 16 18 23 50 23 22 29 ...
## $ income: int 85000 20000 19000 12000 7000 15000 80000 10000 25000 30000 ...
## $ edu : int 18 16 16 12 12 12 18 14 16 18 ...
## $ Gender: int 1 1 1 0 0 0 1 1 1 1 ...
# Turn the two categorical columns into factors and rebuild the data frame
# from the stand-alone vectors so that ggplot2 treats them as discrete.
edu <- as.factor(data[["edu"]])
Gender <- as.factor(data[["Gender"]])
data <- data.frame(Age, income, edu, Gender)
# Column types after re-encoding.
str(object = data)
## 'data.frame': 100 obs. of 4 variables:
## $ Age : int 54 24 23 16 18 23 50 23 22 29 ...
## $ income: int 85000 20000 19000 12000 7000 15000 80000 10000 25000 30000 ...
## $ edu : Factor w/ 4 levels "12","14","16",..: 4 3 3 1 1 1 4 2 3 4 ...
## $ Gender: Factor w/ 2 levels "0","1": 2 2 2 1 1 1 2 2 2 2 ...
# Age/income scatter plot with points coloured by gender.
ggplot(data = data, mapping = aes(x = Age, y = income, colour = Gender)) +
  geom_point()
In the graph above, age vs. income is compared with respect to gender.
# Keep the bare age/income plot specification in `ps` so layers can be added
# to it later, draw it with points, then list the plotted columns.
ps <- ggplot(data = data, mapping = aes(x = Age, y = income))
ps +
  geom_point()
data[c("Gender", "Age", "income")]
## Gender Age income
## 1 1 54 85000
## 2 1 24 20000
## 3 1 23 19000
## 4 0 16 12000
## 5 0 18 7000
## 6 0 23 15000
## 7 1 50 80000
## 8 1 23 10000
## 9 1 22 25000
## 10 1 29 30000
## 11 0 26 30000
## 12 0 25 20000
## 13 0 26 15000
## 14 1 24 10000
## 15 0 24 10000
## 16 0 24 10000
## 17 0 25 20000
## 18 1 23 50000
## 19 1 34 25000
## 20 1 24 40000
## 21 1 18 50000
## 22 0 21 25000
## 23 1 24 60000
## 24 1 26 20000
## 25 1 20 20000
## 26 0 21 20000
## 27 1 21 50000
## 28 1 23 40000
## 29 1 60 50000
## 30 1 53 50000
## 31 1 60 80000
## 32 1 36 150000
## 33 1 53 200000
## 34 1 32 50000
## 35 1 28 15000
## 36 1 23 15000
## 37 1 21 50000
## 38 1 26 30000
## 39 1 21 20000
## 40 1 22 50000
## 41 1 62 80000
## 42 0 31 40000
## 43 1 35 35000
## 44 1 35 40000
## 45 1 30 20000
## 46 1 25 14000
## 47 1 21 10000
## 48 1 20 15000
## 49 1 29 120000
## 50 1 26 12000
## 51 1 32 80000
## 52 1 27 15000
## 53 1 24 20000
## 54 1 36 30000
## 55 1 25 200000
## 56 1 18 15000
## 57 1 24 40000
## 58 1 21 12000
## 59 1 20 40000
## 60 1 22 30000
## 61 1 39 50000
## 62 0 22 12000
## 63 0 19 13000
## 64 1 26 40000
## 65 1 38 100000
## 66 1 26 20000
## 67 1 25 14000
## 68 1 22 15000
## 69 1 25 48000
## 70 0 25 25000
## 71 1 22 35000
## 72 1 24 20000
## 73 1 25 25000
## 74 1 22 25000
## 75 1 30 30000
## 76 1 24 250000
## 77 1 39 100000
## 78 0 28 25000
## 79 1 44 30000
## 80 1 24 15000
## 81 0 26 20000
## 82 1 25 20000
## 83 0 26 20000
## 84 0 23 25000
## 85 1 23 12500
## 86 1 23 10000
## 87 1 24 10000
## 88 0 25 15000
## 89 1 25 20000
## 90 1 21 10000
## 91 0 24 18000
## 92 1 22 15000
## 93 0 24 15000
## 94 1 23 12000
## 95 1 30 50000
## 96 1 24 70000
## 97 1 23 50000
## 98 1 24 50000
## 99 1 22 20000
## 100 1 32 150000
# Gender mapped to colour.
ggplot(data = data, mapping = aes(x = Age, y = income, colour = Gender)) +
  geom_point()
# Gender mapped to both colour and shape.
ggplot(
  data = data,
  mapping = aes(x = Age, y = income, colour = Gender, shape = Gender)
) +
  geom_point()
# Education mapped to colour.
ggplot(data = data, mapping = aes(x = Age, y = income, colour = edu)) +
  geom_point()
# Education mapped to both colour and shape.
ggplot(
  data = data,
  mapping = aes(x = Age, y = income, colour = edu, shape = edu)
) +
  geom_point()
All the graphs above show the relationship between age and income, with gender or education level mapped to colour and shape.
# Age/income scatter plot at full opacity, then at 10% and 1% opacity.
ps <- ggplot(data = data, mapping = aes(x = Age, y = income))
ps +
  geom_point()
ps +
  geom_point(alpha = 0.1)
ps +
  geom_point(alpha = 0.01)
Still age vs income comparison in above graphs.
library(hexbin)
## Warning: package 'hexbin' was built under R version 3.5.3
# Hexagonal binning of age vs. income, with the bin count shown on a
# light-blue-to-red fill gradient.
ps +
  stat_binhex() +
  scale_fill_gradient(low = "lightblue", high = "red", limits = c(0, 8000))
library(plyr)
## Warning: package 'plyr' was built under R version 3.5.3
# Base-graphics box plots of income by gender, by education, and by both.
boxplot(income ~ Gender, data = data)
boxplot(income ~ edu, data = data)
boxplot(income ~ Gender + edu, data = data)
# qplot() versions; these read the stand-alone Gender/edu/income vectors.
qplot(x = Gender, y = income, geom = "boxplot")
qplot(x = interaction(Gender, edu), y = income, geom = "boxplot")
# ggplot() version of the gender-by-education box plot.
ggplot(data = data, mapping = aes(x = interaction(Gender, edu), y = income)) +
  geom_boxplot()
library(gcookbook)
## Warning: package 'gcookbook' was built under R version 3.5.2
# Bars of income by education level.
ggplot(data = data, mapping = aes(x = edu, y = income)) +
  geom_bar(stat = "identity")
# Count of respondents per education level.
qplot(x = factor(edu), data = data)
# Bars of income by gender with a light-blue fill and blue outline.
ggplot(data = data, mapping = aes(x = Gender, y = income)) +
  geom_bar(stat = "identity", fill = "lightblue", colour = "blue")
# Income by education, one dodged bar group per gender.
ggplot(data = data, mapping = aes(x = edu, y = income, fill = Gender)) +
  geom_bar(position = "dodge", stat = "identity", colour = "blue")
# Income by education with dodged bars filled by gender, using a ColorBrewer
# palette. Two fixes relative to the original line:
#   * `fill = Gender` replaces `cultivar = Gender`; `cultivar` is not a
#     ggplot2 aesthetic, so the fill scale had nothing to colour.
#   * the palette name is case-sensitive: "Pastel1", not "pastel1" (the
#     original emitted "Unknown palette pastel1").
ggplot(data, aes(x = edu, y = income, fill = Gender)) +
  geom_bar(position = "dodge", stat = "identity", colour = "blue") +
  scale_fill_brewer(palette = "Pastel1")
# Count of respondents per education level.
ggplot(data = data, mapping = aes(x = edu)) +
  geom_bar()
# Narrow dodged bars of income by education and gender.
ggplot(data = data, mapping = aes(x = edu, y = income, fill = Gender)) +
  geom_bar(stat = "identity", width = 0.5, position = "dodge")
# Same, with an explicit dodge width of 0.7.
ggplot(data = data, mapping = aes(x = edu, y = income, fill = Gender)) +
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(0.7))
# Stacked bars with the legend order reversed.
ggplot(data = data, mapping = aes(x = edu, y = income, fill = Gender)) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))
# Stacked bars with black outlines, reversed legend and a pastel palette.
ggplot(data = data, mapping = aes(x = edu, y = income, fill = Gender)) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
# Labelled bar charts are only readable for a handful of bars, so they use the
# first six respondents. The subset lives in its own variable: the original
# assigned it back to `data`, which silently cut every later plot, summary and
# model down to six observations.
# NOTE(review): the outputs recorded further down in this document (group
# means, lm/loess summaries, predictions) were produced from the six-row data
# and need to be regenerated after this fix.
data_head <- data[1:6, ]
# Income labels drawn just inside the top of each bar.
ggplot(data_head, aes(x = edu, y = income, fill = Gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = income),
    vjust = 1.3, colour = "black",
    position = position_dodge(.9), size = 3
  )
# Income labels drawn above each bar.
ggplot(data_head, aes(x = edu, y = income, fill = Gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = income),
    vjust = -4, colour = "black",
    position = position_dodge(.9), size = 3
  )
# Base-graphics histograms of income and age.
hist(data$income)
hist(data$Age)
hist(data$income, breaks = 10)
# ggplot2 histogram of income. The bin width is expressed in income units:
# the original `binwidth = 10` produced thousands of sliver-thin bins on an
# axis that spans tens of thousands, so 10000 is used instead.
qplot(income, data = data, binwidth = 10000)
# Age histograms with 4- and 5-year bins.
ggplot(data, aes(x = Age)) +
  geom_histogram(binwidth = 4)
ggplot(data, aes(x = Age)) +
  geom_histogram(binwidth = 5, fill = "white", colour = "black")
# Age histogram split into one panel per gender.
ggplot(data = data, mapping = aes(x = Age)) +
  geom_histogram(fill = "white", colour = "black") +
  facet_grid(Gender ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Overlaid, semi-transparent age histograms by gender.
ggplot(data = data, mapping = aes(x = Age, fill = Gender)) +
  geom_histogram(position = "identity", alpha = 0.4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Density estimate of income as a filled curve.
ggplot(data = data, mapping = aes(x = income)) +
  geom_density()
# Density estimate drawn as a line, with the y axis forced to include 0.
ggplot(data = data, mapping = aes(x = income)) +
  geom_line(stat = "density") +
  expand_limits(y = 0)
# Three bandwidth adjustments: 0.25 (red), default (black) and 2 (blue).
ggplot(data = data, mapping = aes(x = income)) +
  geom_line(stat = "density", adjust = 0.25, colour = "red") +
  geom_line(stat = "density") +
  geom_line(stat = "density", adjust = 2, colour = "blue")
# Age histogram on the density scale with a density curve on top.
# The original added `xlim(35, 105)`, limits that do not fit the age range of
# this data: they discarded most observations (see the "Removed ... rows"
# warnings it produced). The axis is now left to span the data.
ggplot(data, aes(x = Age, y = ..density..)) +
  geom_histogram(fill = "cornsilk", colour = "grey60", size = .2) +
  geom_density()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Overlaid, semi-transparent income densities, one per gender.
ggplot(data = data, mapping = aes(x = income, fill = Gender)) +
  geom_density(alpha = 0.3)
### Here, in the density graphs, the different colours and lines show the distributions of age and income, overall and by gender.
# Violin plot of income for each gender.
p <- ggplot(data = data, mapping = aes(x = Gender, y = income))
p +
  geom_violin()
# Box plot of income per education level with the group mean marked by a
# white diamond.
ggplot(data = data, mapping = aes(x = edu, y = income)) +
  geom_boxplot() +
  stat_summary(
    fun.y = "mean", geom = "point",
    shape = 23, size = 3, fill = "white"
  )
### Looking at the violin graphs, we can see that males earn more than females. Also, if we consider education level, the higher the education, the higher the income you will receive.
# Centred dot plot of age for each gender.
ggplot(data = data, mapping = aes(x = Gender, y = Age)) +
  geom_dotplot(binaxis = "y", binwidth = 0.5, stackdir = "center")
# Box plot (outliers hidden) with the unfilled dot plot drawn over it.
ggplot(data = data, mapping = aes(x = Gender, y = Age)) +
  geom_boxplot(outlier.colour = NA, width = 0.4) +
  geom_dotplot(
    binaxis = "y", binwidth = 0.5,
    stackdir = "center", fill = NA
  )
# Two-dimensional density estimates of age vs. income.
p <- ggplot(data = data, mapping = aes(x = Age, y = income))
# Contour lines over the points.
p +
  geom_point() +
  stat_density2d()
# Contour lines coloured by level.
p +
  stat_density2d(aes(colour = ..level..))
# Density shown as a raster fill.
p +
  stat_density2d(aes(fill = ..density..), geom = "raster", contour = FALSE)
# Density shown as tile transparency, with the points added.
p +
  geom_point() +
  stat_density2d(aes(alpha = ..density..), geom = "tile", contour = FALSE)
# Standard normal density curve on [-3, 3], annotated with its formula as a
# parsed plotmath expression.
p <- ggplot(data = data.frame(x = c(-3, 3)), mapping = aes(x = x)) +
  stat_function(fun = dnorm)
p +
  annotate(
    "text",
    x = 2, y = 0.3, parse = TRUE,
    label = "frac(1, sqrt(2 * pi)) * e ^ {-x^2 / 2}"
  )
# Same formula prefixed with plain text, placed near the bottom centre.
p +
  annotate(
    "text",
    x = 0, y = 0.05, parse = TRUE, size = 4,
    label = "'Function: ' * y==frac(1, sqrt(2*pi)) * e^{-x^2/2}"
  )
# Age/income scatter plot coloured by gender, used as the base for the
# reference-line and annotation examples. (The original first assigned an
# uncoloured version to `p` and overwrote it on the next line; that dead
# assignment has been removed.)
p <- ggplot(data, aes(x = Age, y = income, colour = Gender)) + geom_point()
# Horizontal and vertical reference lines.
p + geom_hline(yintercept = 40000) + geom_vline(xintercept = 25)
# Sloped reference line.
p + geom_abline(intercept = 120000, slope = 1.75)
# Text labels at fixed data coordinates.
# NOTE(review): x = 3 and y = 66 look unrelated to the Age and income values
# in this data -- confirm the intended label positions.
p + annotate("text", x = 3, y = 20000, label = "Group 1") +
  annotate("text", x = 3, y = 66, label = "Group 2")
# Text labels anchored to the plot edges with infinite coordinates.
p + annotate("text", x = -Inf, y = Inf, label = "Upper left",
             hjust = -.2, vjust = 2) +
  annotate("text", x = mean(range(data$Age)), y = -Inf, vjust = -0.4,
           label = "Bottom middle")
# Mean income per gender (plyr).
hw_means <- ddply(data, "Gender", summarise, income = mean(income))
hw_means
## Gender income
## 1 0 11333.33
## 2 1 41333.33
# Dashed horizontal line at each gender's mean income.
p + geom_hline(aes(yintercept = income, colour = Gender), data = hw_means,
               linetype = "dashed", size = 1)
# Line plot for one gender with a straight segment annotation.
# The original filtered on Gender == "Gender", which matches no row (the
# factor levels are "0" and "1") and so plotted an empty data frame.
# NOTE(review): level "1" is assumed to be the intended group -- confirm.
p <- ggplot(subset(data, Gender == "1"), aes(x = Age, y = income)) +
  geom_line()
p + annotate("segment", x = 18, xend = 62, y = 25588, yend = 200000)
# Titles: built-in title, title moved down into the plot, and titles drawn as
# text annotations.
p <- ggplot(data, aes(x = Age, y = income)) + geom_point()
p + ggtitle("Age and income of Visitors")
p + ggtitle("Age and income of Visitors") +
  theme(plot.title = element_text(vjust = -8))
# The annotation is centred on the x axis, which shows Age. The original used
# `heightweight$ageYear` (a different data set) and `data$income` (the y
# variable) to compute the x position.
p + annotate("text", x = mean(range(data$Age)), y = Inf,
             label = "Age and income of Visitors", vjust = 1.5, size = 4)
p + annotate("text", x = mean(range(data$Age)), y = Inf,
             label = "Age and income of Visitors", vjust = 1.5, size = 6)
# Styled free text at a fixed position.
p + annotate("text", x = 25, y = 53, label = "Some text", size = 7,
             family = "Times", fontface = "bold.italic", colour = "red")
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database
# Default grey theme with a larger base font size and the "Times" family.
p +
  theme_grey(base_size = 16, base_family = "Times")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database
# Plain age/income scatter plot used for the grid-line examples. The original
# assigned a colour-by-Gender plot to `p` and immediately overwrote it, twice,
# and re-created this same plot in between; those dead and redundant
# assignments have been removed. The plots drawn are unchanged.
p <- ggplot(data, aes(x = Age, y = income)) + geom_point()
# Hide all grid lines.
p + theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
# Hide only the grid lines of the y axis.
p + theme(panel.grid.major.y = element_blank(),
          panel.grid.minor.y = element_blank())
# Default grey theme for comparison.
p + theme_grey()
# The two grid-line variants are drawn a second time, as in the original.
p + theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())
p + theme(panel.grid.major.y = element_blank(),
          panel.grid.minor.y = element_blank())
# Box plot of income per education level, filled by education level.
p <- ggplot(data, aes(x = edu, y = income, fill = edu)) + geom_boxplot()
p
# Legend removed.
p + theme(legend.position = "none")
# Same plot with a pastel palette; legend on top, then inside the
# bottom-right corner.
p <- ggplot(data, aes(x = edu, y = income, fill = edu)) + geom_boxplot() +
  scale_fill_brewer(palette = "Pastel2")
p + theme(legend.position = "top")
p + theme(legend.position = c(1, 0), legend.justification = c(1, 0))
# Reverse the order of the legend entries. `limits` must name levels of the
# fill variable; the original values ("trt1", "trt2", "ctrl") match none of
# the `edu` levels ("12", "14", "16", "18").
p + scale_fill_discrete(limits = c("18", "16", "14", "12"))
## Scale for 'fill' is already present. Adding another scale for 'fill',
## which will replace the existing scale.
# Scatter plot coloured by gender with point size mapped to age (sizes 1-4).
hw <- ggplot(data = data, mapping = aes(x = Age, y = income, colour = Gender)) +
  geom_point(aes(size = Age)) +
  scale_size_continuous(range = c(1, 4))
hw
# Scatter plot with gender mapped to both shape and colour.
hw1 <- ggplot(
  data = data,
  mapping = aes(x = Age, y = income, shape = Gender, colour = Gender)
) +
  geom_point()
hw1
# Box plot of income by gender with restyled legend text.
p <- ggplot(data = data, mapping = aes(x = Gender, y = income, fill = Gender)) +
  geom_boxplot()
p +
  theme(
    legend.text = element_text(
      face = "italic", family = "Times", colour = "red", size = 14
    )
  )
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
# Quadratic regression of income on age, followed by its summary.
model <- lm(formula = income ~ Age + I(Age^2), data = data)
print(summary(model))
##
## Call:
## lm(formula = income ~ Age + I(Age^2), data = data)
##
## Residuals:
## 1 2 3 4 5 6
## -44.87 1485.54 1858.14 2939.74 -4096.69 -2141.86
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 619.97 15610.89 0.040 0.971
## Age 91.35 1053.90 0.087 0.936
## I(Age^2) 27.26 14.42 1.891 0.155
##
## Residual standard error: 3448 on 3 degrees of freedom
## Multiple R-squared: 0.9916, Adjusted R-squared: 0.986
## F-statistic: 176.9 on 2 and 3 DF, p-value: 0.0007708
# Evaluate the fitted model on 100 evenly spaced ages covering the observed
# age range, and show the resulting table.
xmin <- min(data[["Age"]])
xmax <- max(data[["Age"]])
predicted <- data.frame(Age = seq(from = xmin, to = xmax, length.out = 100))
predicted[["income"]] <- predict(model, newdata = predicted)
print(predicted)
## Age income
## 1 16.00000 9060.265
## 2 16.38384 9434.182
## 3 16.76768 9816.132
## 4 17.15152 10206.115
## 5 17.53535 10604.130
## 6 17.91919 11010.178
## 7 18.30303 11424.259
## 8 18.68687 11846.373
## 9 19.07071 12276.519
## 10 19.45455 12714.698
## 11 19.83838 13160.910
## 12 20.22222 13615.155
## 13 20.60606 14077.432
## 14 20.98990 14547.742
## 15 21.37374 15026.085
## 16 21.75758 15512.461
## 17 22.14141 16006.869
## 18 22.52525 16509.310
## 19 22.90909 17019.784
## 20 23.29293 17538.290
## 21 23.67677 18064.829
## 22 24.06061 18599.401
## 23 24.44444 19142.006
## 24 24.82828 19692.643
## 25 25.21212 20251.313
## 26 25.59596 20818.016
## 27 25.97980 21392.752
## 28 26.36364 21975.520
## 29 26.74747 22566.321
## 30 27.13131 23165.155
## 31 27.51515 23772.022
## 32 27.89899 24386.921
## 33 28.28283 25009.853
## 34 28.66667 25640.818
## 35 29.05051 26279.815
## 36 29.43434 26926.845
## 37 29.81818 27581.908
## 38 30.20202 28245.004
## 39 30.58586 28916.132
## 40 30.96970 29595.293
## 41 31.35354 30282.487
## 42 31.73737 30977.714
## 43 32.12121 31680.973
## 44 32.50505 32392.265
## 45 32.88889 33111.590
## 46 33.27273 33838.948
## 47 33.65657 34574.338
## 48 34.04040 35317.761
## 49 34.42424 36069.217
## 50 34.80808 36828.705
## 51 35.19192 37596.226
## 52 35.57576 38371.780
## 53 35.95960 39155.367
## 54 36.34343 39946.986
## 55 36.72727 40746.639
## 56 37.11111 41554.324
## 57 37.49495 42370.041
## 58 37.87879 43193.791
## 59 38.26263 44025.575
## 60 38.64646 44865.390
## 61 39.03030 45713.239
## 62 39.41414 46569.120
## 63 39.79798 47433.034
## 64 40.18182 48304.981
## 65 40.56566 49184.961
## 66 40.94949 50072.973
## 67 41.33333 50969.018
## 68 41.71717 51873.095
## 69 42.10101 52785.206
## 70 42.48485 53705.349
## 71 42.86869 54633.525
## 72 43.25253 55569.734
## 73 43.63636 56513.975
## 74 44.02020 57466.249
## 75 44.40404 58426.556
## 76 44.78788 59394.895
## 77 45.17172 60371.268
## 78 45.55556 61355.673
## 79 45.93939 62348.110
## 80 46.32323 63348.581
## 81 46.70707 64357.084
## 82 47.09091 65373.620
## 83 47.47475 66398.189
## 84 47.85859 67430.790
## 85 48.24242 68471.425
## 86 48.62626 69520.091
## 87 49.01010 70576.791
## 88 49.39394 71641.523
## 89 49.77778 72714.289
## 90 50.16162 73795.086
## 91 50.54545 74883.917
## 92 50.92929 75980.780
## 93 51.31313 77085.676
## 94 51.69697 78198.605
## 95 52.08081 79319.567
## 96 52.46465 80448.561
## 97 52.84848 81585.588
## 98 53.23232 82730.648
## 99 53.61616 83883.740
## 100 54.00000 85044.865
# Observed points in grey with the model's predicted curve drawn over them.
sp <- ggplot(data = data, mapping = aes(x = Age, y = income)) +
  geom_point(colour = "grey40")
sp +
  geom_line(data = predicted, size = 1)
# Alternative fits of income on age: a straight line and a loess smoother.
modlinear <- lm(formula = income ~ Age, data = data)
modloess <- loess(formula = income ~ Age, data = data)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15.81
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 7.19
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 972.82
# Summary of the loess fit.
print(summary(modloess))
## Call:
## loess(formula = income ~ Age, data = data)
##
## Number of Observations: 6
## Equivalent Number of Parameters: 4.52
## Residual Standard Error: 3920
## Trace of smoother matrix: 5 (exact)
##
## Control settings:
## span : 0.75
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
## normalize: TRUE
## parametric: FALSE
## drop.square: FALSE