library(openintro)
## 载入需要的程序包:airports
## 载入需要的程序包:cherryblossom
## 载入需要的程序包:usdata
library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr 1.1.4 v readr 2.1.6
## v forcats 1.0.1 v stringr 1.6.0
## v ggplot2 4.0.1 v tibble 3.3.1
## v lubridate 1.9.4 v tidyr 1.3.2
## v purrr 1.2.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Open the help page for the gpa dataset.
help("gpa")
## 打开httpd帮助服务器… 好了
gpa: the student's grade point average; studyweek: the number of hours the student spends studying per week; sleepnight: the number of hours the student sleeps per night; out: the number of nights the student goes out each week; gender: female or male. (2)
# Scatter plot of GPA against weekly study hours.
ggplot(gpa, aes(x = studyweek, y = gpa)) +
  geom_point()
Within the 30-50 range, GPAs tend to be generally high; between 15 and 30, GPAs vary considerably; whereas in the 0-15 bracket, GPAs are evenly distributed without discernible patterns.
# Scatter plot of GPA against the `out` variable.
ggplot(gpa, aes(x = out, y = gpa)) +
  geom_point()
Most students go out between one and three nights per week, and within this range students with higher GPAs make up the majority.
# Scatter plot of nightly sleep against the `out` variable.
ggplot(gpa, aes(x = out, y = sleepnight)) +
  geom_point()
Generally speaking, students who go out more often tend to sleep
more. Moreover, almost everyone sleeps for at least six hours per
night.
# Weekly study hours for each gender, one point per student.
ggplot(gpa, aes(x = gender, y = studyweek)) +
  geom_point()
Both males and females concentrated their weekly study time within the 0-15 hour range, but a higher proportion of female students than male students devoted more than 15 hours to study.
# `out` for each gender; points are jittered so overlapping values stay visible.
ggplot(gpa, aes(x = gender, y = out)) +
  geom_jitter()
Most girls go out between one and three nights a week, while about half of the boys go out one to two nights and the other half three to four nights.
# GPA for each gender, one point per student.
ggplot(gpa, aes(x = gender, y = gpa)) +
  geom_point()
Female students exhibit a greater number of high GPAs, though low scores are also not uncommon. Male students display fewer high GPAs, but these tend to be concentrated within the range of 3.4 to 3.8.
# Open the help page for the loans_full_schema dataset.
help("loans_full_schema")
# Distribution of employment length with a kernel density curve on top.
# emp_length appears to be recorded in whole years (confirm in ?loans_full_schema),
# so binwidth = 1 with center = 0 gives each value its own bar. The default of
# 30 bins slices the range into sub-year bins, which leaves empty gaps between
# bars and makes the bar heights incomparable with the density curve.
ggplot(loans_full_schema, aes(x = emp_length)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, center = 0,
    color = "black", fill = "yellow"
  ) +
  geom_density(linewidth = 1.5, adjust = 0.7)
## Warning: Removed 817 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 817 rows containing non-finite outside the scale range
## (`stat_density()`).
# Annual income against employment length; jitter separates stacked points.
ggplot(loans_full_schema, aes(x = emp_length, y = annual_income)) +
  geom_point(position = position_jitter())
## Warning: Removed 817 rows containing missing values or values outside the scale range
## (`geom_point()`).
Generally speaking, borrowers with five to ten years of employment have
lower incomes than those with zero to five years, while borrowers
employed for ten years or more have higher incomes than either group.
At every employment length, there are individuals commanding very high incomes.
# 2-D binned counts of debt-to-income ratio against employment length
# (bins are 1 wide on x and 20 wide on y).
ggplot(loans_full_schema, aes(x = emp_length, y = debt_to_income)) +
  geom_bin_2d(binwidth = c(1, 20))
## Warning: Removed 818 rows containing non-finite outside the scale range
## (`stat_bin2d()`).
For those with 1 to 4 years of employment, the debt-to-income ratio
tends to be higher, whereas it is lower for those with 5 to 9 years of
employment.
# Debt-to-income ratio by homeownership, one panel per employment length.
# Every component must be chained with `+`: without the `+` after ylim(), the
# labs()/theme() calls are evaluated as a separate expression and the title,
# axis labels and theme settings never reach the plot.
ggplot(data = loans_full_schema) +
  geom_point(
    mapping = aes(x = homeownership, y = debt_to_income),
    position = "jitter"
  ) +
  facet_wrap(~ emp_length) +
  ylim(0, 250) +
  labs(
    title = "the relationship of homeownership and debt-to-income",
    x = "homeownership",
    y = "debt_to_income"
  ) +
  theme(
    plot.title = element_text(
      hjust = 0.5, size = rel(1.5), margin = margin(15, 15, 15, 15)
    ),
    axis.title = element_text(size = rel(1.2)),
    axis.title.x = element_text(margin = margin(10, 5, 5, 5)),
    axis.title.y = element_text(margin = margin(5, 10, 5, 5)),
    axis.text = element_text(size = rel(0.7))
  )
## Warning: Removed 43 rows containing missing values or values outside the scale range
## (`geom_point()`).
Generally speaking, across all employment lengths, homeowners have a lower debt-to-income ratio than those who rent or have a mortgage.
# List the distinct loan purposes present in the data.
unique(loans_full_schema[["loan_purpose"]])
## [1] moving debt_consolidation other credit_card
## [5] home_improvement medical house small_business
## [9] car major_purchase vacation renewable_energy
## 14 Levels: car credit_card debt_consolidation home_improvement ... wedding
# Debt-to-income ratio against recent delinquencies, in a grid of panels:
# rows are application types, columns are income verification statuses.
ggplot(loans_full_schema, aes(x = delinq_2y, y = debt_to_income)) +
  geom_point(position = position_jitter()) +
  facet_grid(rows = vars(application_type), cols = vars(verified_income)) +
  ylim(0, 200) +
  labs(
    title = "Individual vs. Joint Applications across Income Verification Status",
    x = "Delinquencies on lines of credit in the last 2 years",
    y = "Debt-to-income ratio"
  ) +
  theme(
    plot.title = element_text(
      hjust = 0.5,
      size = rel(1.5),
      margin = margin(t = 15, r = 15, b = 15, l = 15)
    ),
    axis.title = element_text(size = rel(1.2)),
    axis.title.x = element_text(margin = margin(t = 10, r = 5, b = 5, l = 5)),
    axis.title.y = element_text(margin = margin(t = 5, r = 10, b = 5, l = 5)),
    axis.text = element_text(size = rel(1.2))
  )
## Warning: Removed 49 rows containing missing values or values outside the scale range
## (`geom_point()`).
Individual applicants are more numerous, and they also reach higher
counts of delinquencies over the last two years. Joint applicants'
debt-to-income ratios are higher than those of individual applicants.
# Loan amount against recent delinquencies, one panel per loan purpose.
# The y axis shows loan_amount, so it is labelled as an amount (the previous
# label "loan number" contradicted both the mapped variable and the title).
ggplot(data = loans_full_schema) +
  geom_point(
    mapping = aes(x = delinq_2y, y = loan_amount),
    position = "jitter"
  ) +
  facet_wrap(~ loan_purpose) +
  labs(
    title = "Distribution of Loan Amounts by Delinquency History and Loan Purpose",
    x = "Delinquencies on lines of credit in the last 2 years",
    y = "Loan amount"
  ) +
  theme(
    plot.title = element_text(
      hjust = 0.5, size = rel(1.2), margin = margin(15, 15, 15, 15)
    ),
    axis.title = element_text(size = rel(1.2)),
    axis.title.x = element_text(margin = margin(10, 5, 5, 5)),
    axis.title.y = element_text(margin = margin(5, 10, 5, 5)),
    axis.text = element_text(size = rel(1.2))
  )
The main uses of loans are concentrated in debt consolidation, credit
card debt, and home improvement. Furthermore, larger loans are more
common for these purposes, and these types of loans are more likely to
result in late payments. 3.
# Open the help page for the ames dataset.
help("ames")
This dataset contains information from the Ames Assessor's Office used to calculate the assessed values of residential properties sold in Ames, Iowa, between 2006 and 2010. Variables: 82; observations: 2930. Scope: covers location, quality, structural details, and sale conditions.
# Scatter plot of sale price against area.
ggplot(ames, aes(x = area, y = price)) +
  geom_point()
Transactions primarily centred on properties ranging from 500 to 2,500
square feet in size, with floor area and price exhibiting a broadly
positive correlation trend, though some prices were either excessively
low or high.
# List the distinct building types present in the data.
unique(ames[["Bldg.Type"]])
## [1] 1Fam TwnhsE Twnhs Duplex 2fmCon
## Levels: 1Fam 2fmCon Duplex Twnhs TwnhsE
# Box plot of sale price for each building type.
ggplot(ames, aes(x = Bldg.Type, y = price)) +
  geom_boxplot()
This box plot indicates that single-family detached houses (1Fam) command the highest prices and exhibit the most outliers. Among townhouses, end units (TwnhsE) are priced higher than inside units (Twnhs) because, unlike the middle unit, they share only one wall with neighbours, typically offering better natural light and slightly larger plots. Duplexes command higher prices than two-family conversions (2fmCon) due to enhanced privacy.
# Sale price against area, one panel per building type.
ggplot(ames, aes(x = area, y = price)) +
  geom_point() +
  facet_wrap(vars(Bldg.Type)) +
  labs(x = "area", y = "price")
Detached houses continue to command higher prices and larger floor areas than other property types. Overall, townhouses rank second in price, with the remaining three categories being fairly comparable.
# Count how many properties were built in each year.
table(ames[["Year.Built"]])
##
## 1872 1875 1879 1880 1882 1885 1890 1892 1893 1895 1896 1898 1900 1901 1902 1904
## 1 1 1 5 1 2 7 2 1 3 1 1 29 2 1 1
## 1905 1906 1907 1908 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921
## 3 1 1 2 43 1 5 1 8 24 10 3 10 5 57 11
## 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1934 1935 1936 1937 1938
## 16 17 16 34 19 9 9 8 26 7 5 5 13 11 9 13
## 1939 1940 1941 1942 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956
## 20 36 23 6 15 15 11 27 18 38 18 18 24 43 34 39
## 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972
## 35 48 43 37 34 35 35 33 34 35 41 45 28 42 39 40
## 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988
## 21 23 25 54 57 42 21 27 10 7 8 19 7 11 8 15
## 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
## 8 19 12 27 40 37 31 34 35 47 52 48 35 47 88 99
## 2005 2006 2007 2008 2009 2010
## 142 138 109 49 25 3
# List the distinct utility categories present in the data.
unique(ames[["Utilities"]])
## [1] AllPub NoSewr NoSeWa
## Levels: AllPub NoSeWa NoSewr
# Sale price for each utilities category; jitter spreads overlapping points.
ggplot(ames, aes(x = Utilities, y = price)) +
  geom_jitter()
Most people will only purchase properties with all public amenities fully developed.