source('create_datasets.R')
library(readr)
library(dplyr)
library(ggplot2)
library(openintro)
# Download the course datasets.
# stringsAsFactors = TRUE is requested explicitly: since R 4.0.0 read.csv()
# keeps text columns as character by default, while the code below calls
# levels() and droplevels() on these columns and therefore needs factors.
cars <- read.csv(
  "https://assets.datacamp.com/production/course_1796/datasets/cars04.csv",
  stringsAsFactors = TRUE
)
comics <- read.csv(
  "https://assets.datacamp.com/production/course_1796/datasets/comics.csv",
  stringsAsFactors = TRUE
)
life <- read.csv(
  "https://assets.datacamp.com/production/course_1796/datasets/life_exp_raw.csv",
  stringsAsFactors = TRUE
)
# table function with two variables
# Print the first rows of the data
head(comics)
## name id align eye
## 1 Spider-Man (Peter Parker) Secret Good Hazel Eyes
## 2 Captain America (Steven Rogers) Public Good Blue Eyes
## 3 Wolverine (James \\"Logan\\" Howlett) Public Neutral Blue Eyes
## 4 Iron Man (Anthony \\"Tony\\" Stark) Public Good Blue Eyes
## 5 Thor (Thor Odinson) No Dual Good Blue Eyes
## 6 Benjamin Grimm (Earth-616) Public Good Blue Eyes
## hair gender gsm alive appearances first_appear
## 1 Brown Hair Male <NA> Living Characters 4043 Aug-62
## 2 White Hair Male <NA> Living Characters 3360 Mar-41
## 3 Black Hair Male <NA> Living Characters 3061 Oct-74
## 4 Black Hair Male <NA> Living Characters 2961 Mar-63
## 5 Blond Hair Male <NA> Living Characters 2258 Nov-50
## 6 No Hair Male <NA> Living Characters 2255 Nov-61
## publisher
## 1 marvel
## 2 marvel
## 3 marvel
## 4 marvel
## 5 marvel
## 6 marvel
# Inspect the categories of the alignment variable
levels(comics[["align"]])
## [1] "Bad" "Good" "Neutral"
## [4] "Reformed Criminals"

# Inspect the categories of the gender variable
levels(comics[["gender"]])
## [1] "Female" "Male" "Other"

# Cross-tabulate alignment (rows) against gender (columns)
table(comics[["align"]], comics[["gender"]])
##
## Female Male Other
## Bad 1573 7561 32
## Good 2490 4809 17
## Neutral 836 1799 17
## Reformed Criminals 1 2 0

# Keep the same contingency table in `tab` and display it
tab <- table(comics[["align"]], comics[["gender"]])
tab
##
## Female Male Other
## Bad 1573 7561 32
## Good 2490 4809 17
## Neutral 836 1799 17
## Reformed Criminals 1 2 0

# Discard the "Reformed Criminals" rows, then drop the now-unused level
comics <- droplevels(filter(comics, align != "Reformed Criminals"))
levels(comics[["align"]])
## [1] "Bad" "Good" "Neutral"
# ggplot2 is already attached at the top of the script
# Create side-by-side barchart of gender by alignment
ggplot(comics, aes(x = align, fill = gender)) +
  geom_bar(position = "dodge")

# Create side-by-side barchart of alignment by gender
# (the argument name is spelled out in full; the original `positio` is not
# the documented parameter name of geom_bar())
ggplot(comics, aes(x = gender, fill = align)) +
  geom_bar(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90))

# simplify display format
options(digits = 3, scipen = 999)

## Counts of identity type (rows) by alignment (columns)
tbl_cnt <- table(comics[["id"]], comics[["align"]])
tbl_cnt
##
## Bad Good Neutral
## No Dual 474 647 390
## Public 2172 2930 965
## Secret 4493 2475 959
## Unknown 7 0 2

# Joint proportions: every cell divided by the grand total,
# so the whole table sums to 1
prop.table(tbl_cnt)
##
## Bad Good Neutral
## No Dual 0.030553 0.041704 0.025139
## Public 0.140003 0.188862 0.062202
## Secret 0.289609 0.159533 0.061815
## Unknown 0.000451 0.000000 0.000129
sum(prop.table(tbl_cnt))
## [1] 1

# Row-wise proportions: each row sums to 1
prop.table(tbl_cnt, margin = 1)
##
## Bad Good Neutral
## No Dual 0.314 0.428 0.258
## Public 0.358 0.483 0.159
## Secret 0.567 0.312 0.121
## Unknown 0.778 0.000 0.222

# Column-wise proportions: each column sums to 1
prop.table(tbl_cnt, margin = 2)
##
## Bad Good Neutral
## No Dual 0.066331 0.106907 0.168394
## Public 0.303946 0.484137 0.416667
## Secret 0.628743 0.408956 0.414076
## Unknown 0.000980 0.000000 0.000864
# Stacked bars rescaled to proportions: alignment within each identity type
ggplot(comics, aes(x = id, fill = align)) +
  geom_bar(position = "fill") +
  ylab("proportion")

# Stacked bars rescaled to proportions: identity type within each alignment
ggplot(comics, aes(x = align, fill = id)) +
  geom_bar(position = "fill") +
  ylab("proportion")

# Contingency table of alignment by gender
tab <- table(comics$align, comics$gender)
options(scipen = 999, digits = 3) # Print fewer digits
prop.table(tab) # Joint proportions
##
## Female Male Other
## Bad 0.082210 0.395160 0.001672
## Good 0.130135 0.251333 0.000888
## Neutral 0.043692 0.094021 0.000888
# Proportions computed within each column of `tab`
prop.table(tab, margin = 2)
##
## Female Male Other
## Bad 0.321 0.534 0.485
## Good 0.508 0.339 0.258
## Neutral 0.171 0.127 0.258

# Stacked counts of gender for each alignment
comics %>%
  ggplot(aes(x = align, fill = gender)) +
  geom_bar()

# Same bars rescaled to proportions: gender conditional on alignment
comics %>%
  ggplot(aes(x = align, fill = gender)) +
  geom_bar(position = "fill")

# table() also accepts a single variable;
# the resulting counts are called a marginal distribution
table(comics[["id"]])
##
## No Dual Public Secret Unknown
## 1511 6067 7927 9
# Simple barchart
ggplot(comics, aes(x = id)) +
  geom_bar()

# Same barchart, one panel per alignment
ggplot(comics, aes(x = id)) +
  geom_bar() +
  facet_wrap(~align)

# Change the order of the levels in align
comics$align <- factor(comics$align,
                       levels = c("Bad", "Neutral", "Good"))
# Create plot of align
ggplot(comics, aes(x = align)) +
  geom_bar()

# Plot of alignment broken down by gender
ggplot(comics, aes(x = align)) +
  geom_bar() +
  facet_wrap(~ gender)

# Put levels of flavor in descending order
lev <- c(
  "apple", "key lime", "boston creme", "blueberry",
  "cherry", "pumpkin", "strawberry"
)
pies[["flavor"]] <- factor(pies[["flavor"]], levels = lev)
head(pies[["flavor"]])
## [1] cherry cherry cherry cherry cherry cherry
## 7 Levels: apple key lime boston creme blueberry cherry ... strawberry

# Bar chart of flavor counts with vertical x-axis labels
pies %>%
  ggplot(aes(x = flavor)) +
  geom_bar(fill = "chartreuse") +
  theme(axis.text.x = element_text(angle = 90))
# A dot plot shows all the datapoints
ggplot(cars, aes(x = weight)) +
  geom_dotplot(dotsize = 0.4)

# A histogram groups the points into bins so it does not get overwhelming
# (`dotsize` is a geom_dotplot() parameter, so it is not passed here)
ggplot(cars, aes(x = weight)) +
  geom_histogram(binwidth = 500)

# A density plot gives a bigger picture representation of the distribution
# It is more helpful when there is a lot of data
ggplot(cars, aes(x = weight)) +
  geom_density()

# A boxplot is a good way to just show the summary info of the distribution
ggplot(cars, aes(x = 1, y = weight)) +
  geom_boxplot() +
  coord_flip()

# Load package
library(ggplot2)
# Learn data structure
str(cars)
## 'data.frame': 428 obs. of 19 variables:
## $ name : Factor w/ 425 levels "Acura 3.5 RL 4dr",..: 66 67 68 69 70 114 115 133 129 130 ...
## $ sports_car : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ suv : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ wagon : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ minivan : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ pickup : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ all_wheel : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ rear_wheel : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ msrp : int 11690 12585 14610 14810 16385 13670 15040 13270 13730 15460 ...
## $ dealer_cost: int 10965 11802 13697 13884 15357 12849 14086 12482 12906 14496 ...
## $ eng_size : num 1.6 1.6 2.2 2.2 2.2 2 2 2 2 2 ...
## $ ncyl : int 4 4 4 4 4 4 4 4 4 4 ...
## $ horsepwr : int 103 103 140 140 140 132 132 130 110 130 ...
## $ city_mpg : int 28 28 26 26 26 29 29 26 27 26 ...
## $ hwy_mpg : int 34 34 37 37 37 36 36 33 36 33 ...
## $ weight : int 2370 2348 2617 2676 2617 2581 2626 2612 2606 2606 ...
## $ wheel_base : int 98 98 104 104 104 105 105 103 103 103 ...
## $ length : int 167 153 183 183 183 174 174 168 168 168 ...
## $ width : int 66 66 69 68 69 67 67 67 67 67 ...
# Create faceted histogram
ggplot(cars, aes(x = city_mpg)) +
  geom_histogram() +
  facet_wrap(~ suv)

# Distinct cylinder counts and how often each occurs
# NOTE(review): ncyl == -1 looks like a missing-value code - confirm against
# the data source
unique(cars$ncyl)
## [1] 4 6 3 8 5 12 10 -1
table(cars$ncyl)
##
## -1 3 4 5 6 8 10 12
## 2 1 136 7 190 87 2 3
# Filter cars with 4, 6, 8 cylinders
common_cyl <- filter(cars, ncyl %in% c(4, 6, 8))
# Create box plots of city mpg by ncyl
ggplot(common_cyl, aes(x = as.factor(ncyl), y = city_mpg)) +
  geom_boxplot()

# Create overlaid density plots for same data
ggplot(common_cyl, aes(x = city_mpg, fill = as.factor(ncyl))) +
  geom_density(alpha = .3)

# Create hist of horsepwr
cars %>%
  ggplot(aes(horsepwr)) +
  geom_histogram() +
  ggtitle("Horsepower distribution")

# Create hist of horsepwr for affordable cars
# (same x-axis limits as the full range so the two plots are comparable;
# title spelling corrected)
cars %>%
  filter(msrp < 25000) %>%
  ggplot(aes(horsepwr)) +
  geom_histogram() +
  xlim(c(90, 550)) +
  ggtitle("Horsepower distribution for msrp < 25000")

# Create hist of horsepwr with binwidth of 3
cars %>%
  ggplot(aes(horsepwr)) +
  geom_histogram(binwidth = 3) +
  ggtitle("binwidth = 3")

# Create hist of horsepwr with binwidth of 30
cars %>%
  ggplot(aes(horsepwr)) +
  geom_histogram(binwidth = 30) +
  ggtitle("binwidth = 30")

# Create hist of horsepwr with binwidth of 60
cars %>%
  ggplot(aes(horsepwr)) +
  geom_histogram(binwidth = 60) +
  ggtitle("binwidth = 60")

# Construct box plot of msrp
cars %>%
  ggplot(aes(x = 1, y = msrp)) +
  geom_boxplot()

# Exclude outliers from data
cars_no_out <- cars %>%
  filter(msrp < 100000)
# Construct box plot of msrp using the reduced dataset
cars_no_out %>%
  ggplot(aes(x = 1, y = msrp)) +
  geom_boxplot()

# Create plot of city_mpg
cars %>%
  ggplot(aes(x = 1, y = city_mpg)) +
  geom_boxplot()

cars %>%
  ggplot(aes(city_mpg)) +
  geom_density()

# Create plot of width
cars %>%
  ggplot(aes(x = 1, y = width)) +
  geom_boxplot()

cars %>%
  ggplot(aes(x = width)) +
  geom_density()

# Facet hists using hwy mileage and ncyl
common_cyl %>%
  ggplot(aes(x = hwy_mpg)) +
  geom_histogram() +
  facet_grid(ncyl ~ suv) +
  ggtitle("hwy_mpg by ncyl and suv")
# Typical value for life expectancy?
head(life)
## State County fips Year Female.life.expectancy..years.
## 1 Alabama Autauga County 1001 1985 77.02
## 2 Alabama Baldwin County 1003 1985 78.84
## 3 Alabama Barbour County 1005 1985 76.04
## 4 Alabama Bibb County 1007 1985 76.62
## 5 Alabama Blount County 1009 1985 78.92
## 6 Alabama Bullock County 1011 1985 75.12
## Female.life.expectancy..national..years.
## 1 77.79
## 2 77.79
## 3 77.79
## 4 77.79
## 5 77.79
## 6 77.79
## Female.life.expectancy..state..years. Male.life.expectancy..years.
## 1 76.9 68.13
## 2 76.9 71.14
## 3 76.9 66.83
## 4 76.9 67.34
## 5 76.9 70.60
## 6 76.9 66.61
## Male.life.expectancy..national..years.
## 1 70.85
## 2 70.85
## 3 70.85
## 4 70.85
## 5 70.85
## 6 70.85
## Male.life.expectancy..state..years.
## 1 69.06
## 2 69.06
## 3 69.06
## 4 69.06
## 5 69.06
## 6 69.06
# Small worked sample: the first 11 rounded female life expectancies
x <- head(round(life$Female.life.expectancy..years.), 11)
x
## [1] 77 79 76 77 79 75 77 77 77 78 77
# Mean: by hand (sum divided by the number of values), then with mean()
sum(x) / length(x)
## [1] 77.18182
mean(x)
## [1] 77.18182
# Median: middle value of the sorted data
sort(x)
## [1] 75 76 77 77 77 77 77 77 78 79 79
median(x)
## [1] 77
# Mode: the most frequent value, read off the frequency table
table(x)
## x
## 75 76 77 78 79
## 1 1 6 1 2
str(gapminder)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : int 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Keep only the observations recorded in 2007
gap2007 <- gapminder %>%
  filter(year == 2007)

# Mean and median life expectancy within each continent
summarize(
  group_by(gap2007, continent),
  mean(lifeExp),
  median(lifeExp)
)
## # A tibble: 5 x 3
## continent `mean(lifeExp)` `median(lifeExp)`
## <fctr> <dbl> <dbl>
## 1 Africa 54.80604 52.9265
## 2 Americas 73.60812 72.8990
## 3 Asia 70.72848 72.3960
## 4 Europe 77.64860 78.6085
## 5 Oceania 80.71950 80.7195
# Generate box plots of lifeExp for each continent
gap2007 %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot()

x
## [1] 77 79 76 77 79 75 77 77 77 78 77
# Look at the difference between each point and the mean
sum(x - mean(x))
## [1] -5.684342e-14
# Square each difference to get rid of negatives then sum
sum((x - mean(x))^2)
## [1] 13.63636
# Variance
sum((x - mean(x))^2) / (length(x) - 1)
## [1] 1.363636
var(x)
## [1] 1.363636
# Standard Deviation
sqrt(sum((x - mean(x))^2) / (length(x) - 1))
## [1] 1.167748
sd(x)
## [1] 1.167748
# Inter Quartile Range
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 75.00 77.00 77.00 77.18 77.50 79.00
IQR(x)
## [1] 0.5
# Range
max(x)
## [1] 79
min(x)
## [1] 75
diff(range(x))
## [1] 4
str(gap2007)
## Classes 'tbl_df', 'tbl' and 'data.frame': 142 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 4 1 1 2 5 4 3 3 4 ...
## $ year : int 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
## $ lifeExp : num 43.8 76.4 72.3 42.7 75.3 ...
## $ pop : int 31889923 3600523 33333216 12420476 40301927 20434176 8199783 708573 150448339 10392226 ...
## $ gdpPercap: num 975 5937 6223 4797 12779 ...
# Standard deviation, IQR and group size of lifeExp for each continent
summarize(
  group_by(gap2007, continent),
  sd(lifeExp),
  IQR(lifeExp),
  n()
)
## # A tibble: 5 x 4
## continent `sd(lifeExp)` `IQR(lifeExp)` `n()`
## <fctr> <dbl> <dbl> <int>
## 1 Africa 9.6307807 11.61025 52
## 2 Americas 4.4409476 4.63200 25
## 3 Asia 7.9637245 10.15200 33
## 4 Europe 2.9798127 4.78250 30
## 5 Oceania 0.7290271 0.51550 2
# Overlaid, semi-transparent density curves of lifeExp, one per continent
ggplot(gap2007, aes(x = lifeExp, fill = continent)) +
  geom_density(alpha = 0.3)

# Peek at the 2007 data before summarising it
head(gap2007)
## # A tibble: 6 x 6
## country continent year lifeExp pop gdpPercap
## <fctr> <fctr> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 2007 43.828 31889923 974.5803
## 2 Albania Europe 2007 76.423 3600523 5937.0295
## 3 Algeria Africa 2007 72.301 33333216 6223.3675
## 4 Angola Africa 2007 42.731 12420476 4797.2313
## 5 Argentina Americas 2007 75.320 40301927 12779.3796
## 6 Australia Oceania 2007 81.235 20434176 34435.3674

# Mean and standard deviation of lifeExp in the Americas
summarize(
  filter(gap2007, continent == "Americas"),
  mean(lifeExp),
  sd(lifeExp)
)
## # A tibble: 1 x 2
## `mean(lifeExp)` `sd(lifeExp)`
## <dbl> <dbl>
## 1 73.60812 4.440948

# Median and IQR of population across all countries
summarize(
  gap2007,
  median(pop),
  IQR(pop)
)
## # A tibble: 1 x 2
## `median(pop)` `IQR(pop)`
## <dbl> <dbl>
## 1 10517531 26702008
# 4 characteristics of a distribution that are of interest:
# Create density plot of old variable
gap2007 %>%
  ggplot(aes(x = pop)) +
  geom_density()

# Transform the skewed pop variable
gap2007 <- gap2007 %>%
  mutate(log_pop = log(pop))
# Create density plot of new variable
gap2007 %>%
  ggplot(aes(x = log_pop)) +
  geom_density()

str(gapminder)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : int 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Filter for Asia, add column indicating outliers
# (a country is flagged when its life expectancy is below 50)
gap_asia <- gap2007 %>%
  filter(continent == "Asia") %>%
  mutate(is_outlier = lifeExp < 50)
# Remove outliers, create box plot of lifeExp
gap_asia %>%
  filter(!is_outlier) %>%
  ggplot(aes(x = 1, y = lifeExp)) +
  geom_boxplot()
# ggplot2, dplyr, and openintro are loaded
# Median and IQR of num_char, computed separately for each spam value
summarize(
  group_by(email, spam),
  median(num_char),
  IQR(num_char)
)
## # A tibble: 2 x 3
## spam `median(num_char)` `IQR(num_char)`
## <dbl> <dbl> <dbl>
## 1 0 6.831 13.58225
## 2 1 1.046 2.81800
str(email)
## 'data.frame': 3921 obs. of 21 variables:
## $ spam : num 0 0 0 0 0 0 0 0 0 0 ...
## $ to_multiple : num 0 0 0 0 0 0 1 1 0 0 ...
## $ from : num 1 1 1 1 1 1 1 1 1 1 ...
## $ cc : int 0 0 0 0 0 0 0 1 0 0 ...
## $ sent_email : num 0 0 0 0 0 0 1 1 0 0 ...
## $ time : POSIXct, format: "2012-01-01 01:16:41" "2012-01-01 02:03:59" ...
## $ image : num 0 0 0 0 0 0 0 1 0 0 ...
## $ attach : num 0 0 0 0 0 0 0 1 0 0 ...
## $ dollar : num 0 0 4 0 0 0 0 0 0 0 ...
## $ winner : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ inherit : num 0 0 1 0 0 0 0 0 0 0 ...
## $ viagra : num 0 0 0 0 0 0 0 0 0 0 ...
## $ password : num 0 0 0 0 2 2 0 0 0 0 ...
## $ num_char : num 11.37 10.5 7.77 13.26 1.23 ...
## $ line_breaks : int 202 202 192 255 29 25 193 237 69 68 ...
## $ format : num 1 1 1 1 0 0 1 1 0 1 ...
## $ re_subj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ exclaim_subj: num 0 0 0 0 0 0 0 0 0 0 ...
## $ urgent_subj : num 0 0 0 0 0 0 0 0 0 0 ...
## $ exclaim_mess: num 0 1 6 48 1 1 1 18 1 0 ...
## $ number : Factor w/ 3 levels "none","small",..: 3 2 2 2 1 1 3 2 2 2 ...
# How many emails carry each spam value
table(email[["spam"]])
##
## 0 1
## 3554 367

# Recode spam as a labelled factor: 0 becomes "not-spam", anything else "spam"
email <- mutate(
  email,
  spam = factor(ifelse(spam == 0, "not-spam", "spam"))
)

# Box plots of log(num_char) for each spam category
ggplot(
  mutate(email, log_num_char = log(num_char)),
  aes(x = spam, y = log_num_char)
) +
  geom_boxplot()

# Center (median) and spread (IQR) of exclaim_mess by spam category
summarize(
  group_by(email, spam),
  median(exclaim_mess),
  IQR(exclaim_mess)
)
## # A tibble: 2 x 3
## spam `median(exclaim_mess)` `IQR(exclaim_mess)`
## <fctr> <dbl> <dbl>
## 1 not-spam 1 5
## 2 spam 0 1
# Frequency of each exclamation-mark count
table(email$exclaim_mess)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1435 733 507 128 190 113 115 51 93 45 85 17 56 20 43
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
## 11 29 12 26 5 29 9 15 3 11 6 11 1 6 8
## 30 31 32 33 34 35 36 38 39 40 41 42 43 44 45
## 13 12 13 3 3 2 3 3 1 2 1 1 3 3 5
## 46 47 48 49 52 54 55 57 58 62 71 75 78 89 94
## 3 2 1 3 1 1 4 2 2 2 1 1 1 1 1
## 96 139 148 157 187 454 915 939 947 1197 1203 1209 1236
## 1 1 1 1 1 1 1 1 1 1 2 1 1
# Create plot for spam and exclaim_mess
# A small offset is added before taking logs: log(0) is -Inf, which would
# remove every email with zero exclamation marks from the histogram.
email %>%
  mutate(log_exclaim_mess = log(exclaim_mess + 0.01)) %>%
  ggplot(aes(x = log_exclaim_mess)) +
  geom_histogram() +
  facet_wrap(~ spam)

# Frequency of each image count
table(email$image)
##
## 0 1 2 3 4 5 9 20
## 3811 76 17 11 2 2 1 1
# Proportion of spam among emails with and without at least one image
ggplot(
  mutate(email, has_image = image > 0),
  aes(x = has_image, fill = spam)
) +
  geom_bar(position = "fill")

# Count the emails that have more images than attachments
# (used to test whether images count as attachments)
with(email, sum(image > attach))
## [1] 0

## Within non-spam emails, is the typical length of emails shorter for
## those that were sent to multiple people?
summarize(
  group_by(filter(email, spam == "not-spam"), to_multiple),
  median(num_char)
)
## # A tibble: 2 x 2
## to_multiple `median(num_char)`
## <dbl> <dbl>
## 1 0 7.1960
## 2 1 5.3645
# Question 1
## Among emails that contain the word "dollar", does the typical spam email
## contain a greater number of occurrences of the word than the typical
## non-spam email?
table(email[["dollar"]])
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 3175 120 151 10 146 20 44 12 35 10 22 10 20 7 14
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
## 5 23 2 14 1 10 7 12 7 7 3 7 1 5 1
## 30 32 34 36 40 44 46 48 54 63 64
## 1 2 1 2 3 3 2 1 1 1 3

# Median dollar count per spam category, restricted to emails with dollar > 0
summarize(
  group_by(filter(email, dollar > 0), spam),
  median(dollar)
)
## # A tibble: 2 x 2
## spam `median(dollar)`
## <fctr> <dbl>
## 1 not-spam 4
## 2 spam 2
# Question 2
## If you encounter an email with greater than 10 occurrences of the word "dollar",
## is it more likely to be spam or not-spam?
email %>%
  filter(dollar > 10) %>%
  ggplot(aes(x = spam)) +
  geom_bar()

# Current level order and counts of the number variable
levels(email$number)
## [1] "none" "small" "big"
table(email$number)
##
## none small big
## 549 2827 545
# Reorder levels
email$number <- factor(email$number, levels = c("none", "small", "big"))
# Construct plot of number
ggplot(email, aes(x = number)) +
  geom_bar() +
  facet_wrap(~ spam)