Anderson Iris Data 1928
This is a set of plant measurements collected for two Iris species by Anderson in the 1920s and published in 1928 (Anderson 1928).
This dataset is interesting because it is the precursor to the famous Anderson Iris dataset (Anderson 1935) analysed by Fisher and which has become a standard in the machine learning literature (Fisher 1936).
It is important because it provides an alternative set of test data that is free from “data leakage”. This is when the data used to create your model leaks into the data used to test your model. As this data was completely independently collected it provides a better test case. It only contains data from two species and not three but these are the two most difficult to distinguish. The third is very easily separated from the other two.
It also provides some insight into how to keep (or not keep) a field notebook and now to not store/present data.
The paper gives summary tables with counts in ranges for each location and not the raw data and there is a different table for each of the four variables, petal width, petal length, sepal width and sepal length. None of these are matched for individual plants which is why this dataset has largely been ignored. They are in size order and you can expect that this should be the same for all measurements although this will not be true all of the time.
There are discrepancies between the numbers in the different locations and this suggests bad naming and record keeping and issues of reading the data later. It also seems to have not been written down very systematically which is why there are errors.
I have made educated guesses to try and reconstruct what I think is the real dataset moving or removing values dependent on what I reason happened between making the recordings and writing the paper. The numbers for the petal lengths have been set as the correct ones as they are the leading column in the table.
That I needed to make these adjustments stresses the importance of a well kept laboratory or field notebook and the need for careful transcription of data when it moves from one form to another as there will be transcriptional errors.
The final values are created from assuming a uniform variable across the ranges for each of the sets of counts. This is then rounded to a single decimal place as was the Fisher data.
#
# Header Data Containing the Meta Data About Collection Dates and locations
#
############################################################################
irisc1 <-c(rep(1927,25),rep(1927,12),rep(1927,21),rep(1926,17),
rep(1926,7),rep(1925,50),rep(1926,35),rep(1926,11),rep(1926,21),
rep(1926,17),rep(1926,39),rep(1925,26),rep(1926,15),rep(1924,23),
rep(1925,38),rep(1926,8),rep(1926,43),rep(1926,22),rep(1925,6),
rep(1926,40),rep(1927,33),rep(1928,27),rep(1924,26),rep(1928,1),
rep(1928,20))
irisc2 <- c(rep("Kimborough",25),rep("Wiggins",12),rep("Jackson",21),rep("Arlington",17),
rep("Huntingdon",7),rep("Camden",50),rep("Bonnieville",35),rep("Elizabethtown",11),rep("Stanton",21),
rep("Hayden",17),rep("Anna",39),rep("Vulcan",26),rep("Vulcan",15),rep("East St. Louis",23),
rep("Farmington",38),rep("Pilot Knob",8),rep("Wicks",43),rep("Valley Park",22),rep("P. des Sioux",6),
rep("P. des Sioux",40),rep("P. des Sioux",33),rep("P. des Sioux",27),rep("Louisiana",26),rep("Rich-Tex",1),
rep("Eastover",20))
irisc3 <- c(rep("Ala",25),rep("Miss",12),rep("Miss",21),rep("Tenn",17),
rep("Tenn",7),rep("Tenn",50),rep("Ky",35),rep("Ky",11),rep("Ky",21),
rep("Ind",17),rep("Ill",39),rep("Ill",26),rep("Ill",15),rep("Ill",23),
rep("Ark",38),rep("Mo",8),rep("Mo",43),rep("Mo",22),rep("Mo",6),
rep("Mo",40),rep("Mo",33),rep("Mo",27),rep("Mo",26),rep("S.C.",1),
rep("S.C.",20))
irisc1a <- c(rep(1928,5),rep(1928,18),rep(1928,15),1928,rep(1924,26),
rep(1924,8),rep(1924,10),rep(1927,31),rep(1925,28),rep(1925,23),
rep(1925,62),rep(1925,57),rep(1925,18),rep(1925,32),rep(1925,22),rep(1924,15),
rep(1924,13),rep(1924,22),rep(1926,35),rep(1926,20),rep(1926,30),
rep(1926,30),rep(1926,35),rep(1924,25),rep(1924,46))
irisc2a <- c(rep("Fair Bluff",5),rep("Wilmington",18),rep("Maysville",15),"Wolf Creek",rep("Fort Madison",26),
rep("Burk",8),rep("Gilbertville",10),rep("Sunbury",31),rep("Mill Creek",28),rep("Huron",23),
rep("Bay Bridge",62),rep("Catawba",57),rep("S. Mid. Bass Is",18),rep("N. Mid. Bass Is.",32),rep("North Bass Is.",22),
rep("Newport",15),rep("Monroe",13),rep("Brooklyn",22),rep("Lawrence",35),rep("Colon",20),rep("Centerville",30),
rep("Schoolcraft",30),rep("Hartland",35),rep("Armada",25),rep("Yale",46))
irisc3a <- c(rep("N.C.",5),rep("N.C.",18),rep("N.C.",15),"Tenn",rep("Iowa",26),
rep("Iowa",8),rep("Ohio",10),rep("Ohio",31),rep("Ohio",28),rep("Ohio",23),
rep("Ohio",62),rep("Ohio",57),rep("Ohio",18),rep("Ohio",32),rep("Ohio",22),rep("Mich.",15),
rep("Mich.",13),rep("Mich.",22),rep("Mich.",35),rep("Mich.",20),rep("Mich.",30),
rep("Mich.",30),rep("Mich.",35),rep("Mich.",25),rep("Mich.",46))
irisc1b <- c(rep(1924,42),rep(1926,19),rep(1924,13),rep(1926,35),rep(1927,30),
rep(1926,16),rep(1926,30),rep(1927,25),rep(1924,44),rep(1924,4),
rep(1924,10),rep(1924,52),rep(1927,27),rep(1924,33),rep(1927,3))
irisc2b <- c(rep("Otisville",42),rep("Otisville",19),rep("Fay Bridge",13),rep("Frankenmuth",35),rep("Frankenmuth",30),
rep("W.Saginah",16),rep("Linwood",30),rep("Linwood",25),rep("Muskegon",44),rep("Albert Lea",4),
rep("Hayward",10),rep("La Crosse",52),rep("Pardeeville",27),rep("Slinger",33),rep("Pelee Is.",3))
irisc3b <- c(rep("Mich.",42),rep("Mich.",19),rep("Mich.",13),rep("Mich.",35),rep("Mich.",30),
rep("Mich.",16),rep("Mich.",30),rep("Mich.",25),rep("Mich.",44),rep("Mich.",4),
rep("Mich.",10),rep("Wis.",52),rep("Wis.",27),rep("Wis.",33),rep("Ont.",3))
irisc1d <- c(rep(1927,26),rep(1927,20),rep(1925,43),rep(1925,14),rep(1925,13),
rep(1924,29),rep(1925,16),rep(1925,18),rep(1927,25),rep(1925,35),
rep(1925,27),rep(1925,37),rep(1925,37),rep(1926,26),rep(1926,34),rep(1924,5),
rep(1927,20),rep(1927,25),rep(1926,31),rep(1926,26),rep(1927,24),rep(1927,13),
rep(1927,14),rep(1927,13))
irisc2d <- c(rep("Hood",26),rep("Liverpool",20),rep("Harmonsburg",43),rep("Meadville",14),rep("Greenville",13),
rep("Conewango",29),rep("Conewango",16),rep("Cherry Cr.",18),rep("Villenova",25),rep("Hubbardsville",35),
rep("Pownal",27),rep("Pownal Center",37),rep("Clarendon",37),rep("Middlebury",26),rep("New Haven Jc.",34),rep("Holderness",5),
rep("Duxbury",20),rep("Alberton",25),rep("Ottawa",31),rep("L. Timagami",26),rep("Truro",24),rep("Antigonish",13),
rep("Baddeck",14),rep("McAdam",13))
irisc3d <- c(rep("Md.",26),rep("Pa.",20),rep("Pa.",43),rep("Pa.",14),rep("Pa.",13),
rep("N.Y.",29),rep("N.Y.",16),rep("N.Y.",18),rep("N.Y.",25),rep("N.Y.",35),
rep("Vt.",27),rep("Vt.",37),rep("Vt.",37),rep("Vt.",26),rep("Vt.",34),rep("N.H.",5),
rep("Mass.",20),rep("Ont.",25),rep("Ont.",31),rep("Ont.",26),rep("N.S.",24),rep("N.S.",13),
rep("N.S.",14),rep("N.B.",13))
##########Petal Data#############
irisc4 <- c(rep(8,7),rep(9,5),rep(10,8),rep(11,5),
7,rep(9,2),rep(10,4),rep(11,4),13,
rep(7,8),rep(8,5),rep(9,5),rep(10,2),11,
rep(8,7),rep(9,6),rep(10,2),11,12,
rep(7,2),8,rep(9,2),rep(10,2),
7,rep(8,8),rep(9,13),rep(10,15),rep(11,7),rep(12,3),rep(13,3),
7,rep(9,8),rep(10,11),rep(11,11),rep(12,4),
7,8,rep(9,3),rep(10,3),rep(11,3),
6,rep(7,8),rep(8,8),rep(9,3),10,
rep(6,2),rep(7,2),rep(8,5),rep(9,4),rep(10,3),11,
rep(6,2),rep(7,6),rep(8,16),rep(9,5),rep(10,8),11,12,
6,rep(7,3),rep(8,4),rep(9,7),rep(10,6),rep(11,5),
6,rep(8,4),rep(9,5),rep(10,5),
rep(7,2),rep(8,3),rep(9,6),rep(10,8),rep(11,4),
rep(7,4),rep(8,2),rep(9,14),rep(10,8),rep(11,9),12,
rep(8,2),rep(10,4),11,12,
rep(5,2),rep(6,10),rep(7,10),rep(8,10),rep(9,8),rep(10,3),
6,7,rep(8,11),rep(9,7),rep(10,2),
rep(7,5),8,
rep(5,3),rep(6,7),rep(7,9),rep(8,11),rep(9,6),rep(10,3),11,
7,rep(8,9),rep(9,6),rep(10,8),rep(11,6),rep(12,3),
rep(8,4),rep(9,9),rep(10,6),rep(11,6), rep(13,2),
6,rep(7,2),rep(8,12),rep(9,9),rep(10,2),
11,
7,8,rep(9,7),rep(10,6),rep(12,4),13)
irisc5 <- c(1.35,rep(1.55,3),rep(1.75,11),rep(1.95,5),2.15,rep(2.35,2),rep(2.55,2),
rep(0.95,2),1.15,rep(1.35,3),rep(1.75,2),rep(1.95,2),2.15,2.35,
rep(1.35,3),rep(1.55,4),rep(1.75,8),rep(1.95,4),rep(2.15,2),
1.15,1.35,rep(1.55,6),rep(1.75,7),rep(1.92,2),
1.35,rep(1.65,3),1.75,1.95,2.15,
1.35,rep(1.55,3),rep(1.75,12),rep(1.95,11),rep(2.15,9),rep(2.35,11),rep(2.55,3),
rep(1.35,2),rep(1.55,3),rep(1.75,14),rep(1.95,11),rep(2.15,4),2.35,
1.15,1.35,rep(1.75,3),rep(1.95,2),rep(2.15,2),2.35,2.55,
0.95,rep(1.35,4),rep(1.55,11),1.75,rep(1.95,2),rep(2.15,2),
0.95,rep(1.15,3),rep(1.35,5),rep(1.55,3),rep(1.75,3),rep(1.95,2),
rep(1.35,5),rep(1.55,3),rep(1.75,18),rep(1.95,8),rep(2.15,2),rep(2.35,2),2.55,
rep(1.55,8),rep(1.75,7),rep(1.95,9),2.15,2.15,
1.15,rep(1.35,4),rep(1.55,5),rep(1.75,4),1.95,
rep(1.35,3),rep(1.55,5),rep(1.75,8),rep(1.95,2),rep(2.15,3),rep(2.55,2),
1.35,1.55,rep(1.75,4),rep(1.95,7),rep(2.15,9),rep(2.35,11),rep(2.55,3),rep(2.75,2),
1.55,rep(1.95,4),2.15,rep(2.35,2),
rep(1.15,9),rep(1.35,11),rep(1.55,9),rep(1.75,8),rep(1.95,4),2.15,2.75,
rep(1.15,3),rep(1.35,3),rep(1.55,1),rep(1.75,7),rep(1.95,5),rep(2.15,3),
rep(1.15,3),1.35,1.55,1.55,
rep(0.95,2),rep(1.15,5),rep(1.35,12),rep(1.55,9),rep(1.75,4),rep(1.95,4),2.15,2.15,2.35,2.35,
1.15,rep(1.35,9),rep(1.55,6),rep(1.75,7),rep(1.95,4),rep(2.15,3),2.35,2.55,2.95,
1.15,rep(1.55,2),rep(1.75,7),rep(1.95,6),rep(2.15,4),rep(2.35,2),rep(2.55,3),2.75,2.95,
0.95,rep(1.35,8),rep(1.55,3),rep(1.75,7),rep(1.95,6),2.15,
1.55,
0.95,1.35,rep(1.75,8),rep(1.95,8),rep(2.15,2)
)
irisc4a <- c(9,10,10,10,11,
7,10,10,rep(11,7),rep(12,4),13,13,14,14,
8,8,9,9,10,10,rep(11,7),12,13,
10,
5,5,6,6,rep(7,9),rep(8,7),rep(9,4),10,11,
rep(7,4),8,8,9,9,
6,rep(8,4),9,rep(10,4),
6,7,7,7,rep(8,5),rep(9,7),rep(10,10),rep(11,5),
4,4,4,rep(5,9),rep(6,7),rep(7,5),rep(8,3),10,
4,4,rep(5,3),rep(6,8),rep(7,3),rep(8,5),9,9,
rep(4,4),rep(5,16),rep(6,24),rep(7,14),rep(8,4),
3,3,rep(4,6),rep(5,18),rep(6,17),rep(7,13),9,
3,4,4,4,5,5,5,rep(6,10),7,
4,5,5,5,rep(6,10),rep(7,11),8,8,8,9,9,9,10,
3,4,5,5,5,rep(6,4),rep(7,11),8,8,
5,5,rep(6,3),rep(7,4),rep(8,5),9,
6,rep(7,4),rep(8,6),9,10,
4,5,rep(6,7),rep(7,8),rep(8,3),9,11,
rep(6,3),rep(7,5),rep(8,12),rep(9,10),rep(10,4),11,
6,rep(7,9),rep(8,6),9,9,10,11,
6,6,rep(7,4),rep(8,8),rep(9,9),rep(10,7),
rep(7,10),rep(8,10),rep(9,5),rep(10,4),11,
6,6,6,rep(7,9),rep(8,14),rep(9,8),10,
5,5,rep(6,12),rep(7,8),8,8,8,
5,5,rep(6,10),rep(7,18),rep(7,12),8,8,8,9
)
irisc5a <- c(1.15,1.35,1.35,1.55,1.75,
0.95,1.35,rep(1.55,2),rep(1.75,5),rep(1.95,4),rep(2.15,4),2.35,
1.55,rep(1.75,6),rep(1.95,3),rep(2.15,2),rep(2.35,2),2.55,
1.75,
1.15,rep(1.35,6),rep(1.55,7),rep(1.75,8),1.95,rep(2.15,3),
1.55,1.55,rep(1.75,4),1.95,2.15,
rep(1.35,6),1.55,1.75,2.15,2.15,
1.15,rep(1.35,3),rep(1.55,2),rep(1.75,10),rep(1.95,11),rep(2.15,3),2.55,
rep(0.95,8),rep(1.15,4),rep(1.35,7),rep(1.55,3),rep(1.75,4),rep(1.95,2),
0.75,rep(0.95,3),1.15,rep(1.35,11),rep(1.55,5),1.95,2.15,
rep(0.75,3),rep(0.95,9),rep(1.15,16),rep(1.35,17),rep(1.55,5),rep(1.75,9),rep(1.95,3),
0.75,rep(0.95,5),rep(1.15,18),rep(1.35,13),rep(1.55,9),rep(1.75,6),rep(1.95,4),2.15,
rep(0.75,2),0.95,rep(1.15,6),rep(1.35,7),1.55,1.75,
0.75,0.95,rep(1.15,3),rep(1.35,7),rep(1.55,14),rep(1.75,5),1.95,
rep(0.95,5),rep(1.15,5),rep(1.35,7),rep(1.55,3),rep(1.95,2),
1.15,rep(1.35,2),rep(1.55,6),rep(1.75,3),1.95,2.15,2.35,
rep(1.35,6),1.55,rep(1.75,4),1.95,2.15,
rep(1.15,2),rep(1.35,7),rep(1.55,5),rep(1.75,4),1.95,2.15,2.15,2.35,
1.35,rep(1.55,9),rep(1.75,6),rep(1.95,13),rep(2.15,4),2.35,2.55,
1.15,1.15,rep(1.35,4),rep(1.55,8),rep(1.75,5),1.95,
1.15,1.15,rep(1.35,3),rep(1.55,9),rep(1.75,10),rep(1.95,3),2.15,rep(2.35,2),
rep(0.95,3),rep(1.35,2),rep(1.55,6),rep(1.74,11),rep(1.95,6),rep(2.15,2),
1.15,rep(1.35,2),rep(1.55,7),rep(1.75,10),rep(1.95,8),rep(2.15,4),rep(2.35,3),
rep(1.15,2),rep(1.35,11),rep(1.55,5),rep(1.75,6),1.95,
rep(1.15,3),rep(1.35,9),rep(1.55,12),rep(1.75,12),rep(1.95,7),rep(2.15,2),2.35
)
irisc4b <-c(4,4,rep(5,6),rep(6,9),rep(7,13),rep(8,8),rep(9,3),10,
rep(6,5),rep(7,8),rep(8,3),rep(9,3),
4,5,rep(6,6),rep(8,3),9,9,
rep(6,4),rep(7,11),rep(8,10),rep(9,7),rep(10,3),
6,rep(7,8),rep(8,11),rep(9,7),rep(10,3),
6,7,7,rep(8,10),9,9,9,
6,6,rep(7,8),rep(8,15),rep(9,5),
5,6,6,rep(7,5),rep(8,15),9,10,
4,4,rep(5,5),rep(6,10),rep(7,14),rep(8,10),9,9,10,
6,6,7,7,
6,rep(7,6),8,8,9,
4,rep(6,8),rep(7,14),rep(8,15),rep(9,9),rep(10,4),11,
rep(6,6),rep(7,4),rep(8,10),rep(9,5),10,10,
5,rep(6,6),rep(7,7),rep(8,13),rep(9,5),10,
6,7,8)
irisc5b <- c(0.95,rep(1.15,3),rep(1.35,15),rep(1.55,11),rep(1.75,10),1.95,2.15,
rep(1.55,6),rep(1.75,6),rep(1.95,5),rep(2.15,2),
1.15,rep(1.35,6),rep(1.55,2),rep(1.75,2),rep(1.95,2),
rep(1.15,3),rep(1.35,2),rep(1.55,4),rep(1.75,14),rep(1.95,9),2.15,2.35,2.75,
1.15,rep(1.35,2),rep(1.55,8),rep(1.75,10),rep(1.95,6),rep(2.15,2),2.35,
1.55,rep(1.75,4),rep(1.95,7),rep(2.15,3),2.35,
1.15,rep(1.35,3),rep(1.55,3),rep(1.75,8),rep(1.95,9),rep(2.15,5),2.35,
1.15,rep(1.35,2),rep(1.55,8),rep(1.75,3),rep(1.95,5),rep(2.15,2),rep(2.35,4),
rep(1.15,2),rep(1.35,9),rep(1.55,16),rep(1.75,8),rep(1.95,4),rep(2.15,2),rep(2.35,3),
rep(1.55,3),1.75,
1.15,rep(1.35,5),rep(1.55,3),1.75,
rep(1.15,7),rep(1.35,7),rep(1.55,16),rep(1.75,11),rep(1.95,6),rep(2.15,4),2.35,
rep(1.55,7),rep(1.75,10),rep(1.95,6),rep(2.15,4),
rep(1.15,2),rep(1.35,2),rep(1.55,9),rep(1.75,12),rep(1.95,2),rep(2.15,4),rep(2.35,2),
1.35,1.55,1.95
)
irisc4d <- c(rep(3,3),rep(4,6),rep(5,9),rep(6,7),7,
rep(4,4),rep(5,3),rep(6,8),rep(7,5),
rep(2,3),rep(3,3),rep(4,20),rep(5,9),rep(6,8),
rep(3,3),rep(4,3),rep(5,7),6,
1, 3,3,rep(4,4),rep(5,4),6,7,
4,5,5,5,rep(6,14),rep(7,4),rep(8,6),9,
2,rep(3,4),rep(4,4),rep(5,4),rep(6,3),
3,rep(4,4),rep(5,10),rep(6,3),
rep(5,7),rep(6,8),rep(7,8),8,8,
3,rep(4,17),rep(5,14),rep(6,3),
2,rep(3,12),rep(4,10),rep(5,3),6,
1,rep(3,12),rep(4,19),rep(5,5),
2,2,rep(3,6),rep(4,15),rep(5,9),rep(6,4),7,
4,4,rep(5,7),rep(6,7),rep(7,9),8,
3,rep(4,5),rep(5,9),rep(6,14),rep(7,3),8,8,
4,5,5,5,6,
rep(4,3),rep(5,11),rep(6,5),7,
rep(4,3),rep(5,7),rep(6,12),7,7,7,
4,4,4,rep(5,7),rep(6,8),rep(7,10),8,8,9,
4,4,4,rep(5,7),rep(6,5),rep(7,9),8,9,
rep(5,6),rep(6,9),rep(7,3),rep(8,6),
3,rep(4,5),rep(5,6),7,
rep(5,3),rep(6,9),7,7,
4,4,5,5,rep(6,7),8,8)
irisc5d <- c(0.55,rep(0.75,3),rep(0.95,8),rep(1.15,6),rep(1.35,5),rep(1.55,2),1.75,
0.75,rep(0.95,3),rep(1.15,3),rep(1.35,5),rep(1.55,6),1.75,1.95,
rep(0.75,6),rep(0.95,4),rep(1.15,17),rep(1.35,8),rep(1.55,8),
rep(0.75,9),rep(0.95,4),1.15,
rep(0.75,5),rep(0.95,7),1.15,
rep(1.15,2),rep(1.35,4),rep(1.55,6),rep(1.75,6),rep(1.95,9),rep(2.15,2),
rep(0.75,2),rep(0.95,5),rep(1.15,4),rep(1.35,4),rep(1.55,1),
rep(0.75,3),rep(0.95,5),rep(1.15,5),rep(1.35,2),rep(1.55,3),
rep(1.35,6),rep(1.55,8),rep(1.75,9),rep(1.95,2),
rep(0.75,5),rep(0.95,13),rep(1.15,10),rep(1.35,5),rep(1.55,2),
0.55,rep(0.75,11),rep(0.95,11),rep(1.15,2),rep(1.35,2),
rep(0.75,10),rep(0.95,9),rep(1.15,11),rep(1.35,7),1.75,
rep(0.75,4),rep(0.95,15),rep(1.15,13),rep(1.35,4),1.55,
rep(1.15,4),rep(1.35,10),rep(1.55,6),rep(1.75,4),rep(1.95,2),
rep(0.95,3),rep(1.15,5),rep(1.35,9),rep(1.55,8),rep(1.75,6),rep(1.95,2),2.15,
0.75,0.95,0.95,1.15,1.15,
rep(0.95,1),rep(1.15,8),rep(1.35,7),rep(1.55,1),rep(1.95,2),2.35,
rep(1.15,1),rep(1.35,5),rep(1.55,8),rep(1.75,9),rep(1.95,1),2.15,
rep(0.95,3),rep(1.15,10),rep(1.35,7),rep(1.55,7),rep(1.75,3),2.35,
rep(0.95,4),rep(1.15,10),rep(1.35,9),rep(1.55,2),rep(1.75,1),
rep(0.95,1),rep(1.15,4),rep(1.35,10),rep(1.55,5),rep(1.95,3),
rep(1.15,2),rep(1.35,6),rep(1.55,4),rep(1.75,1),
rep(1.15,6),rep(1.35,1),rep(1.55,2),rep(1.75,4),1.95,
0.95,rep(1.15,2),rep(1.35,2),rep(1.55,3),rep(1.75,3),1.95,2.15
)
######### Sepal Data #########
irisc6 <- c(rep(5,4),rep(6,6),rep(7,3),rep(8,8),rep(9,3),11,
rep(6,3),rep(8,6),9,10,11,
rep(4,2),rep(5,7),rep(6,3),rep(7,3),rep(8,5),9,
rep(5,6),rep(6,5),rep(7,3),rep(8,2),9,
5,6,7,7,8,9,9,
4,5,rep(6,7),rep(7,15),rep(8,11),rep(9,7),rep(10,3),rep(11,5),
5,rep(6,5),rep(7,8),rep(8,8),rep(9,9),rep(10,3),11,
5,6,7,rep(8,6),rep(9,2),
3,rep(4,4),rep(5,7),rep(6,6),rep(7,3),
rep(4,2),rep(5,5),rep(6,5),rep(7,6),8,
rep(4,2),rep(5,4),rep(6,8),rep(7,14),rep(8,8),rep(9,2),
3,rep(4,2),rep(5,2),rep(6,7),rep(7,8),rep(8,2),rep(9,3),rep(10,2),
5,rep(6,3),rep(7,8),rep(9,2),10,
3,5,5,6,6,rep(7,8),rep(9,4),rep(10,3),11,
rep(5,6),rep(6,7),rep(7,10),rep(8,7),rep(9,6),10,11,
rep(6,2),rep(7,2),rep(9,3),10,
3,rep(4,6),rep(5,13),rep(6,8),rep(7,8),rep(8,5),rep(9,2),
3,5,rep(6,10),rep(7,7),8,8,9,12,
4,5,6,6,7,8,
3,3,rep(4,3),rep(5,13),rep(6,10),rep(7,5),rep(8,7),
rep(6,10),rep(7,8),rep(8,3),rep(9,5),rep(10,3),11,
rep(6,7),rep(7,6),rep(8,10),rep(9,4),10,
rep(5,2),rep(6,8),rep(7,9),rep(8,7),9,
10,
rep(6,3),rep(7,5),rep(8,3),rep(9,3),rep(10,5),12)
irisc6a <- c(rep(8,3),9,9,
6,7,rep(8,3),rep(9,4),rep(10,6),rep(11,2),12,
rep(8,6),9,rep(10,5),rep(11,3),
9,
rep(4,4),rep(5,5),rep(6,7),rep(7,5),rep(8,5),
rep(5,2),rep(6,3),rep(7,2),rep(8,1),
rep(5,1),rep(6,3),rep(7,5),rep(8,1),
rep(6,5),rep(7,8),rep(8,5),rep(9,10),rep(10,3),
1,2,2,rep(3,7),rep(4,5),rep(5,7),rep(6,3),rep(7,2),8,
rep(3,3),rep(4,3),rep(5,6),rep(6,4),rep(7,2),rep(8,4),9,
rep(2,4),rep(3,11),rep(4,21),rep(5,15),rep(6,10),7,
1,rep(2,5),rep(3,13),rep(4,17),rep(5,14),rep(6,6),7,
1,2,rep(3,3),rep(4,3),rep(5,10),
2,rep(3,4),rep(4,12),rep(5,9),rep(6,4),7,8,
1,rep(2,3),rep(3,2),rep(4,6),rep(5,6),rep(6,4),
rep(4,3),rep(5,4),rep(6,3),rep(7,4),8,
rep(5,3),rep(6,6),rep(7,3),8,
2,3,rep(4,7),rep(5,7),rep(6,3),rep(7,2),9,
rep(4,2),rep(5,7),rep(6,10),rep(7,11),rep(8,4),9,
4,rep(5,6),rep(6,8),rep(7,2),8,9,10,
rep(5,4),rep(6,9),rep(7,13),rep(8,4),
4,4,rep(5,4),rep(6,6),rep(7,9),rep(8,6),rep(9,2),10,
rep(4,3),rep(5,7),rep(6,16),rep(7,6),rep(8,3),
rep(3,2),rep(4,3),rep(5,10),rep(6,8),rep(7,2),
rep(2,2),rep(3,2),rep(4,9),rep(5,13),rep(6,12),rep(7,5),rep(8,3)
)
irisc6b <- c(2,rep(3,3),rep(4,4),rep(5,19),rep(6,7),rep(7,7),8,
3,rep(4,4),rep(5,5),rep(6,9),
3,3,rep(4,5),rep(6,2),rep(7,3),8,
rep(3,3),rep(4,2),rep(5,9),rep(6,7),rep(7,6),rep(8,6),9,9,
rep(4,2),rep(5,10),rep(6,10),rep(7,2),rep(8,4),rep(9,2),
rep(4,2),rep(5,2),rep(6,7),rep(7,2),rep(8,3),
rep(4,2),rep(5,7),rep(6,8),rep(7,5),rep(8,7),9,
3,rep(4,3),rep(5,4),rep(6,10),rep(7,4),8,9,11,
2,2,rep(3,6),rep(4,10),rep(5,14),rep(6,11),7,
2,3,3,3,
rep(4,2),rep(5,3),rep(6,3),rep(7,2),
2,3,rep(4,3),rep(5,11),rep(6,18),rep(7,12),8,8,rep(9,3),11,
3,rep(4,6),rep(5,10),rep(6,5),7,rep(8,4),
rep(3,3),rep(4,2),rep(5,5),rep(6,11),rep(7,9),rep(8,2),9,
4,5,6
)
irisc6d <- c(rep(4,4),rep(5,8),rep(6,9),rep(7,5),
rep(4,2),rep(5,3),rep(6,7),rep(7,4),rep(8,4),
rep(1,2),rep(2,1),rep(3,6),rep(4,16),rep(5,7),rep(6,6),rep(7,5),
2,rep(3,3),rep(4,7),rep(5,3),
2,rep(3,5),rep(4,5),5,6,
rep(4,3),rep(5,8),rep(6,8),rep(7,9),10,
rep(2,2),rep(3,3),rep(4,9),rep(5,2),
rep(2,2),rep(3,6),rep(4,7),rep(5,3),
rep(4,4),rep(5,8),rep(6,8),rep(7,4),8,
rep(3,3),rep(4,4),rep(5,14),rep(6,12),7,8,
1,rep(2,3),rep(3,7),rep(4,8),rep(5,7),6,
1,rep(2,4),rep(3,8),rep(4,17),rep(5,7),
rep(1,2),rep(2,1),rep(3,8),rep(4,13),rep(5,9),rep(6,4),
rep(3,2),rep(4,2),rep(5,3),rep(6,11),rep(7,4),rep(8,4),
rep(4,5),rep(5,11),rep(6,11),rep(7,4),rep(8,3),
3,4,5,5,6,
rep(3,2),rep(4,4),rep(5,10),rep(6,3),8,
rep(4,5),rep(5,10),rep(6,10),
rep(5,6),rep(6,11),rep(7,6),rep(8,8),
rep(4,2),rep(5,5),rep(6,8),rep(7,5),rep(8,5),9,
rep(4,3),rep(5,4),rep(6,5),rep(7,7),rep(8,3),rep(9,2),
2,rep(3,2),rep(4,5),rep(5,2),rep(6,3),
3,rep(4,4),rep(5,2),rep(6,5),rep(7,2),
rep(5,6),rep(6,4),rep(7,2),8
)
irisc7 <- c(rep(5,2),rep(6,3),rep(7,7),rep(8,4),rep(9,2),rep(10,5),11,11,
rep(4,2),5,rep(6,2),7,rep(8,4),10,11,
rep(4,1),rep(5,2),rep(6,5),rep(7,3),rep(8,5),rep(9,3),10,11,
rep(6,2),rep(7,3),rep(8,10),rep(10,2),
5,5,7,rep(9,4),
5,rep(7,6),rep(8,12),rep(9,14),rep(10,9),rep(11,5),12,rep(13,2),
rep(7,3),rep(8,6),rep(9,12),rep(10,9),rep(11,4),12,
6,7,8,9,9,10,10,rep(11,3),12,
4,rep(5,4),rep(6,5),rep(7,4),rep(8,3),rep(9,2),rep(10,2),
rep(3,2),rep(4,5),rep(5,2),rep(6,4),rep(8,3),9,
rep(5,2),rep(6,4),rep(7,8),rep(8,10),rep(9,8),rep(10,5),rep(12,2),
4,rep(5,2),rep(6,6),rep(7,6),rep(8,5),rep(9,5),10,
rep(6,4),rep(7,4),rep(8,4),rep(9,3),
rep(4,3),rep(5,3),rep(6,3),rep(7,3),rep(8,3),rep(9,3),rep(10,2),rep(11,3),
4,rep(5,2),rep(6,2),rep(7,6),rep(8,4),rep(9,10),rep(10,5),11,12,12,rep(13,4),14,
8,rep(9,3),10,11,11,12,
3,rep(4,3),rep(5,7),rep(6,7),rep(7,10),rep(8,9),rep(9,4),10,11,
4,5,5,5,6,6,7,7,8,8,8,9,9,9,rep(10,6),11,11,
7,8,8,10,10,12,
4,4,rep(5,4),rep(6,5),rep(7,5),rep(8,13),rep(9,5),10,11,11,12,12,15,
5,6,6,rep(7,5),rep(8,6),rep(9,6),rep(10,5),rep(11,3),rep(12,4),14,
6,8,rep(9,9),rep(10,4),rep(11,7),rep(12,2),rep(13,2),14,
4,6,rep(7,5),rep(8,7),rep(9,7),rep(10,3),rep(11,2),
6,
4,5,6,rep(7,6),rep(8,3),rep(9,6),10,11
)
irisc7a <- c(5,7,8,8,9,
5,rep(7,3),rep(8,3),rep(9,6),rep(10,1),rep(11,2),12,14,
8,rep(9,4),rep(10,6),rep(11,2),rep(12,2),
7,
4,rep(4,3),rep(6,5),rep(7,3),rep(8,5),rep(9,4),10,rep(11,3),14,
rep(5,1),rep(6,1),rep(8,3),rep(9,1),10,11,
rep(6,4),rep(7,2),rep(8,1),rep(9,2),11,
5,rep(6,2),rep(7,6),rep(8,4),rep(9,7),rep(10,4),rep(11,4),12,12,13,
2,2,rep(3,7),rep(4,5),rep(5,4),rep(6,4),rep(7,3),8,8,9,
rep(4,6),rep(5,1),rep(6,5),rep(7,6),rep(8,3),9,9,
rep(3,2),rep(4,11),rep(5,13),rep(6,16),rep(7,15),rep(8,2),9,10,11,
rep(3,2),rep(4,9),rep(5,12),rep(6,12),rep(7,10),rep(8,6),rep(9,4),rep(10,2),
3,rep(5,6),rep(6,3),rep(7,6),rep(6,2),
4,rep(5,9),rep(6,5),rep(7,8),rep(8,4),rep(9,4),12,
2,rep(4,2),rep(5,3),rep(6,5),rep(7,9),rep(9,2),
rep(5,2),rep(6,2),rep(7,3),rep(8,4),rep(9,3),11,
3,rep(5,3),rep(6,3),rep(7,2),rep(8,2),rep(11,2),
4,5,5,rep(6,4),rep(7,4),rep(8,6),rep(9,2),rep(10,3),
rep(4,2),rep(5,1),rep(6,4),rep(7,8),rep(8,8),rep(9,8),rep(10,2),rep(11,2),
rep(5,3),rep(6,5),rep(7,5),rep(8,4),rep(9,1),rep(10,2),
rep(6,2),rep(7,8),rep(8,7),rep(9,8),rep(10,3),rep(12,2),
4,rep(5,4),rep(6,5),rep(7,6),rep(8,6),rep(9,6),rep(10,2),
4,rep(5,3),rep(6,7),rep(7,5),rep(8,10),rep(9,6),10,11,12,
rep(4,3),rep(5,10),rep(6,4),rep(7,6),8,9,
rep(3,2),rep(4,5),rep(5,13),rep(6,12),rep(7,6),rep(8,4),9,9,10,10
)
irisc7b <- c(3,3,rep(4,6),rep(5,12),rep(6,9),rep(7,9),rep(8,4),
4,rep(5,4),rep(6,5),rep(7,5),rep(8,4),
rep(5,4),rep(6,6),rep(7,3),
rep(4,3),rep(5,2),rep(6,9),rep(7,6),rep(8,8),rep(9,3),rep(10,2),rep(11,2),
2,rep(4,2),5,rep(6,3),rep(7,6),rep(8,8),rep(9,7),rep(10,2),
5,rep(6,3),rep(7,3),rep(8,3),rep(9,4),rep(10,2),
4,5,rep(6,2),rep(7,5),rep(8,6),rep(9,7),rep(10,5),rep(11,2),12,
4,5,6,6,7,7,rep(8,6),rep(9,3),rep(10,4),rep(11,3),12,12,13,
rep(2,4),rep(4,5),rep(5,11),rep(6,10),rep(7,4),rep(8,6),rep(9,2),10,11,
4,5,5,6,
3,rep(5,4),rep(6,4),8,
5,rep(6,11),rep(7,15),rep(8,8),rep(9,6),rep(10,9),rep(11,2),
5,5,rep(6,6),rep(7,3),rep(8,5),rep(9,7),10,rep(11,3),
rep(4,2),rep(5,7),rep(6,10),rep(7,3),rep(8,6),rep(9,4),10,
7,8,9
)
irisc7d <- c(rep(4,1),rep(5,2),rep(6,5),rep(7,4),rep(8,9),rep(9,3),rep(10,1),11,
7,rep(8,6),rep(9,4),rep(10,6),rep(11,3),
rep(4,2),rep(5,1),rep(6,6),rep(7,16),rep(8,7),rep(9,6),rep(10,5),
3, rep(5,4),rep(6,4),rep(7,4),8,
3,4,rep(5,3),6,6,7,7,rep(8,3),10,
5,7,8,rep(9,6),rep(10,10),rep(11,7),rep(12,3),
4,5,5,6,6,6,rep(7,6),rep(8,3),9,
4,5,rep(6,3),rep(7,8),rep(8,3),rep(9,2),
rep(6,2),rep(7,4),rep(8,6),rep(9,10),rep(10,2),11,
4,rep(5,5),rep(6,4),rep(7,8),rep(8,3),rep(9,10),rep(11,2),rep(12,2),
rep(3,2),rep(4,6),rep(5,7),rep(6,8),rep(7,3),8,
rep(3,3),rep(4,7),rep(5,4),rep(6,11),rep(7,6),rep(8,5),9,
3,4,rep(5,4),rep(6,12),rep(7,8),rep(8,8),rep(9,3),
5,5,rep(6,3),rep(7,3),rep(8,4),rep(9,8),rep(10,3),11,rep(12,2),
5,rep(6,2),rep(7,4),rep(8,5),rep(9,8),rep(10,8),rep(11,4),rep(12,2),
4,5,7,8,9,
5,rep(7,3),rep(8,8),rep(9,3),rep(10,3),11,12,
6,7,rep(8,7),rep(9,5),rep(10,6),rep(11,3),12,13,
rep(6,3),rep(7,6),rep(8,5),rep(9,6),rep(10,4),rep(11,3),12,12,13,13,
rep(7,2),rep(8,3),rep(9,5),rep(10,10),rep(11,3),rep(12,2),13,
rep(6,2),rep(7,4),rep(8,9),rep(10,3),rep(11,2),rep(12,3),13,
rep(7,4),8,rep(9,4),rep(10,3),11,
rep(6,2),rep(7,4),rep(8,3),rep(9,5),
6,6,rep(8,2),rep(9,5),rep(10,2),12,13
)
#######################################################################
#
# Transform the Data from Columns to Numerical
#
######################################################################
irisc4 <- round(irisc4*0.4+1.65+runif(583, -0.2, 0.2),1)
irisc4a <- round(irisc4a*0.4+1.65+runif(627, -0.2, 0.2),1)
irisc4b <- round(irisc4b*0.4+1.65+runif(383, -0.2, 0.2),1)
irisc4d <- round(irisc4d*0.4+1.65+runif(571, -0.2, 0.2),1)
irisc5 <- round(irisc5+runif(583, -0.1, 0.1),1)
irisc5a <- round(irisc5a+runif(627, -0.1, 0.1),1)
irisc5b <- round(irisc5b+runif(383, -0.1, 0.1),1)
irisc5d <- round(irisc5d+runif(571, -0.1, 0.1),1)
irisc6 <- round(irisc6*0.4+2.55+runif(583, -0.2, 0.2),1)
irisc6a <- round(irisc6a*0.4+2.55+runif(627, -0.2, 0.2),1)
irisc6b <- round(irisc6b*0.4+2.55+runif(383, -0.2, 0.2),1)
irisc6d <- round(irisc6d*0.4+2.55+runif(571, -0.2, 0.2),1)
irisc7 <- round(irisc7*0.2+1.25+runif(583, -0.1, 0.1),1)
irisc7a <- round(irisc7a*0.2+1.25+runif(627, -0.1, 0.1),1)
irisc7b <- round(irisc7b*0.2+1.25+runif(383, -0.1, 0.1),1)
irisc7d <- round(irisc7d*0.2+1.25+runif(571, -0.1, 0.1),1)
species1 <- c(rep("Virginica",583))
species2 <- c(rep("Virginica",627))
species3 <- c(rep("Virginica",383))
species4 <- c(rep("Versicolor",571))
d1 <- data.frame(irisc1,irisc2,irisc3,irisc4,irisc5,irisc6,irisc7,species1)
d2 <- data.frame(irisc1a,irisc2a,irisc3a,irisc4a,irisc5a,irisc6a,irisc7a,species2)
d3 <- data.frame(irisc1b,irisc2b,irisc3b,irisc4b,irisc5b,irisc6b,irisc7b,species3)
d4 <- data.frame(irisc1d,irisc2d,irisc3d,irisc4d,irisc5d,irisc6d,irisc7d,species4)
colnames(d1) <- c("Year", "Location", "State", "Petal.Length", "Petal.Width",
"Sepal.Length", "Sepal.Width", "Species")
colnames(d2) <- c("Year", "Location", "State", "Petal.Length", "Petal.Width",
"Sepal.Length", "Sepal.Width", "Species")
colnames(d3) <- c("Year", "Location", "State", "Petal.Length", "Petal.Width",
"Sepal.Length", "Sepal.Width", "Species")
colnames(d4) <- c("Year", "Location", "State", "Petal.Length", "Petal.Width",
"Sepal.Length", "Sepal.Width", "Species")
d5 <- rbind(d1,d2,d3,d4)
write_csv(d5, "Andersons_Irises_1928.csv")