'dplyr mutate only works once
I'd like to add a new variable to a dataframe for plotting labels, as seen in the top voted answer here
Here's the data:
small <- structure(list(Site = structure(c(1L, 20L, 20L, 6L, 18L, 7L,
8L, 4L, 6L, 20L, 15L, 8L, 14L, 3L, 20L, 4L, 20L, 1L, 15L, 18L,
15L, 1L, 15L, 11L, 20L, 20L, 16L, 4L, 14L, 3L, 2L, 4L, 4L, 11L,
14L, 4L, 15L, 20L, 20L, 18L, 15L, 14L, 4L, 20L, 6L, 4L, 4L, 15L,
7L, 20L, 2L, 5L, 6L, 3L, 7L, 14L, 14L, 5L, 4L, 14L, 20L, 12L,
6L, 5L, 20L, 4L, 8L, 20L, 4L, 4L, 15L, 6L, 5L, 20L, 14L, 13L,
8L, 20L, 14L, 20L, 8L, 3L, 18L, 6L, 19L, 19L, 20L, 19L, 18L,
4L, 1L, 8L, 4L, 1L, 6L, 4L, 7L, 6L, 14L, 15L), .Label = c("1305",
"1307", "1308", "1309", "1312", "1313", "1314", "1316", "1454",
"1455", "1456", "1457", "1458", "1459", "1461", "1462", "51242",
"735", "739", "cc82"), class = "factor"), Sample = c(807001,
549001, 1776001, 708001, 1428001, 144001, 273001, 2031001, 186001,
4449001, 495001, 1, 75001, 1395001, 801001, 393001, 48001, 186001,
633001, 252001, 1047001, 780001, 1149001, 123001, 1803001, 1920001,
93001, 1539001, 1278001, 252001, 129001, 2415001, 4839001, 222001,
798001, 2709001, 1350001, 2763001, 2367001, 852001, 1239001,
1344001, 2364001, 3141001, 1383001, 4284001, 213001, 696001,
198001, 4521001, 156001, 822001, 1056001, 753001, 48001, 678001,
132001, 189001, 2958001, 951001, 351001, 183001, 726001, 939001,
942001, 3144001, 393001, 2559001, 1230001, 3507001, 825001, 1314001,
525001, 2643001, 1080001, 48001, 657001, 129001, 219001, 2853001,
27001, 1002001, 372001, 1197001, 183001, 546001, 3189001, 1191001,
225001, 4173001, 66001, 687001, 882001, 453001, 771001, 1749001,
39001, 579001, 837001, 2130001), Species = c(1034.99999063637,
1034.38745958467, 1034.93602944106, 1034.86617039632, 1034.99994688683,
1031.33494747455, 1034.01286119925, 1034.93820951036, 1034.04207802324,
1034.99999978378, 1034.7195310584, 1.0000000000006, 1032.22721814454,
1034.99998599318, 1034.61810976437, 1033.93049848926, 1029.76888222768,
1034.4773467232, 1034.8464918186, 1033.88103519209, 1034.98062874486,
1034.99997102765, 1034.9894164954, 1031.22209052865, 1034.9393924588,
1034.95228329769, 1029.57766168241, 1034.84617251106, 1034.99716319884,
1034.01759652049, 1034.12421865692, 1034.97165766244, 1034.99999931756,
1032.5264160899, 1034.9302506075, 1034.9852789914, 1034.99732449456,
1034.99393176586, 1034.98256074378, 1034.82915901302, 1034.99407894687,
1034.99848385292, 1034.96842702321, 1034.9982024792, 1034.99990895745,
1034.99994016069, 1033.46577496826, 1034.8847185144, 1032.07439991374,
1034.99999994015, 1034.37061686683, 1033.9995317372, 1034.98014310935,
1034.7765851292, 1023.56493233315, 1034.87292826712, 1033.5159578377,
1033.67572213954, 1034.99198243374, 1034.97016563294, 1034.08306140893,
1024.75667862308, 1034.87575799201, 1033.99992196217, 1034.70367833352,
1034.99510596938, 1034.31114418187, 1034.9893038116, 1034.73597419662,
1034.99835620314, 1034.93758219921, 1034.99920511546, 1033.98476342599,
1034.99147574006, 1034.98682314168, 990.598200299325, 1034.72635751567,
1033.22853472628, 1034.07153177872, 1034.99535891042, 1020.83157812414,
1034.91858260823, 1034.25487458056, 1034.99478390852, 1033.10757669472,
1033.94643834015, 1034.99848918266, 1034, 1033.75393660622, 1034.99989098177,
1032.40178509791, 1034.75797238777, 1034.52419010051, 1034.97051191563,
1034.8976530452, 1034.89479904612, 1019.98410325148, 1034.78158579658,
1034.94325878046, 1035)), class = "data.frame", row.names = c(NA,
-100L))
The new
df should contain a 4th column called lab
, with the Site
value in it if that row had the highest Sample
value, otherwise it should be empty:
new = small %>%
mutate(lab = if_else(Sample == max(Sample), as.character(Site), NA_character_))
Instead, only the highest Sample for the 1309
site value has a lab
entry. There should be one lab
entry for every highest Site
. The Site
variable is a factor.
This image is an example that works from the link above, where there is one label entry for each state with the highest year:
This is the toy data from that question where grouping was not required:
temp.dat <- structure(list(Year = c("2003", "2004", "2005", "2006", "2007",
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2003",
"2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011",
"2012", "2013", "2014", "2003", "2004", "2005", "2006", "2007",
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2003",
"2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011",
"2012", "2013", "2014"), State = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("VIC",
"NSW", "QLD", "WA"), class = "factor"), Capex = c(5.35641472365348,
5.76523240652641, 5.24727577535625, 5.57988239709746, 5.14246402568366,
4.96786288162828, 5.493190785287, 6.08500616799372, 6.5092228474591,
7.03813541623157, 8.34736513875897, 9.04992300432169, 7.15830329914056,
7.21247045701994, 7.81373928617117, 7.76610217197542, 7.9744994967006,
7.93734452080786, 8.29289899132255, 7.85222269563982, 8.12683746325074,
8.61903784301649, 9.7904327253813, 9.75021175267288, 8.2950673974226,
6.6272705639724, 6.50170524635367, 6.15609626379471, 6.43799637295979,
6.9869551384028, 8.36305663640294, 8.31382617231745, 8.65409824343971,
9.70529678167458, 11.3102788081848, 11.8696420977237, 6.77937303542605,
5.51242844820827, 5.35789621712839, 4.38699327451101, 4.4925792218211,
4.29934654081527, 4.54639175257732, 4.70040615159951, 5.04056109514957,
5.49921208937735, 5.96590909090909, 6.18700407463007)), class = "data.frame", row.names = c(NA,
-48L), .Names = c("Year", "State", "Capex"))
And running this correctly assigns the state to the max(year) without grouping as seen in the screenshot:
temp.dat %>%
mutate(label = if_else(Year == max(Year), as.character(State), NA_character_))
Solution 1:[1]
If you want this done for every Site, you need to put group_by(Site)
before the mutate
.
small %>%
group_by(Site) %>%
mutate(lab = if_else(Sample == max(Sample), as.character(Site), NA_character_))
In the other question, you want to label the max year. Every site has the same max year, 2014. The global max is the same as the group max in every group, so no grouping is needed. (Though grouping wouldn't hurt anything and would make the code more robust in case some site didn't have the same max year.)
Here, every site has a different max sample and only one site has the global max sample. If you want the max sample by site, you need group_by(Site)
.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | Gregor Thomas |