How can I subset a data frame for cases matching on all but 2 variables?

-1

I am working with a data set in which a subset of participants repeated some condition combinations. I would like to create a single data set with just the conditions that have two repetitions, so that I can check reliability and consistency. The final data set should therefore contain only the subjects and conditions that were done twice, i.e. those with both a first-rep value and a second-rep value.

Example data:

Data <- structure(list(Sub = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", 

"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"

), class = "factor"), Sys = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L

), Samp = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("T1", 

"T2", "T3", "T4"), class = "factor"), Cond = c("A", "A", "A", 

"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 

"A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", 

"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 

"B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", 

"C", "D", "D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", 

"E", "E", "E", "E", "C", "C", "C", "C", "C", "C", "C", "C", "D", 

"D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", 

"E", "E", "C", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", 

"D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", "E", "E", 

"C", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", 

"D", "D", "D", "C", "C", "C", "C", "C", "C", "C", "C", "D", "D", 

"D", "D", "D", "D", "D", "D", "C", "C", "C", "C", "C", "C", "C", 

"C", "D", "D", "D", "D", "D", "D", "D", "D"), Rep = c(1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L), Score = c(92.6666666666667, 77.6666666666667, 

106.333333333333, 47.3333333333333, 70.3333333333333, 48.6666666666667, 

96.6666666666667, 51.6666666666667, 80.3333333333333, 55.3333333333333, 

55.6666666666667, 22.3333333333333, 71.6666666666667, 31.6666666666667, 

74.3333333333333, 15.6666666666667, 67.3333333333333, 48.6666666666667, 

54.6666666666667, 41.3333333333333, 74, 54, 97, 72.3333333333333, 

26.625, 7.5, 5.25, -1.5, 6, -33, -8.25000000000001, -32.25, 19.875, 

4.5, 9.75, 5.625, -16.5, -21, 18.375, -16.5, -16.875, -18, -18.375, 

0.375, 0, 0, -17.25, -18, 68.3333333333333, 51, 102, 85.3333333333333, 

64.3333333333333, 50.3333333333333, 93.3333333333333, 104.333333333333, 

27, 76, 50, 44, 81, 91, 59.3333333333333, 91.6666666666667, 80.6666666666667, 

32.6666666666667, 96, 67, 42.3333333333333, 49.3333333333333, 

71, 63.3333333333333, 59, 47.3333333333333, 70.3333333333333, 

67.3333333333333, 56.3333333333333, 60.3333333333333, 115.333333333333, 

112.333333333333, 36.3333333333333, 80.3333333333333, 40.3333333333333, 

97, 87, 100.333333333333, 61.6666666666667, 104, 71, 28.6666666666667, 

88, 29.6666666666667, 47.6666666666667, 25.6666666666667, 62.6666666666667, 

35.6666666666667, 109, 66.3333333333333, 112, 39.6666666666667, 

69.6666666666667, 98.3333333333333, 74, 40.6666666666667, 47.3333333333333, 

75.6666666666667, 43.6666666666667, 74.3333333333333, 43, 75, 

39, 73.6666666666667, 68.6666666666667, 36.3333333333333, 107.666666666667, 

41, 98.6666666666667, 65.6666666666667, 74.6666666666667, 75.6666666666667, 

68.3333333333333, 68.3333333333333, 41, 51, 100, 102, 78, 85.3333333333333, 

36.3333333333333, 27, 88, 76, 45.3333333333333, 50, 37, 44, 37.3333333333333, 

59, 34.3333333333333, 47.3333333333333, 72.3333333333333, 70.3333333333333, 

67.6666666666667, 67.3333333333333, 56, 36.3333333333333, 83.3333333333333, 

80.3333333333333, 61, 40.3333333333333, 78.6666666666667, 97, 

67.6666666666667, 109, 67.3333333333333, 66.3333333333333, 97, 

112, 70.6666666666667, 39.6666666666667, 40.3333333333333, 47.3333333333333, 

95, 75.6666666666667, 71.6666666666667, 43.6666666666667, 72.6666666666667, 

74.3333333333333)), class = "data.frame", row.names = c(NA, -168L

))

If I look at just the number of rows of the repeated conditions I see 24 observations:

# Number of rows recorded as a second repetition.
nrow(filter(Data, Rep == 2))

[1] 24

If I look at just the first reps I see 144 observations:

# Number of rows recorded as a first repetition.
nrow(filter(Data, Rep == 1))

[1] 144

If I try the _join functions from dplyr I get back more than just the matched cases; I get 48 rows returned:

# Pair every second-rep score with the first-rep score recorded for the same
# Sub/Sys/Samp/Cond combination. right_join() keeps all Rep == 2 rows; a key
# that occurs more than once among the Rep == 1 rows yields one output row
# per match.
Joined_Data <- right_join(
  Data %>%
    filter(Rep == 1) %>%
    select(-Rep) %>%
    rename(Score_1 = Score),
  Data %>%
    filter(Rep == 2) %>%
    select(-Rep) %>%
    rename(Score_2 = Score),
  by = c("Sub", "Sys", "Samp", "Cond")
)

# Row count of the joined result.
nrow(Joined_Data)

[1] 48

This seems to be because each row is duplicated:

# Preview the first rows of the join result.
Joined_Data %>% head()

  Sub Sys Samp Cond   Score_1   Score_2

1   1   1   T1    C  68.33333  68.33333

2   1   1   T1    C  68.33333  68.33333

3   1   2   T1    C  51.00000  41.00000

4   1   2   T1    C  51.00000  41.00000

5   1   1   T2    C 102.00000 100.00000

6   1   1   T2    C 102.00000 100.00000

I can drop the duplicates by passing the tibble through distinct(), but the approach seems messy.

Passing the above through distinct() does give the desired output

# Same join as before, written as a single pipeline; distinct() then removes
# the fully identical rows so each Sub/Sys/Samp/Cond combination appears once.
Joined_Data <- Data %>%
  filter(Rep == 1) %>%
  select(-Rep) %>%
  rename(Score_1 = Score) %>%
  right_join(
    Data %>%
      filter(Rep == 2) %>%
      select(-Rep) %>%
      rename(Score_2 = Score),
    by = c("Sub", "Sys", "Samp", "Cond")
  ) %>%
  distinct()



   Sub Sys Samp Cond   Score_1   Score_2

1    1   1   T1    C  68.33333  68.33333

2    1   2   T1    C  51.00000  41.00000

3    1   1   T2    C 102.00000 100.00000

4    1   2   T2    C  85.33333  78.00000

5    1   1   T1    D  27.00000  36.33333

6    1   2   T1    D  76.00000  88.00000

7    1   1   T2    D  50.00000  45.33333

8    1   2   T2    D  44.00000  37.00000

9    7   1   T1    C  59.00000  37.33333

10   7   2   T1    C  47.33333  34.33333

11   7   1   T2    C  70.33333  72.33333

12   7   2   T2    C  67.33333  67.66667

13   7   1   T1    D  36.33333  56.00000

14   7   2   T1    D  80.33333  83.33333

15   7   1   T2    D  40.33333  61.00000

16   7   2   T2    D  97.00000  78.66667

17  11   1   T1    C 109.00000  67.66667

18  11   2   T1    C  66.33333  67.33333

19  11   1   T2    C 112.00000  97.00000

20  11   2   T2    C  39.66667  70.66667

21  11   1   T1    D  47.33333  40.33333

22  11   2   T1    D  75.66667  95.00000

23  11   1   T2    D  43.66667  71.66667

24  11   2   T2    D  74.33333  72.66667

Then I can gather this tibble back into a long format:

Desired output:

# Reshape the wide Score_1/Score_2 columns back into long format: one row per
# Sub/Sys/Samp/Cond/Rep, with the value in a single Score column.
# The columns produced by the join are named Score_1 and Score_2 (not
# Rating_1/Rating_2), so those are the ones gathered here.
Joined_Data %>%
  gather(Rep, Score, Score_1:Score_2) %>%
  # Split "Score_1" at "_" and keep only the trailing piece ("1");
  # the NA in `into` discards the "Score" prefix.
  separate(Rep, c(NA, "Rep"), sep = "_", remove = TRUE)



       Sub Sys Samp Cond Rep     Score

    1    1   1   T1    C   1  68.33333

    2    1   2   T1    C   1  51.00000

    3    1   1   T2    C   1 102.00000

    4    1   2   T2    C   1  85.33333

    5    1   1   T1    D   1  27.00000

    6    1   2   T1    D   1  76.00000

    7    1   1   T2    D   1  50.00000

    8    1   2   T2    D   1  44.00000

    9    7   1   T1    C   1  59.00000

    10   7   2   T1    C   1  47.33333

    11   7   1   T2    C   1  70.33333

    12   7   2   T2    C   1  67.33333

    13   7   1   T1    D   1  36.33333

    14   7   2   T1    D   1  80.33333

    15   7   1   T2    D   1  40.33333

    16   7   2   T2    D   1  97.00000

    17  11   1   T1    C   1 109.00000

    18  11   2   T1    C   1  66.33333

    19  11   1   T2    C   1 112.00000

    20  11   2   T2    C   1  39.66667

    21  11   1   T1    D   1  47.33333

    22  11   2   T1    D   1  75.66667

    23  11   1   T2    D   1  43.66667

    24  11   2   T2    D   1  74.33333

    25   1   1   T1    C   2  68.33333

    26   1   2   T1    C   2  41.00000

    27   1   1   T2    C   2 100.00000

    28   1   2   T2    C   2  78.00000

    29   1   1   T1    D   2  36.33333

    30   1   2   T1    D   2  88.00000

    31   1   1   T2    D   2  45.33333

    32   1   2   T2    D   2  37.00000

    33   7   1   T1    C   2  37.33333

    34   7   2   T1    C   2  34.33333

    35   7   1   T2    C   2  72.33333

    36   7   2   T2    C   2  67.66667

    37   7   1   T1    D   2  56.00000

    38   7   2   T1    D   2  83.33333

    39   7   1   T2    D   2  61.00000

    40   7   2   T2    D   2  78.66667

    41  11   1   T1    C   2  67.66667

    42  11   2   T1    C   2  67.33333

    43  11   1   T2    C   2  97.00000

    44  11   2   T2    C   2  70.66667

    45  11   1   T1    D   2  40.33333

    46  11   2   T1    D   2  95.00000

    47  11   1   T2    D   2  71.66667

    48  11   2   T2    D   2  72.66667

This seems like a lot of clunky steps, so I'm wondering if there is a more efficient way to subset the data to include only the specific cases that have 2 scores (one from the first rep, one from the second) and disregard/drop cases with only 1 rep/score.

Is there a cleaner/better way to accomplish the above?

edited Nov 22 '18 at 20:53

asked Nov 21 '18 at 4:20

JLC

1699

2

What is your expected output? If you want rows with only two repetitions doesn't Data %>% filter(Rep == 2) already give you what you want ?

– Ronak Shah
Nov 21 '18 at 4:48

Your question is very confusing; please provide more information.

– Hunaidkhan
Nov 21 '18 at 4:54

add a comment |

-1

Example data:

Data <- structure(list(Sub = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", 

"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"

), class = "factor"), Sys = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L

), Samp = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("T1", 

"T2", "T3", "T4"), class = "factor"), Cond = c("A", "A", "A", 

"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 

"A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", 

"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 

"B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", 

"C", "D", "D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", 

"E", "E", "E", "E", "C", "C", "C", "C", "C", "C", "C", "C", "D", 

"D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", 

"E", "E", "C", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", 

"D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", "E", "E", 

"C", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", 

"D", "D", "D", "C", "C", "C", "C", "C", "C", "C", "C", "D", "D", 

"D", "D", "D", "D", "D", "D", "C", "C", "C", "C", "C", "C", "C", 

"C", "D", "D", "D", "D", "D", "D", "D", "D"), Rep = c(1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L), Score = c(92.6666666666667, 77.6666666666667, 

106.333333333333, 47.3333333333333, 70.3333333333333, 48.6666666666667, 

96.6666666666667, 51.6666666666667, 80.3333333333333, 55.3333333333333, 

55.6666666666667, 22.3333333333333, 71.6666666666667, 31.6666666666667, 

74.3333333333333, 15.6666666666667, 67.3333333333333, 48.6666666666667, 

54.6666666666667, 41.3333333333333, 74, 54, 97, 72.3333333333333, 

26.625, 7.5, 5.25, -1.5, 6, -33, -8.25000000000001, -32.25, 19.875, 

4.5, 9.75, 5.625, -16.5, -21, 18.375, -16.5, -16.875, -18, -18.375, 

0.375, 0, 0, -17.25, -18, 68.3333333333333, 51, 102, 85.3333333333333, 

64.3333333333333, 50.3333333333333, 93.3333333333333, 104.333333333333, 

27, 76, 50, 44, 81, 91, 59.3333333333333, 91.6666666666667, 80.6666666666667, 

32.6666666666667, 96, 67, 42.3333333333333, 49.3333333333333, 

71, 63.3333333333333, 59, 47.3333333333333, 70.3333333333333, 

67.3333333333333, 56.3333333333333, 60.3333333333333, 115.333333333333, 

112.333333333333, 36.3333333333333, 80.3333333333333, 40.3333333333333, 

97, 87, 100.333333333333, 61.6666666666667, 104, 71, 28.6666666666667, 

88, 29.6666666666667, 47.6666666666667, 25.6666666666667, 62.6666666666667, 

35.6666666666667, 109, 66.3333333333333, 112, 39.6666666666667, 

69.6666666666667, 98.3333333333333, 74, 40.6666666666667, 47.3333333333333, 

75.6666666666667, 43.6666666666667, 74.3333333333333, 43, 75, 

39, 73.6666666666667, 68.6666666666667, 36.3333333333333, 107.666666666667, 

41, 98.6666666666667, 65.6666666666667, 74.6666666666667, 75.6666666666667, 

68.3333333333333, 68.3333333333333, 41, 51, 100, 102, 78, 85.3333333333333, 

36.3333333333333, 27, 88, 76, 45.3333333333333, 50, 37, 44, 37.3333333333333, 

59, 34.3333333333333, 47.3333333333333, 72.3333333333333, 70.3333333333333, 

67.6666666666667, 67.3333333333333, 56, 36.3333333333333, 83.3333333333333, 

80.3333333333333, 61, 40.3333333333333, 78.6666666666667, 97, 

67.6666666666667, 109, 67.3333333333333, 66.3333333333333, 97, 

112, 70.6666666666667, 39.6666666666667, 40.3333333333333, 47.3333333333333, 

95, 75.6666666666667, 71.6666666666667, 43.6666666666667, 72.6666666666667, 

74.3333333333333)), class = "data.frame", row.names = c(NA, -168L

))

If I look at just the number of rows of the repeated conditions I see 24 observations:

# Number of rows recorded as a second repetition.
nrow(filter(Data, Rep == 2))

[1] 24

If I look at just the first reps I see 144 observations:

# Number of rows recorded as a first repetition.
nrow(filter(Data, Rep == 1))

[1] 144

If I try the _join functions from dplyr I get back more than just the matched cases; I get 48 rows returned:

# Pair every second-rep score with the first-rep score recorded for the same
# Sub/Sys/Samp/Cond combination. right_join() keeps all Rep == 2 rows; a key
# that occurs more than once among the Rep == 1 rows yields one output row
# per match.
Joined_Data <- right_join(
  Data %>%
    filter(Rep == 1) %>%
    select(-Rep) %>%
    rename(Score_1 = Score),
  Data %>%
    filter(Rep == 2) %>%
    select(-Rep) %>%
    rename(Score_2 = Score),
  by = c("Sub", "Sys", "Samp", "Cond")
)

# Row count of the joined result.
nrow(Joined_Data)

[1] 48

This seems to be because each row is duplicated:

# Preview the first rows of the join result.
Joined_Data %>% head()

  Sub Sys Samp Cond   Score_1   Score_2

1   1   1   T1    C  68.33333  68.33333

2   1   1   T1    C  68.33333  68.33333

3   1   2   T1    C  51.00000  41.00000

4   1   2   T1    C  51.00000  41.00000

5   1   1   T2    C 102.00000 100.00000

6   1   1   T2    C 102.00000 100.00000

I can drop the duplicates by passing the tibble through distinct(), but the approach seems messy.

Passing the above through distinct() does give the desired output

# Same join as before, written as a single pipeline; distinct() then removes
# the fully identical rows so each Sub/Sys/Samp/Cond combination appears once.
Joined_Data <- Data %>%
  filter(Rep == 1) %>%
  select(-Rep) %>%
  rename(Score_1 = Score) %>%
  right_join(
    Data %>%
      filter(Rep == 2) %>%
      select(-Rep) %>%
      rename(Score_2 = Score),
    by = c("Sub", "Sys", "Samp", "Cond")
  ) %>%
  distinct()



   Sub Sys Samp Cond   Score_1   Score_2

1    1   1   T1    C  68.33333  68.33333

2    1   2   T1    C  51.00000  41.00000

3    1   1   T2    C 102.00000 100.00000

4    1   2   T2    C  85.33333  78.00000

5    1   1   T1    D  27.00000  36.33333

6    1   2   T1    D  76.00000  88.00000

7    1   1   T2    D  50.00000  45.33333

8    1   2   T2    D  44.00000  37.00000

9    7   1   T1    C  59.00000  37.33333

10   7   2   T1    C  47.33333  34.33333

11   7   1   T2    C  70.33333  72.33333

12   7   2   T2    C  67.33333  67.66667

13   7   1   T1    D  36.33333  56.00000

14   7   2   T1    D  80.33333  83.33333

15   7   1   T2    D  40.33333  61.00000

16   7   2   T2    D  97.00000  78.66667

17  11   1   T1    C 109.00000  67.66667

18  11   2   T1    C  66.33333  67.33333

19  11   1   T2    C 112.00000  97.00000

20  11   2   T2    C  39.66667  70.66667

21  11   1   T1    D  47.33333  40.33333

22  11   2   T1    D  75.66667  95.00000

23  11   1   T2    D  43.66667  71.66667

24  11   2   T2    D  74.33333  72.66667

Then I can gather this tibble back into a long format:

Desired output:

# Reshape the wide Score_1/Score_2 columns back into long format: one row per
# Sub/Sys/Samp/Cond/Rep, with the value in a single Score column.
# The columns produced by the join are named Score_1 and Score_2 (not
# Rating_1/Rating_2), so those are the ones gathered here.
Joined_Data %>%
  gather(Rep, Score, Score_1:Score_2) %>%
  # Split "Score_1" at "_" and keep only the trailing piece ("1");
  # the NA in `into` discards the "Score" prefix.
  separate(Rep, c(NA, "Rep"), sep = "_", remove = TRUE)



       Sub Sys Samp Cond Rep     Score

    1    1   1   T1    C   1  68.33333

    2    1   2   T1    C   1  51.00000

    3    1   1   T2    C   1 102.00000

    4    1   2   T2    C   1  85.33333

    5    1   1   T1    D   1  27.00000

    6    1   2   T1    D   1  76.00000

    7    1   1   T2    D   1  50.00000

    8    1   2   T2    D   1  44.00000

    9    7   1   T1    C   1  59.00000

    10   7   2   T1    C   1  47.33333

    11   7   1   T2    C   1  70.33333

    12   7   2   T2    C   1  67.33333

    13   7   1   T1    D   1  36.33333

    14   7   2   T1    D   1  80.33333

    15   7   1   T2    D   1  40.33333

    16   7   2   T2    D   1  97.00000

    17  11   1   T1    C   1 109.00000

    18  11   2   T1    C   1  66.33333

    19  11   1   T2    C   1 112.00000

    20  11   2   T2    C   1  39.66667

    21  11   1   T1    D   1  47.33333

    22  11   2   T1    D   1  75.66667

    23  11   1   T2    D   1  43.66667

    24  11   2   T2    D   1  74.33333

    25   1   1   T1    C   2  68.33333

    26   1   2   T1    C   2  41.00000

    27   1   1   T2    C   2 100.00000

    28   1   2   T2    C   2  78.00000

    29   1   1   T1    D   2  36.33333

    30   1   2   T1    D   2  88.00000

    31   1   1   T2    D   2  45.33333

    32   1   2   T2    D   2  37.00000

    33   7   1   T1    C   2  37.33333

    34   7   2   T1    C   2  34.33333

    35   7   1   T2    C   2  72.33333

    36   7   2   T2    C   2  67.66667

    37   7   1   T1    D   2  56.00000

    38   7   2   T1    D   2  83.33333

    39   7   1   T2    D   2  61.00000

    40   7   2   T2    D   2  78.66667

    41  11   1   T1    C   2  67.66667

    42  11   2   T1    C   2  67.33333

    43  11   1   T2    C   2  97.00000

    44  11   2   T2    C   2  70.66667

    45  11   1   T1    D   2  40.33333

    46  11   2   T1    D   2  95.00000

    47  11   1   T2    D   2  71.66667

    48  11   2   T2    D   2  72.66667

Is there a cleaner/better way to accomplish the above?

edited Nov 22 '18 at 20:53

asked Nov 21 '18 at 4:20

JLC

1699

2

What is your expected output? If you want rows with only two repetitions doesn't Data %>% filter(Rep == 2) already give you what you want ?

– Ronak Shah
Nov 21 '18 at 4:48

Your question is very confusing; please provide more information.

– Hunaidkhan
Nov 21 '18 at 4:54

add a comment |

-1

Example data:

Data <- structure(list(Sub = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", 

"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"

), class = "factor"), Sys = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L

), Samp = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("T1", 

"T2", "T3", "T4"), class = "factor"), Cond = c("A", "A", "A", 

"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 

"A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", 

"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 

"B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", 

"C", "D", "D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", 

"E", "E", "E", "E", "C", "C", "C", "C", "C", "C", "C", "C", "D", 

"D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", 

"E", "E", "C", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", 

"D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", "E", "E", 

"C", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", 

"D", "D", "D", "C", "C", "C", "C", "C", "C", "C", "C", "D", "D", 

"D", "D", "D", "D", "D", "D", "C", "C", "C", "C", "C", "C", "C", 

"C", "D", "D", "D", "D", "D", "D", "D", "D"), Rep = c(1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L), Score = c(92.6666666666667, 77.6666666666667, 

106.333333333333, 47.3333333333333, 70.3333333333333, 48.6666666666667, 

96.6666666666667, 51.6666666666667, 80.3333333333333, 55.3333333333333, 

55.6666666666667, 22.3333333333333, 71.6666666666667, 31.6666666666667, 

74.3333333333333, 15.6666666666667, 67.3333333333333, 48.6666666666667, 

54.6666666666667, 41.3333333333333, 74, 54, 97, 72.3333333333333, 

26.625, 7.5, 5.25, -1.5, 6, -33, -8.25000000000001, -32.25, 19.875, 

4.5, 9.75, 5.625, -16.5, -21, 18.375, -16.5, -16.875, -18, -18.375, 

0.375, 0, 0, -17.25, -18, 68.3333333333333, 51, 102, 85.3333333333333, 

64.3333333333333, 50.3333333333333, 93.3333333333333, 104.333333333333, 

27, 76, 50, 44, 81, 91, 59.3333333333333, 91.6666666666667, 80.6666666666667, 

32.6666666666667, 96, 67, 42.3333333333333, 49.3333333333333, 

71, 63.3333333333333, 59, 47.3333333333333, 70.3333333333333, 

67.3333333333333, 56.3333333333333, 60.3333333333333, 115.333333333333, 

112.333333333333, 36.3333333333333, 80.3333333333333, 40.3333333333333, 

97, 87, 100.333333333333, 61.6666666666667, 104, 71, 28.6666666666667, 

88, 29.6666666666667, 47.6666666666667, 25.6666666666667, 62.6666666666667, 

35.6666666666667, 109, 66.3333333333333, 112, 39.6666666666667, 

69.6666666666667, 98.3333333333333, 74, 40.6666666666667, 47.3333333333333, 

75.6666666666667, 43.6666666666667, 74.3333333333333, 43, 75, 

39, 73.6666666666667, 68.6666666666667, 36.3333333333333, 107.666666666667, 

41, 98.6666666666667, 65.6666666666667, 74.6666666666667, 75.6666666666667, 

68.3333333333333, 68.3333333333333, 41, 51, 100, 102, 78, 85.3333333333333, 

36.3333333333333, 27, 88, 76, 45.3333333333333, 50, 37, 44, 37.3333333333333, 

59, 34.3333333333333, 47.3333333333333, 72.3333333333333, 70.3333333333333, 

67.6666666666667, 67.3333333333333, 56, 36.3333333333333, 83.3333333333333, 

80.3333333333333, 61, 40.3333333333333, 78.6666666666667, 97, 

67.6666666666667, 109, 67.3333333333333, 66.3333333333333, 97, 

112, 70.6666666666667, 39.6666666666667, 40.3333333333333, 47.3333333333333, 

95, 75.6666666666667, 71.6666666666667, 43.6666666666667, 72.6666666666667, 

74.3333333333333)), class = "data.frame", row.names = c(NA, -168L

))

If I look at just the number of rows of the repeated conditions I see 24 observations:

# Number of rows recorded as a second repetition.
nrow(filter(Data, Rep == 2))

[1] 24

If I look at just the first reps I see 144 observations:

# Number of rows recorded as a first repetition.
nrow(filter(Data, Rep == 1))

[1] 144

If I try the _join functions from dplyr I get back more than just the matched cases; I get 48 rows returned:

# Pair every second-rep score with the first-rep score recorded for the same
# Sub/Sys/Samp/Cond combination. right_join() keeps all Rep == 2 rows; a key
# that occurs more than once among the Rep == 1 rows yields one output row
# per match.
Joined_Data <- right_join(
  Data %>%
    filter(Rep == 1) %>%
    select(-Rep) %>%
    rename(Score_1 = Score),
  Data %>%
    filter(Rep == 2) %>%
    select(-Rep) %>%
    rename(Score_2 = Score),
  by = c("Sub", "Sys", "Samp", "Cond")
)

# Row count of the joined result.
nrow(Joined_Data)

[1] 48

This seems to be because each row is duplicated:

# Preview the first rows of the join result.
Joined_Data %>% head()

  Sub Sys Samp Cond   Score_1   Score_2

1   1   1   T1    C  68.33333  68.33333

2   1   1   T1    C  68.33333  68.33333

3   1   2   T1    C  51.00000  41.00000

4   1   2   T1    C  51.00000  41.00000

5   1   1   T2    C 102.00000 100.00000

6   1   1   T2    C 102.00000 100.00000

I can drop the duplicates by passing the tibble through distinct(), but the approach seems messy.

Passing the above through distinct() does give the desired output

# Same join as before, written as a single pipeline; distinct() then removes
# the fully identical rows so each Sub/Sys/Samp/Cond combination appears once.
Joined_Data <- Data %>%
  filter(Rep == 1) %>%
  select(-Rep) %>%
  rename(Score_1 = Score) %>%
  right_join(
    Data %>%
      filter(Rep == 2) %>%
      select(-Rep) %>%
      rename(Score_2 = Score),
    by = c("Sub", "Sys", "Samp", "Cond")
  ) %>%
  distinct()



   Sub Sys Samp Cond   Score_1   Score_2

1    1   1   T1    C  68.33333  68.33333

2    1   2   T1    C  51.00000  41.00000

3    1   1   T2    C 102.00000 100.00000

4    1   2   T2    C  85.33333  78.00000

5    1   1   T1    D  27.00000  36.33333

6    1   2   T1    D  76.00000  88.00000

7    1   1   T2    D  50.00000  45.33333

8    1   2   T2    D  44.00000  37.00000

9    7   1   T1    C  59.00000  37.33333

10   7   2   T1    C  47.33333  34.33333

11   7   1   T2    C  70.33333  72.33333

12   7   2   T2    C  67.33333  67.66667

13   7   1   T1    D  36.33333  56.00000

14   7   2   T1    D  80.33333  83.33333

15   7   1   T2    D  40.33333  61.00000

16   7   2   T2    D  97.00000  78.66667

17  11   1   T1    C 109.00000  67.66667

18  11   2   T1    C  66.33333  67.33333

19  11   1   T2    C 112.00000  97.00000

20  11   2   T2    C  39.66667  70.66667

21  11   1   T1    D  47.33333  40.33333

22  11   2   T1    D  75.66667  95.00000

23  11   1   T2    D  43.66667  71.66667

24  11   2   T2    D  74.33333  72.66667

Then I can gather this tibble back into a long format:

Desired output:

# Reshape the wide join result back to long format: one row per
# Sub/Sys/Samp/Cond/Rep. The wide columns are Score_1 and Score_2 (created by
# the rename() calls in the join), so those are the columns to gather.
Joined_Data %>%
  gather(Rep, Score, Score_1:Score_2) %>%
  # Split "Score_1"/"Score_2" on "_", discard the "Score" prefix (NA) and keep
  # the repetition number; convert = TRUE turns it back into an integer.
  separate(Rep, c(NA, "Rep"), sep = "_", remove = TRUE, convert = TRUE)



       Sub Sys Samp Cond Rep     Score

    1    1   1   T1    C   1  68.33333

    2    1   2   T1    C   1  51.00000

    3    1   1   T2    C   1 102.00000

    4    1   2   T2    C   1  85.33333

    5    1   1   T1    D   1  27.00000

    6    1   2   T1    D   1  76.00000

    7    1   1   T2    D   1  50.00000

    8    1   2   T2    D   1  44.00000

    9    7   1   T1    C   1  59.00000

    10   7   2   T1    C   1  47.33333

    11   7   1   T2    C   1  70.33333

    12   7   2   T2    C   1  67.33333

    13   7   1   T1    D   1  36.33333

    14   7   2   T1    D   1  80.33333

    15   7   1   T2    D   1  40.33333

    16   7   2   T2    D   1  97.00000

    17  11   1   T1    C   1 109.00000

    18  11   2   T1    C   1  66.33333

    19  11   1   T2    C   1 112.00000

    20  11   2   T2    C   1  39.66667

    21  11   1   T1    D   1  47.33333

    22  11   2   T1    D   1  75.66667

    23  11   1   T2    D   1  43.66667

    24  11   2   T2    D   1  74.33333

    25   1   1   T1    C   2  68.33333

    26   1   2   T1    C   2  41.00000

    27   1   1   T2    C   2 100.00000

    28   1   2   T2    C   2  78.00000

    29   1   1   T1    D   2  36.33333

    30   1   2   T1    D   2  88.00000

    31   1   1   T2    D   2  45.33333

    32   1   2   T2    D   2  37.00000

    33   7   1   T1    C   2  37.33333

    34   7   2   T1    C   2  34.33333

    35   7   1   T2    C   2  72.33333

    36   7   2   T2    C   2  67.66667

    37   7   1   T1    D   2  56.00000

    38   7   2   T1    D   2  83.33333

    39   7   1   T2    D   2  61.00000

    40   7   2   T2    D   2  78.66667

    41  11   1   T1    C   2  67.66667

    42  11   2   T1    C   2  67.33333

    43  11   1   T2    C   2  97.00000

    44  11   2   T2    C   2  70.66667

    45  11   1   T1    D   2  40.33333

    46  11   2   T1    D   2  95.00000

    47  11   1   T2    D   2  71.66667

    48  11   2   T2    D   2  72.66667

Is there a cleaner/better way to accomplish the above?

edited Nov 22 '18 at 20:53

asked Nov 21 '18 at 4:20

JLC

1699

Example data:

Data <- structure(list(Sub = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 

4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 

8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", 

"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"

), class = "factor"), Sys = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 

1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L

), Samp = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 

1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("T1", 

"T2", "T3", "T4"), class = "factor"), Cond = c("A", "A", "A", 

"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 

"A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", 

"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 

"B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", 

"C", "D", "D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", 

"E", "E", "E", "E", "C", "C", "C", "C", "C", "C", "C", "C", "D", 

"D", "D", "D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", 

"E", "E", "C", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", 

"D", "D", "D", "D", "D", "E", "E", "E", "E", "E", "E", "E", "E", 

"C", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", 

"D", "D", "D", "C", "C", "C", "C", "C", "C", "C", "C", "D", "D", 

"D", "D", "D", "D", "D", "D", "C", "C", "C", "C", "C", "C", "C", 

"C", "D", "D", "D", "D", "D", "D", "D", "D"), Rep = c(1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 

2L, 1L, 2L, 1L, 2L, 1L), Score = c(92.6666666666667, 77.6666666666667, 

106.333333333333, 47.3333333333333, 70.3333333333333, 48.6666666666667, 

96.6666666666667, 51.6666666666667, 80.3333333333333, 55.3333333333333, 

55.6666666666667, 22.3333333333333, 71.6666666666667, 31.6666666666667, 

74.3333333333333, 15.6666666666667, 67.3333333333333, 48.6666666666667, 

54.6666666666667, 41.3333333333333, 74, 54, 97, 72.3333333333333, 

26.625, 7.5, 5.25, -1.5, 6, -33, -8.25000000000001, -32.25, 19.875, 

4.5, 9.75, 5.625, -16.5, -21, 18.375, -16.5, -16.875, -18, -18.375, 

0.375, 0, 0, -17.25, -18, 68.3333333333333, 51, 102, 85.3333333333333, 

64.3333333333333, 50.3333333333333, 93.3333333333333, 104.333333333333, 

27, 76, 50, 44, 81, 91, 59.3333333333333, 91.6666666666667, 80.6666666666667, 

32.6666666666667, 96, 67, 42.3333333333333, 49.3333333333333, 

71, 63.3333333333333, 59, 47.3333333333333, 70.3333333333333, 

67.3333333333333, 56.3333333333333, 60.3333333333333, 115.333333333333, 

112.333333333333, 36.3333333333333, 80.3333333333333, 40.3333333333333, 

97, 87, 100.333333333333, 61.6666666666667, 104, 71, 28.6666666666667, 

88, 29.6666666666667, 47.6666666666667, 25.6666666666667, 62.6666666666667, 

35.6666666666667, 109, 66.3333333333333, 112, 39.6666666666667, 

69.6666666666667, 98.3333333333333, 74, 40.6666666666667, 47.3333333333333, 

75.6666666666667, 43.6666666666667, 74.3333333333333, 43, 75, 

39, 73.6666666666667, 68.6666666666667, 36.3333333333333, 107.666666666667, 

41, 98.6666666666667, 65.6666666666667, 74.6666666666667, 75.6666666666667, 

68.3333333333333, 68.3333333333333, 41, 51, 100, 102, 78, 85.3333333333333, 

36.3333333333333, 27, 88, 76, 45.3333333333333, 50, 37, 44, 37.3333333333333, 

59, 34.3333333333333, 47.3333333333333, 72.3333333333333, 70.3333333333333, 

67.6666666666667, 67.3333333333333, 56, 36.3333333333333, 83.3333333333333, 

80.3333333333333, 61, 40.3333333333333, 78.6666666666667, 97, 

67.6666666666667, 109, 67.3333333333333, 66.3333333333333, 97, 

112, 70.6666666666667, 39.6666666666667, 40.3333333333333, 47.3333333333333, 

95, 75.6666666666667, 71.6666666666667, 43.6666666666667, 72.6666666666667, 

74.3333333333333)), class = "data.frame", row.names = c(NA, -168L

))

If I look at just the number of rows of the repeated conditions I see 24 observations:

Data %>% 

    filter(Rep == 2) %>%

    nrow()

[1] 24

If I look at just the first reps I see 144 observations:

Data %>% 

    filter(Rep == 1) %>%

    nrow()

[1] 144

If I try the _join functions from dplyr I get back more than just the matched cases; I get 48 rows returned:

Joined_Data <- right_join(Data %>% 

                 filter(Rep == 1) %>%

                 rename("Score_1" = Score) %>%

                 select(-Rep),

               Data %>% 

                 filter(Rep == 2) %>%

                 rename("Score_2" = Score) %>%

                 select(-Rep),

               by = c("Sub", "Sys", "Samp", "Cond")) 





nrow(Joined_Data)

[1] 48

This seems to be because each row is duplicated:

head(Joined_Data)

  Sub Sys Samp Cond   Score_1   Score_2

1   1   1   T1    C  68.33333  68.33333

2   1   1   T1    C  68.33333  68.33333

3   1   2   T1    C  51.00000  41.00000

4   1   2   T1    C  51.00000  41.00000

5   1   1   T2    C 102.00000 100.00000

6   1   1   T2    C 102.00000 100.00000

I can drop the duplicates by passing the tibble through distinct(), but the approach seems messy.

Passing the above through distinct() does give the desired output

Joined_Data <- right_join(Data %>% 

                     filter(Rep == 1) %>%

                     rename("Score_1" = Score) %>%

                     select(-Rep),

                   Data %>% 

                     filter(Rep == 2) %>%

                     rename("Score_2" = Score) %>%

                     select(-Rep),

                   by = c("Sub", "Sys", "Samp", "Cond")) %>%

                 distinct()



   Sub Sys Samp Cond   Score_1   Score_2

1    1   1   T1    C  68.33333  68.33333

2    1   2   T1    C  51.00000  41.00000

3    1   1   T2    C 102.00000 100.00000

4    1   2   T2    C  85.33333  78.00000

5    1   1   T1    D  27.00000  36.33333

6    1   2   T1    D  76.00000  88.00000

7    1   1   T2    D  50.00000  45.33333

8    1   2   T2    D  44.00000  37.00000

9    7   1   T1    C  59.00000  37.33333

10   7   2   T1    C  47.33333  34.33333

11   7   1   T2    C  70.33333  72.33333

12   7   2   T2    C  67.33333  67.66667

13   7   1   T1    D  36.33333  56.00000

14   7   2   T1    D  80.33333  83.33333

15   7   1   T2    D  40.33333  61.00000

16   7   2   T2    D  97.00000  78.66667

17  11   1   T1    C 109.00000  67.66667

18  11   2   T1    C  66.33333  67.33333

19  11   1   T2    C 112.00000  97.00000

20  11   2   T2    C  39.66667  70.66667

21  11   1   T1    D  47.33333  40.33333

22  11   2   T1    D  75.66667  95.00000

23  11   1   T2    D  43.66667  71.66667

24  11   2   T2    D  74.33333  72.66667

Then I can gather this tibble back into a long format:

Desired output:

# Reshape the wide join result back to long format: one row per
# Sub/Sys/Samp/Cond/Rep. The wide columns are Score_1 and Score_2 (created by
# the rename() calls in the join), so those are the columns to gather.
Joined_Data %>%
  gather(Rep, Score, Score_1:Score_2) %>%
  # Split "Score_1"/"Score_2" on "_", discard the "Score" prefix (NA) and keep
  # the repetition number; convert = TRUE turns it back into an integer.
  separate(Rep, c(NA, "Rep"), sep = "_", remove = TRUE, convert = TRUE)



       Sub Sys Samp Cond Rep     Score

    1    1   1   T1    C   1  68.33333

    2    1   2   T1    C   1  51.00000

    3    1   1   T2    C   1 102.00000

    4    1   2   T2    C   1  85.33333

    5    1   1   T1    D   1  27.00000

    6    1   2   T1    D   1  76.00000

    7    1   1   T2    D   1  50.00000

    8    1   2   T2    D   1  44.00000

    9    7   1   T1    C   1  59.00000

    10   7   2   T1    C   1  47.33333

    11   7   1   T2    C   1  70.33333

    12   7   2   T2    C   1  67.33333

    13   7   1   T1    D   1  36.33333

    14   7   2   T1    D   1  80.33333

    15   7   1   T2    D   1  40.33333

    16   7   2   T2    D   1  97.00000

    17  11   1   T1    C   1 109.00000

    18  11   2   T1    C   1  66.33333

    19  11   1   T2    C   1 112.00000

    20  11   2   T2    C   1  39.66667

    21  11   1   T1    D   1  47.33333

    22  11   2   T1    D   1  75.66667

    23  11   1   T2    D   1  43.66667

    24  11   2   T2    D   1  74.33333

    25   1   1   T1    C   2  68.33333

    26   1   2   T1    C   2  41.00000

    27   1   1   T2    C   2 100.00000

    28   1   2   T2    C   2  78.00000

    29   1   1   T1    D   2  36.33333

    30   1   2   T1    D   2  88.00000

    31   1   1   T2    D   2  45.33333

    32   1   2   T2    D   2  37.00000

    33   7   1   T1    C   2  37.33333

    34   7   2   T1    C   2  34.33333

    35   7   1   T2    C   2  72.33333

    36   7   2   T2    C   2  67.66667

    37   7   1   T1    D   2  56.00000

    38   7   2   T1    D   2  83.33333

    39   7   1   T2    D   2  61.00000

    40   7   2   T2    D   2  78.66667

    41  11   1   T1    C   2  67.66667

    42  11   2   T1    C   2  67.33333

    43  11   1   T2    C   2  97.00000

    44  11   2   T2    C   2  70.66667

    45  11   1   T1    D   2  40.33333

    46  11   2   T1    D   2  95.00000

    47  11   1   T2    D   2  71.66667

    48  11   2   T2    D   2  72.66667

Is there a cleaner/better way to accomplish the above?

r dplyr

edited Nov 22 '18 at 20:53

asked Nov 21 '18 at 4:20

JLC

1699

edited Nov 22 '18 at 20:53

asked Nov 21 '18 at 4:20

JLC

1699

edited Nov 22 '18 at 20:53

asked Nov 21 '18 at 4:20

JLC

1699

asked Nov 21 '18 at 4:20

JLC

1699

asked Nov 21 '18 at 4:20

JLC

1699

2

What is your expected output? If you want rows with only two repetitions doesn't Data %>% filter(Rep == 2) already give you what you want ?

– Ronak Shah
Nov 21 '18 at 4:48

Your question is very confusing; please provide more information.

– Hunaidkhan
Nov 21 '18 at 4:54

add a comment |

2

What is your expected output? If you want rows with only two repetitions doesn't Data %>% filter(Rep == 2) already give you what you want ?

– Ronak Shah
Nov 21 '18 at 4:48

Your question is very confusing; please provide more information.

– Hunaidkhan
Nov 21 '18 at 4:54

What is your expected output? If you want rows with only two repetitions doesn't Data %>% filter(Rep == 2) already give you what you want ?

– Ronak Shah
Nov 21 '18 at 4:48

Your question is very confusing; please provide more information.

– Hunaidkhan
Nov 21 '18 at 4:54

add a comment |

2 Answers
2

active

oldest

votes

If I got what you want correctly, I believe this will do the trick:

# Drop exact duplicate rows first, so a condition only counts as repeated when
# its rows actually differ (e.g. in Rep or Score).
Data <- Data %>% unique()

# Keep every row of Data whose Sub/Sys/Samp/Cond combination occurs more than
# once.
Data %>%
  group_by(Sub, Sys, Samp, Cond) %>%
  summarise(cnt = n()) %>%   # number of rows per combination
  ungroup() %>%              # summarise() only drops the last grouping level
  filter(cnt > 1) %>%        # discard combinations with just one row
  select(-cnt) %>%           # the count column is no longer needed
  left_join(Data, by = c("Sub", "Sys", "Samp", "Cond"))

edited Nov 21 '18 at 19:41

answered Nov 21 '18 at 9:16

DS_UNI

1,230512

The score for the first rep is missing with this approach.

– JLC
Nov 21 '18 at 14:09

I thought that was the goal to be honest, then I didn't quite get what you wanted. Let's say you have the case, which I'll add in the next comment, which rows do you want to get at the end?

– DS_UNI
Nov 21 '18 at 14:20

structure(list(Sub = structure(c(8L, 8L, 8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"), class = "factor"), Sys = c(1L, 1L, 1L), Samp = structure(c(2L, 2L, 2L), .Label = c("T1", "T2", "T3", "T4"), class = "factor"), Cond = c("D", "D", "D"), Rep = c(1L, 2L, 1L), Score = c(43.6666666666667, 71.6666666666667, 43.6666666666667)), class = "data.frame", row.names = c(NA, -3L), .Names = c("Sub", "Sys", "Samp", "Cond", "Rep", "Score" ))

– DS_UNI
Nov 21 '18 at 14:20

The final result in my original question is the desired output. I'm just wondering if there is a more efficient way to do it.

– JLC
Nov 21 '18 at 15:42

aha, ok! I edited my answer

– DS_UNI
Nov 21 '18 at 19:40

|
show 2 more comments

If I understood correctly, this is what you want.

NB: I'm going for the data.table approach, you could as well modify it for dplyr, though one simple merge for me seems cleaner than those long pipes :)

# library() stops with an error if a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(data.table)
library(magrittr)

Data <- as.data.table(Data)

# Pair each second-repetition row (y) with its first-repetition score (x).
# x is de-duplicated first so a condition listed more than once with Rep == 1
# does not multiply the matches; all.y = TRUE keeps every Rep == 2 row.
merge(x = Data[Rep == 1, .(Sub, Sys, Samp, Cond, Score)] %>% .[!duplicated(.), ],
      y = Data[Rep == 2, .(Sub, Sys, Samp, Cond, Score)],
      by = c("Sub", "Sys", "Samp", "Cond"),
      all.y = TRUE, suffixes = c("_1", "_2"))

answered Nov 22 '18 at 21:25

Nutle

313215

add a comment |

Your Answer

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53405204%2fhow-can-i-subset-a-data-frame-for-cases-matching-on-all-but-2-variables%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

2 Answers
2

active

oldest

votes

2 Answers
2

active

oldest

votes

If I got what you want correctly, I believe this will do the trick:

# Drop exact duplicate rows first, so a condition only counts as repeated when
# its rows actually differ (e.g. in Rep or Score).
Data <- Data %>% unique()

# Keep every row of Data whose Sub/Sys/Samp/Cond combination occurs more than
# once.
Data %>%
  group_by(Sub, Sys, Samp, Cond) %>%
  summarise(cnt = n()) %>%   # number of rows per combination
  ungroup() %>%              # summarise() only drops the last grouping level
  filter(cnt > 1) %>%        # discard combinations with just one row
  select(-cnt) %>%           # the count column is no longer needed
  left_join(Data, by = c("Sub", "Sys", "Samp", "Cond"))

edited Nov 21 '18 at 19:41

answered Nov 21 '18 at 9:16

DS_UNI

1,230512

The score for the first rep is missing with this approach.

– JLC
Nov 21 '18 at 14:09

I thought that was the goal to be honest, then I didn't quite get what you wanted. Let's say you have the case, which I'll add in the next comment, which rows do you want to get at the end?

– DS_UNI
Nov 21 '18 at 14:20

structure(list(Sub = structure(c(8L, 8L, 8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"), class = "factor"), Sys = c(1L, 1L, 1L), Samp = structure(c(2L, 2L, 2L), .Label = c("T1", "T2", "T3", "T4"), class = "factor"), Cond = c("D", "D", "D"), Rep = c(1L, 2L, 1L), Score = c(43.6666666666667, 71.6666666666667, 43.6666666666667)), class = "data.frame", row.names = c(NA, -3L), .Names = c("Sub", "Sys", "Samp", "Cond", "Rep", "Score" ))

– DS_UNI
Nov 21 '18 at 14:20

The final result in my original question is the desired output. I'm just wondering if there is a more efficient way to do it.

– JLC
Nov 21 '18 at 15:42

aha, ok! I edited my answer

– DS_UNI
Nov 21 '18 at 19:40

|
show 2 more comments

If I got what you want correctly, I believe this will do the trick:

# Drop exact duplicate rows first, so a condition only counts as repeated when
# its rows actually differ (e.g. in Rep or Score).
Data <- Data %>% unique()

# Keep every row of Data whose Sub/Sys/Samp/Cond combination occurs more than
# once.
Data %>%
  group_by(Sub, Sys, Samp, Cond) %>%
  summarise(cnt = n()) %>%   # number of rows per combination
  ungroup() %>%              # summarise() only drops the last grouping level
  filter(cnt > 1) %>%        # discard combinations with just one row
  select(-cnt) %>%           # the count column is no longer needed
  left_join(Data, by = c("Sub", "Sys", "Samp", "Cond"))

edited Nov 21 '18 at 19:41

answered Nov 21 '18 at 9:16

DS_UNI

1,230512

The score for the first rep is missing with this approach.

– JLC
Nov 21 '18 at 14:09

I thought that was the goal to be honest, then I didn't quite get what you wanted. Let's say you have the case, which I'll add in the next comment, which rows do you want to get at the end?

– DS_UNI
Nov 21 '18 at 14:20

structure(list(Sub = structure(c(8L, 8L, 8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"), class = "factor"), Sys = c(1L, 1L, 1L), Samp = structure(c(2L, 2L, 2L), .Label = c("T1", "T2", "T3", "T4"), class = "factor"), Cond = c("D", "D", "D"), Rep = c(1L, 2L, 1L), Score = c(43.6666666666667, 71.6666666666667, 43.6666666666667)), class = "data.frame", row.names = c(NA, -3L), .Names = c("Sub", "Sys", "Samp", "Cond", "Rep", "Score" ))

– DS_UNI
Nov 21 '18 at 14:20

The final result in my original question is the desired output. I'm just wondering if there is a more efficient way to do it.

– JLC
Nov 21 '18 at 15:42

aha, ok! I edited my answer

– DS_UNI
Nov 21 '18 at 19:40

|
show 2 more comments

If I got what you want correctly, I believe this will do the trick:

# Drop exact duplicate rows first, so a condition only counts as repeated when
# its rows actually differ (e.g. in Rep or Score).
Data <- Data %>% unique()

# Keep every row of Data whose Sub/Sys/Samp/Cond combination occurs more than
# once.
Data %>%
  group_by(Sub, Sys, Samp, Cond) %>%
  summarise(cnt = n()) %>%   # number of rows per combination
  ungroup() %>%              # summarise() only drops the last grouping level
  filter(cnt > 1) %>%        # discard combinations with just one row
  select(-cnt) %>%           # the count column is no longer needed
  left_join(Data, by = c("Sub", "Sys", "Samp", "Cond"))

edited Nov 21 '18 at 19:41

answered Nov 21 '18 at 9:16

DS_UNI

1,230512

If I got what you want correctly, I believe this will do the trick:

# Drop exact duplicate rows first, so a condition only counts as repeated when
# its rows actually differ (e.g. in Rep or Score).
Data <- Data %>% unique()

# Keep every row of Data whose Sub/Sys/Samp/Cond combination occurs more than
# once.
Data %>%
  group_by(Sub, Sys, Samp, Cond) %>%
  summarise(cnt = n()) %>%   # number of rows per combination
  ungroup() %>%              # summarise() only drops the last grouping level
  filter(cnt > 1) %>%        # discard combinations with just one row
  select(-cnt) %>%           # the count column is no longer needed
  left_join(Data, by = c("Sub", "Sys", "Samp", "Cond"))

edited Nov 21 '18 at 19:41

answered Nov 21 '18 at 9:16

DS_UNI

1,230512

edited Nov 21 '18 at 19:41

answered Nov 21 '18 at 9:16

DS_UNI

1,230512

answered Nov 21 '18 at 9:16

DS_UNI

1,230512

answered Nov 21 '18 at 9:16

DS_UNI

1,230512

The score for the first rep is missing with this approach.

– JLC
Nov 21 '18 at 14:09

I thought that was the goal to be honest, then I didn't quite get what you wanted. Let's say you have the case, which I'll add in the next comment, which rows do you want to get at the end?

– DS_UNI
Nov 21 '18 at 14:20

structure(list(Sub = structure(c(8L, 8L, 8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"), class = "factor"), Sys = c(1L, 1L, 1L), Samp = structure(c(2L, 2L, 2L), .Label = c("T1", "T2", "T3", "T4"), class = "factor"), Cond = c("D", "D", "D"), Rep = c(1L, 2L, 1L), Score = c(43.6666666666667, 71.6666666666667, 43.6666666666667)), class = "data.frame", row.names = c(NA, -3L), .Names = c("Sub", "Sys", "Samp", "Cond", "Rep", "Score" ))

– DS_UNI
Nov 21 '18 at 14:20

The final result in my original question is the desired output. I'm just wondering if there is a more efficient way to do it.

– JLC
Nov 21 '18 at 15:42

aha, ok! I edited my answer

– DS_UNI
Nov 21 '18 at 19:40

|
show 2 more comments

The score for the first rep is missing with this approach.

– JLC
Nov 21 '18 at 14:09

I thought that was the goal to be honest, then I didn't quite get what you wanted. Let's say you have the case, which I'll add in the next comment, which rows do you want to get at the end?

– DS_UNI
Nov 21 '18 at 14:20

structure(list(Sub = structure(c(8L, 8L, 8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"), class = "factor"), Sys = c(1L, 1L, 1L), Samp = structure(c(2L, 2L, 2L), .Label = c("T1", "T2", "T3", "T4"), class = "factor"), Cond = c("D", "D", "D"), Rep = c(1L, 2L, 1L), Score = c(43.6666666666667, 71.6666666666667, 43.6666666666667)), class = "data.frame", row.names = c(NA, -3L), .Names = c("Sub", "Sys", "Samp", "Cond", "Rep", "Score" ))

– DS_UNI
Nov 21 '18 at 14:20

The final result in my original question is the desired output. I'm just wondering if there is a more efficient way to do it.

– JLC
Nov 21 '18 at 15:42

aha, ok! I edited my answer

– DS_UNI
Nov 21 '18 at 19:40

The score for the first rep is missing with this approach.

– JLC
Nov 21 '18 at 14:09

I thought that was the goal to be honest, then I didn't quite get what you wanted. Let's say you have the case, which I'll add in the next comment, which rows do you want to get at the end?

– DS_UNI
Nov 21 '18 at 14:20

structure(list(Sub = structure(c(8L, 8L, 8L), .Label = c("1", "2", "4", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"), class = "factor"), Sys = c(1L, 1L, 1L), Samp = structure(c(2L, 2L, 2L), .Label = c("T1", "T2", "T3", "T4"), class = "factor"), Cond = c("D", "D", "D"), Rep = c(1L, 2L, 1L), Score = c(43.6666666666667, 71.6666666666667, 43.6666666666667)), class = "data.frame", row.names = c(NA, -3L), .Names = c("Sub", "Sys", "Samp", "Cond", "Rep", "Score" ))

– DS_UNI
Nov 21 '18 at 14:20

The final result in my original question is the desired output. I'm just wondering if there is a more efficient way to do it.

– JLC
Nov 21 '18 at 15:42

aha, ok! I edited my answer

– DS_UNI
Nov 21 '18 at 19:40

|
show 2 more comments

If I understood correctly, this is what you want.

NB: I'm going for the data.table approach, you could as well modify it for dplyr, though one simple merge for me seems cleaner than those long pipes :)

# library() stops with an error if a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(data.table)
library(magrittr)

Data <- as.data.table(Data)

# Pair each second-repetition row (y) with its first-repetition score (x).
# x is de-duplicated first so a condition listed more than once with Rep == 1
# does not multiply the matches; all.y = TRUE keeps every Rep == 2 row.
merge(x = Data[Rep == 1, .(Sub, Sys, Samp, Cond, Score)] %>% .[!duplicated(.), ],
      y = Data[Rep == 2, .(Sub, Sys, Samp, Cond, Score)],
      by = c("Sub", "Sys", "Samp", "Cond"),
      all.y = TRUE, suffixes = c("_1", "_2"))

answered Nov 22 '18 at 21:25

Nutle

313215

add a comment |

If I understood correctly, this is what you want.

NB: I'm going for the data.table approach, you could as well modify it for dplyr, though one simple merge for me seems cleaner than those long pipes :)

# library() stops with an error if a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(data.table)
library(magrittr)

Data <- as.data.table(Data)

# Pair each second-repetition row (y) with its first-repetition score (x).
# x is de-duplicated first so a condition listed more than once with Rep == 1
# does not multiply the matches; all.y = TRUE keeps every Rep == 2 row.
merge(x = Data[Rep == 1, .(Sub, Sys, Samp, Cond, Score)] %>% .[!duplicated(.), ],
      y = Data[Rep == 2, .(Sub, Sys, Samp, Cond, Score)],
      by = c("Sub", "Sys", "Samp", "Cond"),
      all.y = TRUE, suffixes = c("_1", "_2"))

answered Nov 22 '18 at 21:25

Nutle

313215

add a comment |

If I understood correctly, this is what you want.

NB: I'm going for the data.table approach, you could as well modify it for dplyr, though one simple merge for me seems cleaner than those long pipes :)

# library() stops with an error if a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(data.table)
library(magrittr)

Data <- as.data.table(Data)

# Pair each second-repetition row (y) with its first-repetition score (x).
# x is de-duplicated first so a condition listed more than once with Rep == 1
# does not multiply the matches; all.y = TRUE keeps every Rep == 2 row.
merge(x = Data[Rep == 1, .(Sub, Sys, Samp, Cond, Score)] %>% .[!duplicated(.), ],
      y = Data[Rep == 2, .(Sub, Sys, Samp, Cond, Score)],
      by = c("Sub", "Sys", "Samp", "Cond"),
      all.y = TRUE, suffixes = c("_1", "_2"))

answered Nov 22 '18 at 21:25

Nutle

313215

If I understood correctly, this is what you want.

NB: I'm going for the data.table approach, you could as well modify it for dplyr, though one simple merge for me seems cleaner than those long pipes :)

# library() stops with an error if a package is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(data.table)
library(magrittr)

Data <- as.data.table(Data)

# Pair each second-repetition row (y) with its first-repetition score (x).
# x is de-duplicated first so a condition listed more than once with Rep == 1
# does not multiply the matches; all.y = TRUE keeps every Rep == 2 row.
merge(x = Data[Rep == 1, .(Sub, Sys, Samp, Cond, Score)] %>% .[!duplicated(.), ],
      y = Data[Rep == 2, .(Sub, Sys, Samp, Cond, Score)],
      by = c("Sub", "Sys", "Samp", "Cond"),
      all.y = TRUE, suffixes = c("_1", "_2"))

answered Nov 22 '18 at 21:25

Nutle

313215

answered Nov 22 '18 at 21:25

Nutle

313215

answered Nov 22 '18 at 21:25

Nutle

313215

answered Nov 22 '18 at 21:25

Nutle

313215

add a comment |

draft saved

draft discarded

Thanks for contributing an answer to Stack Overflow!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference. If you need detailed information, please check here.

vj 0IzAvP,HiBP1NsJiBdX1WXI kP,4kzkFX,9iNoeP0RgfWXzXJY2gfGModJyVlOB8Qfr0yaWFkOiHRtv

搜尋此網誌

Cfrgtkky