7 Compare to data transfer of 2019 and preserve IDs

SOVON asks us to preserve IDs used in mapped data as sent in April 2019.

7.1 Read data

First, we need to read the data we sent them in April 2019.

crbirding_users_2019 <- read_csv(here::here("data",
                                  "processed",
                                  "crbirding_users_20190410.csv"),
                                 col_types = cols(
                                   .default = col_character(),
                                   user_id = col_logical(),
                                   user_reference = col_number()),
                       na = "")
crbirding_birds_2019 <- read_csv(here::here("data",
                                  "processed",
                                  "crbirding_birds_20190410.csv"),
                       col_types = cols(
                         .default = col_character(),
                         bird_id = col_logical(),
                         bird_reference = col_number(),
                         bird_bto = col_logical(),
                         bird_birth_year = col_logical(),
                         bird_date_begin = col_datetime(
                           format = "%d-%m-%Y"),
                         bird_date_end = col_datetime(
                           format = "%d-%m-%Y")),
                       na = "")
crbirding_observations_2019 <- read_csv(here::here(
  "data",
  "processed",
  "crbirding_observations_20190410.csv"),
  col_types = cols(
    .default = col_character(),
    user_id = col_logical(),
    user_reference = col_number(),
    bird_id = col_logical(),
    bird_reference = col_number(),
    observation_id = col_logical(),
    observation_reference = col_number(),
    observation_date = col_datetime(
      format = "%d-%m-%Y"),
    observation_time = col_logical(),
    check_bird = col_logical(),
    MRI = col_number(),
    melder_ringersnummer = col_logical(),
    condition = col_number()
  ),
  na = "")

We read also the data we mapped just now.

crbirding_users <- read_csv(here::here("data",
                                       "processed",
                                       "crbirding_users.csv"),
                            col_types = cols(
                              .default = col_character(),
                              user_id = col_logical(),
                              user_reference = col_number()),
                            na = "")
crbirding_birds <- read_csv(here::here("data",
                                       "processed",
                                       "crbirding_birds.csv"),
                            col_types = cols(
                              .default = col_character(),
                              bird_id = col_logical(),
                              bird_reference = col_number(),
                              bird_bto = col_logical(),
                              bird_birth_year = col_logical(),
                              bird_date_begin = col_datetime(
                                format = ""),
                              bird_date_end = col_datetime(
                                format = "")),
                            na = "")
crbirding_observations <- read_csv(here::here(
  "data",
  "processed",
  "crbirding_observations.csv"),
  col_types = cols(
    .default = col_character(),
    user_id = col_logical(),
    user_reference = col_number(),
    bird_id = col_logical(),
    bird_reference = col_number(),
    observation_id = col_logical(),
    observation_reference = col_number(),
    observation_date = col_datetime(
      format = ""),
    observation_time = col_logical(),
    check_bird = col_logical(),
    MRI = col_number(),
    melder_ringersnummer = col_logical(),
    observation_condition = col_number()
  ),
  na = "")

To improve the comparison of bird data, we read also raw observations:

obs_and_acts <- read_tsv(
  here::here("data", "interim", "obs_and_actions.tsv"),
  col_types = cols(
    .default = col_character(),
    sovon_bird_reference = col_double(),
    Nummer = col_double(),
    Datum = col_datetime(format = ""),
    WaarnemerNummer = col_double(),
    PlaatsLengtegraadGraden = col_double(),
    PlaatsLengtegraadMinuten = col_double(),
    PlaatsLengtegraadSeconden = col_double(),
    PlaatsBreedtegraadGraden = col_double(),
    PlaatsBreedtegraadMinuten = col_double(),
    PlaatsBreedtegraadSeconden = col_double(),
    BevestigingDatum = col_datetime(format = ""),
    AanmaakDatum = col_datetime(format = ""),
    WijzigDatum = col_datetime(format = "")
  )
)

7.2 Users

Number of new users based on new user_reference (email and other private info not shown):

crbirding_users %>%
  filter(!user_reference %in% crbirding_users_2019$user_reference) %>%
  select(user_reference, user_country, user_role)
## filter: removed 1,861 rows (91%), 178 rows remaining
## select: dropped 8 variables (user_id, user_email, user_first_name, user_last_name, user_address, …)

Old user_reference IDs not anymore present (email and other private info not shown):

crbirding_users_2019 %>%
  filter(!user_reference %in% crbirding_users$user_reference) %>%
  select(user_reference, user_country, user_role)
## filter: removed 1,861 rows (>99%), one row remaining
## select: dropped 8 variables (user_id, user_email, user_first_name, user_last_name, user_address, …)

Users from 2019 where some changes have been applied:

same_ref_ids <- 
  crbirding_users %>%
  filter(user_reference %in% crbirding_users_2019$user_reference) %>%
  pull(user_reference)
## filter: removed 178 rows (9%), 1,861 rows remaining
ids_users_with_changes <- 
  crbirding_users_2019 %>%
  filter(user_reference %in% same_ref_ids) %>%
  anti_join(crbirding_users,
            by = names(crbirding_users)) %>%
  pull(user_reference)
## filter: removed one row (<1%), 1,861 rows remaining
## anti_join: added no columns
##            > rows only in x       7
##            > rows only in y  (  185)
##            > matched rows    (1,854)
##            >                 =======
##            > rows total           7
ids_users_with_changes
## [1]  880  529 1609 1913  712 1786  542

7.3 Map birds

Before mapping birds, we have to modify the color rings in crbirding_birds_2019 by applying the syntax discussed in #107 and #67. SOVON asks to follow this syntax: ring color + hyphen (-) + inscription without inscription color, e.g. W-EAAM instead of WN(EAAM).

First, check that all color rings from 2019 have two letters before bracket (:

crbirding_birds_2019 %>%
  filter(is.na(bird_shorthand) | 
           str_sub(bird_shorthand, start = 3, end = 3) == "(") %>%
  nrow == nrow(crbirding_birds_2019)
## filter: no rows removed
## [1] TRUE

We proceed then to modify the color rings:

crbirding_birds_2019 <-
  crbirding_birds_2019 %>%
  mutate(bird_shorthand = if_else(
    !is.na(bird_shorthand),
    str_c(str_sub(bird_shorthand, start = 1, end = 1),
          "-",
          str_extract(bird_shorthand, pattern = "(?<=\\()(.*?)(?=\\))")),
    bird_shorthand
))
## mutate: changed 10,126 values (93%) of 'bird_shorthand' (0 new NA)

Preview:

crbirding_birds_2019 %>%
  select(bird_shorthand) %>%
  filter(!is.na(bird_shorthand)) %>%
  head(n = 100)
## select: dropped 16 variables (bird_id, bird_reference, bird_euring, bird_bto, bird_scheme, …)
## filter: removed 724 rows (7%), 10,126 rows remaining

Now we can proceed mapping birds data.

7.3.1 Map bird_reference

In INBO ring table, birds were uniquely identified by their very first ring (data were spread and untidy). We added a bird_reference field to gather information about multiple rings and link them to the same bird. This means that this field cannot be used to compare the actual INBO data with the data of 2019. We have to compare data by the very first color ring.

As the dot for readibility is only preserved for the very last ring, it can be better to compare data by removing it from bird shorthand. We create an auxiliary column called inscription for both crbirding_birds and crbirding_birds_2019:

crbirding_birds_2019 <- 
  crbirding_birds_2019 %>%
  mutate(inscription = if_else(!is.na(bird_shorthand),
                               str_extract(
                                str_remove(bird_shorthand, "\\."),
                                "(?<=\\-)[A-Z0-9]*"),
                              NA_character_))
## mutate: new variable 'inscription' (character) with 10,014 unique values and 7% NA
crbirding_birds <- 
  crbirding_birds %>%
  mutate(inscription = if_else(!is.na(bird_shorthand),
                               str_extract(
                                str_remove(bird_shorthand, "\\."),
                                "(?<=\\-)[A-Z0-9]*"),
                              NA_character_))
## mutate: new variable 'inscription' (character) with 11,486 unique values and 6% NA

We add the first color ring inscription as additional column, first_color_ring:

crbirding_birds_first_color_ring <-
  crbirding_birds %>% 
  select(bird_reference, inscription, bird_date_begin) %>%
  group_by(bird_reference) %>%
  filter(!is.na(inscription)) %>%
  mutate(first_color_ring = if_else(
    bird_date_begin == min(bird_date_begin, na.rm = TRUE),
    inscription,
    NA_character_)) %>%
  ungroup() %>%
  filter(!is.na(first_color_ring)) %>%
  distinct(bird_reference, first_color_ring) %>%
  right_join(crbirding_birds, by = c("bird_reference"))
## select: dropped 15 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## group_by: one grouping variable (bird_reference)
## filter (grouped): removed 762 rows (6%), 11,614 rows remaining
## mutate (grouped): new variable 'first_color_ring' (character) with 11,137 unique values and 4% NA
## ungroup: no grouping variables
## filter: removed 478 rows (4%), 11,136 rows remaining
## distinct: no rows removed
## right_join: added 17 columns (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
##             > rows only in x  (     0)
##             > rows only in y        0
##             > matched rows     12,376
##             >                 ========
##             > rows total       12,376

All birds should get a valid first_color_ring column. Exceptions:

crbirding_birds_first_color_ring %>%
  filter(is.na(first_color_ring))
## filter: removed all rows (100%)

We do the same with ring data from 2019:

crbirding_birds_first_color_ring_2019 <-
  crbirding_birds_2019 %>% 
  select(bird_reference, inscription, bird_date_begin) %>%
  group_by(bird_reference) %>%
  filter(!is.na(inscription)) %>%
  mutate(first_color_ring = if_else(
    bird_date_begin == min(bird_date_begin,na.rm = TRUE),
    inscription,
    NA_character_)) %>%
  ungroup() %>%
  filter(!is.na(first_color_ring)) %>%
  distinct(bird_reference, first_color_ring) %>%
  right_join(crbirding_birds_2019, by = c("bird_reference"))
## select: dropped 15 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## group_by: one grouping variable (bird_reference)
## filter (grouped): removed 724 rows (7%), 10,126 rows remaining
## mutate (grouped): new variable 'first_color_ring' (character) with 9,686 unique values and 4% NA
## ungroup: no grouping variables
## filter: removed 441 rows (4%), 9,685 rows remaining
## distinct: no rows removed
## right_join: added 17 columns (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
##             > rows only in x  (     0)
##             > rows only in y        0
##             > matched rows     10,850
##             >                 ========
##             > rows total       10,850

Even in this case all birds should get a valid first_color_ring column. Exceptions:

crbirding_birds_first_color_ring_2019 %>%
  filter(is.na(first_color_ring))
## filter: removed all rows (100%)

Now we can map the new bird_reference to the bird_reference of 2019:

crbirding_birds_mapping_bird_reference <-
  crbirding_birds_first_color_ring %>%
  distinct(bird_reference, first_color_ring) %>%
  left_join(crbirding_birds_first_color_ring_2019 %>%
              distinct(bird_reference, first_color_ring) %>%
              rename(bird_reference_2019 = bird_reference),
            by = "first_color_ring") %>%
  select(first_color_ring, everything())
## distinct: removed 1,240 rows (10%), 11,136 rows remaining
## distinct: removed 1,165 rows (11%), 9,685 rows remaining
## rename: renamed one variable (bird_reference_2019)
## left_join: added one column (bird_reference_2019)
##            > rows only in x    1,504
##            > rows only in y  (    53)
##            > matched rows      9,632
##            >                 ========
##            > rows total       11,136
## select: columns reordered (first_color_ring, bird_reference, bird_reference_2019)
head(crbirding_birds_mapping_bird_reference, n = 100)

We add bird_reference_2019 to crbirding_birds and crbirding_observations:

crbirding_birds <- 
  crbirding_birds %>%
  left_join((crbirding_birds_mapping_bird_reference %>%
              select(bird_reference, bird_reference_2019)),
            by = "bird_reference") %>%
  select(bird_id, bird_reference, bird_reference_2019, everything())
## select: dropped one variable (first_color_ring)
## left_join: added one column (bird_reference_2019)
##            > rows only in x        0
##            > rows only in y  (     0)
##            > matched rows     12,376
##            >                 ========
##            > rows total       12,376
## select: columns reordered (bird_id, bird_reference, bird_reference_2019, bird_euring, bird_bto, …)

and crbirding_observations:

crbirding_observations <-
  crbirding_observations %>%
  left_join((crbirding_birds_mapping_bird_reference %>%
              select(bird_reference, bird_reference_2019)),
            by = "bird_reference")
## select: dropped one variable (first_color_ring)
## left_join: added one column (bird_reference_2019)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     150,662
##            >                 =========
##            > rows total       150,662

Number of birds present in 2019 but with bird_reference not equal to bird_reference_2019 :

crbirding_birds %>%
  filter(!is.na(bird_reference_2019)) %>%
  filter(bird_reference != bird_reference_2019) %>%
  nrow
## filter: removed 1,538 rows (12%), 10,838 rows remaining
## filter: removed 1,530 rows (14%), 9,308 rows remaining
## [1] 9308

This is not surprising as bird_reference has been defined as a progressive number, so it is sufficient the change of one bird reference to modify all the other references coming after.

First change appears at:

crbirding_birds %>%
  filter(!is.na(bird_reference_2019)) %>%
  filter(bird_reference != bird_reference_2019) %>%
  filter(bird_reference == min(bird_reference)) %>%
  select(bird_reference, bird_reference_2019, bird_shorthand)
## filter: removed 1,538 rows (12%), 10,838 rows remaining
## filter: removed 1,530 rows (14%), 9,308 rows remaining
## filter: removed 9,306 rows (>99%), 2 rows remaining
## select: dropped 16 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)

7.3.2 New bird_reference

Some birds were not present in data transfer of April 2019, so the bird_reference cannot be mapped to bird_reference_2019.

This case includes:

  1. New birds: birds ringed for the very first time after the previous data transfer in April 2019.
  2. Birds with modified first color ring: birds whose very first color ring has been corrected or added. This case is due to the fact that the first color ring is the real unique identifier in INBO ring table tblKleurring
crbirding_birds %>%
  anti_join(crbirding_birds_2019,
            by = c("bird_reference_2019" = "bird_reference"))
## anti_join: added no columns
##            > rows only in x    1,538
##            > rows only in y  (    54)
##            > matched rows    (10,838)
##            >                 ========
##            > rows total        1,538

Based on column bird_date_begin (see above) we can see that most but not all rings have been placed after 2019-04-10, date of previous export. It can also occur that there are old rings which have been added to database after that date, so they are actually new rings, database speaking. So, to check which rings have been entered before 2019-04-10 we use column AanmaakDatum in raw observation data:

supected_new_birds_before_export_date <- 
  crbirding_birds %>%
  filter(is.na(bird_reference_2019)) %>%
  left_join(crbirding_birds_first_color_ring %>%
              distinct(bird_reference, first_color_ring),
            by = "bird_reference") %>%
  select(bird_reference, first_color_ring) %>%
  left_join(obs_and_acts,
            by = c("first_color_ring" = "KleurringNummer")) %>%
  select(bird_reference, first_color_ring, AanmaakDatum) %>%
  group_by(bird_reference, first_color_ring) %>%
  filter(AanmaakDatum == min(AanmaakDatum)) %>%
  ungroup() %>%
  filter(AanmaakDatum < as_datetime("2019-04-10")) %>%
  left_join(crbirding_birds,
            by = c("bird_reference"))
## filter: removed 10,838 rows (88%), 1,538 rows remaining
## distinct: removed 1,240 rows (10%), 11,136 rows remaining
## left_join: added one column (first_color_ring)
##            > rows only in x       0
##            > rows only in y  (9,632)
##            > matched rows     1,538
##            >                 =======
##            > rows total       1,538
## select: dropped 18 variables (bird_id, bird_reference_2019, bird_euring, bird_bto, bird_shorthand, …)
## left_join: added 41 columns (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
##            > rows only in x         4
##            > rows only in y  (145,639)
##            > matched rows       5,245    (includes duplicates)
##            >                 =========
##            > rows total         5,249
## select: dropped 40 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## group_by: 2 grouping variables (bird_reference, first_color_ring)
## filter (grouped): removed 3,633 rows (69%), 1,616 rows remaining
## ungroup: no grouping variables
## filter: removed 1,614 rows (>99%), 2 rows remaining
## left_join: added 18 columns (bird_id, bird_reference_2019, bird_euring, bird_bto, bird_shorthand, …)
##            > rows only in x        0
##            > rows only in y  (12,374)
##            > matched rows          2
##            >                 ========
##            > rows total            2
supected_new_birds_before_export_date

To better retrieve potential matches to birds from 2019’s export, we check birds from 2019 not included in actual data:

crbirding_birds_first_color_ring_2019 %>%
  anti_join(crbirding_birds_first_color_ring,
            by = "first_color_ring")
## anti_join: added no columns
##            > rows only in x       54
##            > rows only in y  ( 1,538)
##            > matched rows    (10,796)
##            >                 ========
##            > rows total           54

As expected most of these rings are the so called virtual rings that we have decided to not include in new export (see #81-issuecomment-698964036).

If we exclude them from this overview, the following birds remain:

crbirding_birds_first_color_ring_2019 %>%
  filter(str_sub(first_color_ring, start = 3, end = 3) != "B") %>%
  anti_join(crbirding_birds_first_color_ring,
            by = "first_color_ring")
## filter: removed 67 rows (1%), 10,783 rows remaining
## anti_join: added no columns
##            > rows only in x        4
##            > rows only in y  ( 1,555)
##            > matched rows    (10,779)
##            >                 ========
##            > rows total            4

The ring ALOB is not the first ring in the new data, but it replaces the ring LPAV. We match it and assign bird_reference_2019 to this ring:

bird_reference_ALOB <- 
  crbirding_birds %>%
  filter(inscription == "ALOB") %>%
  pull(bird_reference)
## filter: removed 12,375 rows (>99%), one row remaining
bird_reference_ALOB_2019 <-
  crbirding_birds_2019 %>%
  filter(inscription == "ALOB") %>%
  pull(bird_reference)
## filter: removed 10,849 rows (>99%), one row remaining
crbirding_birds <-
  crbirding_birds %>%
  mutate(bird_reference_2019 = if_else(bird_reference == bird_reference_ALOB,
                                       bird_reference_ALOB_2019,
                                       bird_reference_2019))
## mutate: changed 2 values (<1%) of 'bird_reference_2019' (0 new NA)
crbirding_observations <-
  crbirding_observations %>%
  mutate(bird_reference_2019 = if_else(bird_reference == bird_reference_ALOB,
                                       bird_reference_ALOB_2019,
                                       bird_reference_2019))
## mutate: changed 30 values (<1%) of 'bird_reference_2019' (0 new NA)

The ring EAU has been removed from the database at the preprocessing stage (see section about the removal of some rings).

Based on metal ring (bird_ring_number) information, date of ringing (bird_date_begin) and other fields we can see that the ring FMOV is the same as ring FMOU from 2019 export:

all(
  crbirding_birds_2019 %>%
    filter(bird_shorthand == "B-FM.OU") %>%
    select(bird_date_begin, bird_ring_number, bird_age_ringing, bird_ring_position) %>%
    pull() ==
    (crbirding_birds %>%
       filter(bird_shorthand == "B-FM.OV") %>%
       select(bird_date_begin, bird_ring_number, bird_age_ringing, bird_ring_position) %>%
       pull()))
## filter: removed 10,849 rows (>99%), one row remaining
## select: dropped 14 variables (bird_id, bird_reference, bird_euring, bird_bto, bird_shorthand, …)
## filter: removed 12,375 rows (>99%), one row remaining
## select: dropped 15 variables (bird_id, bird_reference, bird_reference_2019, bird_euring, bird_bto, …)
## [1] TRUE

So, we can assign bird_reference_2019 to this ring:

bird_reference_FMOV <- 
  crbirding_birds %>%
  filter(inscription == "FMOV") %>%
  pull(bird_reference)
## filter: removed 12,375 rows (>99%), one row remaining
bird_reference_FMOU_2019 <- 
  crbirding_birds_2019 %>%
  filter(inscription == "FMOU") %>%
  pull(bird_reference)
## filter: removed 10,849 rows (>99%), one row remaining
crbirding_birds <-
  crbirding_birds %>%
  mutate(bird_reference_2019 = if_else(bird_reference == bird_reference_FMOV,
                                       bird_reference_FMOU_2019,
                                       bird_reference_2019))
## mutate: changed one value (<1%) of 'bird_reference_2019' (1 fewer NA)
crbirding_observations <-
  crbirding_observations %>%
  mutate(bird_reference_2019 = if_else(bird_reference == bird_reference_FMOV,
                                       bird_reference_FMOU_2019,
                                       bird_reference_2019))
## mutate: changed one value (<1%) of 'bird_reference_2019' (1 fewer NA)

Show changes in column bird_reference_2019:

crbirding_birds %>%
  filter(bird_reference %in% c(bird_reference_ALOB, bird_reference_FMOV)) %>%
  select(bird_reference, bird_reference_2019, bird_shorthand)
## filter: removed 12,373 rows (>99%), 3 rows remaining
## select: dropped 16 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)

7.3.3 Use bird_reference from 2019 as primary key

To ease the work of SOVON It team and as discussed in issue 128 we reuse the old bird references (bird_reference_2019) as primary key in crbirding_birds where present, being careful to change the foreign key in crbirding_observations as well.

We do it by using a helping column, new_bird_reference filled with old bird references where possible and with the new ones plus 20000 to ensure unicity:

crbirding_birds <-
  crbirding_birds %>%
  mutate(new_bird_reference = if_else(!is.na(bird_reference_2019),
                                      bird_reference_2019,
                                      bird_reference + 20000))
## mutate: new variable 'new_bird_reference' (double) with 11,136 unique values and 0% NA
crbirding_observations <-
  crbirding_observations %>%
  mutate(new_bird_reference = if_else(!is.na(bird_reference_2019),
                                      bird_reference_2019,
                                      bird_reference + 20000))
## mutate: new variable 'new_bird_reference' (double) with 11,136 unique values and 0% NA

Is new_bird_reference unique for both birds and observations?

nrow(crbirding_birds %>%
  distinct(bird_reference, new_bird_reference)) == 
  nrow(crbirding_birds %>%
         distinct(bird_reference))
## distinct: removed 1,240 rows (10%), 11,136 rows remaining
## distinct: removed 1,240 rows (10%), 11,136 rows remaining
## [1] TRUE
nrow(crbirding_observations %>%
  distinct(bird_reference, new_bird_reference)) == 
  nrow(crbirding_observations %>%
         distinct(bird_reference))
## distinct: removed 139,526 rows (93%), 11,136 rows remaining
## distinct: removed 139,526 rows (93%), 11,136 rows remaining
## [1] TRUE

Again, double checking that new_bird_reference is unique:

nrow(crbirding_observations %>%
  distinct(bird_reference, new_bird_reference)) == 
  nrow(crbirding_observations %>%
         distinct(bird_reference))
## distinct: removed 139,526 rows (93%), 11,136 rows remaining
## distinct: removed 139,526 rows (93%), 11,136 rows remaining
## [1] TRUE

Final check: are all bird reference ids in new_bird_reference in crbirding_observations in crbirding_birds as well?

crbirding_observations %>%
  filter(!new_bird_reference %in% crbirding_birds$new_bird_reference) %>%
  nrow() == 0
## filter: removed all rows (100%)
## [1] TRUE

If FALSE, it means some observations have a broken link to birds and action is needed to solve this bug.

If TRUE, we can safely copy the values of new_bird_reference to bird_reference and remove the help columns bird_reference_2019 and inscription:

crbirding_birds <- 
  crbirding_birds %>%
  select(-c(bird_reference, bird_reference_2019, inscription)) %>%
  rename(bird_reference = new_bird_reference) %>%
  # reorder columns 
  select(bird_id, bird_reference, everything())
## select: dropped 3 variables (bird_reference, bird_reference_2019, inscription)
## rename: renamed one variable (bird_reference)
## select: columns reordered (bird_id, bird_reference, bird_euring, bird_bto, bird_shorthand, …)
crbirding_observations <- 
  crbirding_observations %>%
  select(-bird_reference) %>%
  rename(bird_reference = new_bird_reference) %>%
  # reorder columns 
  select(user_id,
         user_reference,
         bird_id,
         bird_reference,
         everything())
## select: dropped one variable (bird_reference)
## rename: renamed one variable (bird_reference)
## select: columns reordered (user_id, user_reference, bird_id, bird_reference, observation_id, …)
crbirding_birds_2019 <- 
  crbirding_birds_2019 %>%
  select(-c(inscription))
## select: dropped one variable (inscription)

7.3.4 Changed bird_euring

crbirding_birds %>%
  # remove new rings
  filter(bird_reference - 20000 < 0) %>%
  anti_join(crbirding_birds_2019,
            by = c("bird_reference",
                   "bird_euring")) %>%
  select(bird_reference, bird_euring) %>%
  left_join(crbirding_birds_2019 %>%
              rename(bird_euring_2019 = bird_euring) %>%
              select(bird_reference, bird_euring_2019),
            by = ("bird_reference"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
##            > rows only in x        9
##            > rows only in y  (    61)
##            > matched rows    (10,830)
##            >                 ========
##            > rows total            9
## select: dropped 15 variables (bird_id, bird_bto, bird_shorthand, bird_scheme, bird_ring_number, …)
## rename: renamed one variable (bird_euring_2019)
## select: dropped 15 variables (bird_id, bird_bto, bird_shorthand, bird_scheme, bird_ring_number, …)
## left_join: added one column (bird_euring_2019)
##            > rows only in x        0
##            > rows only in y  (10,841)
##            > matched rows          9
##            >                 ========
##            > rows total            9

7.3.5 Changed bird_shorthand

Birds with changed ring history:

bird_ref_changed_shorthand <-
  crbirding_birds %>%
  # remove new rings
  filter(bird_reference - 20000 < 0) %>%
  select(bird_reference, bird_shorthand) %>%
  anti_join(crbirding_birds_2019,
            by = c("bird_reference", "bird_shorthand")) %>%
  distinct(bird_reference)
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## select: dropped 15 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
## anti_join: added no columns
##            > rows only in x       50
##            > rows only in y  (    77)
##            > matched rows    (10,789)
##            >                 ========
##            > rows total           50
## distinct: removed 21 rows (42%), 29 rows remaining
bird_ref_changed_shorthand

bird_shorthand history in crbirding_birds:

crbirding_birds %>%
  filter(bird_reference %in% bird_ref_changed_shorthand$bird_reference) %>%
  select(bird_reference,
         bird_shorthand,
         bird_date_begin,
         bird_date_end) %>%
  arrange(bird_reference)
## filter: removed 12,314 rows (99%), 62 rows remaining
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)

Values of bird_shorthand in crbirding_birds_2019:

crbirding_birds_2019 %>%
  filter(bird_reference %in% bird_ref_changed_shorthand$bird_reference) %>%
  select(bird_reference,
         bird_shorthand,
         bird_date_begin,
         bird_date_end) %>%
  # add "_2019" to make sure we are speaking of 2019's export data
  rename_all(paste0, "_2019") %>%
  arrange(bird_reference_2019)
## filter: removed 10,813 rows (>99%), 37 rows remaining
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
## rename_all: renamed 4 variables (bird_reference_2019, bird_shorthand_2019, bird_date_begin_2019, bird_date_end_2019)

Most of these changes are due to change the color rings.

7.3.6 Changed bird_scheme

Rings with changed bird_scheme:

changed_bird_scheme <- 
  crbirding_birds %>%
  # remove new rings
  filter(bird_reference - 20000 < 0) %>%
  anti_join(crbirding_birds_2019,
            by = c("bird_reference", "bird_scheme")) %>%
  select(bird_reference, bird_reference,
         bird_scheme, bird_date_begin, bird_date_end) %>%
  left_join(crbirding_birds_2019 %>%
              rename(bird_scheme_2019 = bird_scheme) %>%
              select(bird_reference,
                     bird_scheme_2019,
                     bird_date_begin,
                     bird_date_end),
            by = (c("bird_reference", "bird_date_begin", "bird_date_end"))) %>%
  filter(!is.na(bird_scheme_2019) | !is.na(bird_scheme)) %>%
  select(bird_reference,
         bird_scheme,
         bird_scheme_2019,
         everything())
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
##            > rows only in x   4,401
##            > rows only in y  (4,412)
##            > matched rows    (6,438)
##            >                 =======
##            > rows total       4,401
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_ring_number, …)
## rename: renamed one variable (bird_scheme_2019)
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_ring_number, …)
## left_join: added one column (bird_scheme_2019)
##            > rows only in x      53
##            > rows only in y  (6,502)
##            > matched rows     4,348
##            >                 =======
##            > rows total       4,401
## filter: removed 13 rows (<1%), 4,388 rows remaining
## select: columns reordered (bird_reference, bird_scheme, bird_scheme_2019, bird_date_begin, bird_date_end)
changed_bird_scheme

Mapping values:

changed_bird_scheme %>%
  group_by(bird_scheme, bird_scheme_2019) %>%
  count()
## group_by: 2 grouping variables (bird_scheme, bird_scheme_2019)
## count: now 4 rows and 3 columns, 2 group variables remaining (bird_scheme, bird_scheme_2019)

7.3.7 Changed bird_ring_number

Rings with changed bird_ring_number:

changed_bird_ring_number <- 
  crbirding_birds %>%
  # remove new rings
  filter(bird_reference - 20000 < 0) %>%
  anti_join(crbirding_birds_2019,
            by = c("bird_reference", "bird_ring_number")) %>%
  select(bird_reference, bird_ring_number, bird_date_begin, bird_date_end) %>%
  left_join(crbirding_birds_2019 %>%
              rename(bird_ring_number_2019 = bird_ring_number) %>%
              select(bird_reference,
                     bird_ring_number_2019,
                     bird_date_begin,
                     bird_date_end),
            by = (c("bird_reference", "bird_date_begin", "bird_date_end"))) %>%
  filter(!is.na(bird_ring_number_2019) | !is.na(bird_ring_number)) %>%
  select(contains("reference"),
         contains("number"),
         contains("date"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
##            > rows only in x      110
##            > rows only in y  (   145)
##            > matched rows    (10,729)
##            >                 ========
##            > rows total          110
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## rename: renamed one variable (bird_ring_number_2019)
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## left_join: added one column (bird_ring_number_2019)
##            > rows only in x       25
##            > rows only in y  (10,765)
##            > matched rows         85
##            >                 ========
##            > rows total          110
## filter: removed 15 rows (14%), 95 rows remaining
## select: columns reordered (bird_reference, bird_ring_number, bird_ring_number_2019, bird_date_begin, bird_date_end)
changed_bird_ring_number

7.3.8 Changed bird_name

Rings with changed bird_name:

changed_bird_name <- 
  crbirding_birds %>%
  # remove new rings
  filter(bird_reference - 20000 < 0) %>%
  anti_join(crbirding_birds_2019,
            by = c("bird_reference", "bird_name")) %>%
  select(bird_reference, 
         bird_name, 
         bird_date_begin,
         bird_date_end) %>%
  left_join(crbirding_birds_2019 %>%
              rename(bird_name_2019 = bird_name) %>%
              select(bird_reference,
                     bird_name_2019,
                     bird_date_begin,
                     bird_date_end),
            by = (c("bird_reference", "bird_date_begin", "bird_date_end"))) %>%
  filter(!is.na(bird_name_2019) | !is.na(bird_name)) %>%
  select(bird_reference,
         contains("name"),
         contains("date"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
##            > rows only in x        2
##            > rows only in y  (    53)
##            > matched rows    (10,837)
##            >                 ========
##            > rows total            2
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## rename: renamed one variable (bird_name_2019)
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## left_join: added one column (bird_name_2019)
##            > rows only in x        2
##            > rows only in y  (10,850)
##            > matched rows          0
##            >                 ========
##            > rows total            2
## filter: no rows removed
## select: columns reordered (bird_reference, bird_name, bird_name_2019, bird_date_begin, bird_date_end)
changed_bird_name

7.3.9 Changed bird_sex

Rings with changed bird_sex:

changed_bird_sex <- 
  crbirding_birds %>%
  # remove new rings
  filter(bird_reference - 20000 < 0) %>%
  anti_join(crbirding_birds_2019,
            by = c("bird_reference", "bird_sex")) %>%
  select(bird_reference, 
         bird_sex, 
         bird_date_begin,
         bird_date_end) %>%
  left_join(crbirding_birds_2019 %>%
              rename(bird_sex_2019 = bird_sex) %>%
              select(bird_reference,
                     bird_sex_2019,
                     bird_date_begin,
                     bird_date_end),
            by = (c("bird_reference", "bird_date_begin", "bird_date_end"))) %>%
  filter(!is.na(bird_sex_2019) | !is.na(bird_sex)) %>%
  select(contains("reference"),
         contains("sex"),
         contains("date"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
##            > rows only in x       55
##            > rows only in y  (   105)
##            > matched rows    (10,784)
##            >                 ========
##            > rows total           55
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## rename: renamed one variable (bird_sex_2019)
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## left_join: added one column (bird_sex_2019)
##            > rows only in x        4
##            > rows only in y  (10,799)
##            > matched rows         51
##            >                 ========
##            > rows total           55
## filter: no rows removed
## select: columns reordered (bird_reference, bird_sex, bird_sex_2019, bird_date_begin, bird_date_end)
changed_bird_sex

7.3.10 Changed bird_date_begin

Rings with changed bird_date_begin excluding the birds with new rings:

same_rings <- 
  crbirding_birds %>%
  # remove new rings
  filter(bird_reference - 20000 < 0) %>%
  select(bird_reference,
         bird_shorthand,
         bird_date_begin,
         bird_date_end) %>%
  inner_join(crbirding_birds_2019 %>%
               select(bird_reference,
                      bird_date_begin_2019 = bird_date_begin,
                      bird_shorthand,
                      bird_date_end_2019 = bird_date_end),
             by = c("bird_reference", "bird_shorthand"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
## select: renamed 2 variables (bird_date_begin_2019, bird_date_end_2019) and dropped 13 variables
## inner_join: added 2 columns (bird_date_begin_2019, bird_date_end_2019)
##             > rows only in x  (    50)
##             > rows only in y  (    77)
##             > matched rows     11,032    (includes duplicates)
##             >                 ========
##             > rows total       11,032
changed_bird_date_begin_end <-
  same_rings %>%
  group_by(bird_reference) %>%
  filter(!bird_date_begin %in% bird_date_begin_2019 | 
           !bird_date_end %in% bird_date_end_2019) %>%
  filter(!is.na(bird_date_begin) &
           !is.na(bird_date_begin_2019) &
           !is.na(bird_date_end) &
           !is.na(bird_date_end_2019)) %>%
  distinct(bird_reference)
## group_by: one grouping variable (bird_reference)
## filter (grouped): removed 10,954 rows (99%), 78 rows remaining
## filter (grouped): removed 77 rows (99%), one row remaining
## distinct (grouped): no rows removed
changed_bird_date_begin_end

Actual color ring history:

changed_bird_date_begin_end %>%
  left_join(crbirding_birds,
            by = "bird_reference") %>%
  select(bird_reference, bird_shorthand, bird_date_begin, bird_date_end)
## left_join: added 16 columns (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
##            > rows only in x        0
##            > rows only in y  (12,373)
##            > matched rows          3    (includes duplicates)
##            >                 ========
##            > rows total            3
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)

Color ring history data from 2019:

changed_bird_date_begin_end %>%
  left_join(crbirding_birds_2019,
            by = "bird_reference") %>%
  select(bird_reference, bird_shorthand, bird_date_begin, bird_date_end)
## left_join: added 16 columns (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
##            > rows only in x        0
##            > rows only in y  (10,846)
##            > matched rows          4    (includes duplicates)
##            >                 ========
##            > rows total            4
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)

7.3.11 Save bird data with updated bird_reference

We save crbirding_birds and crbirding_observations with the updated unique identifiers in bird_reference:

# save bird data
crbirding_birds <- 
  crbirding_birds %>%
  mutate(
    bird_date_begin = as.Date(bird_date_begin),
    bird_date_end = as.Date(bird_date_end)) %>%
  write_csv(path = here::here("data",
                              "processed",
                              "crbirding_birds.csv"),
  na = ""
)
## mutate: converted 'bird_date_begin' from double to Date (0 new NA)
##         converted 'bird_date_end' from double to Date (0 new NA)

7.4 Map observations

Number of new observations based on new observation_reference (email and other private info not shown):

crbirding_observations %>%
  filter(!observation_reference %in% 
           crbirding_observations_2019$observation_reference) %>%
  nrow()
## filter: removed 134,917 rows (90%), 15,745 rows remaining
## [1] 15745

Old observations not anymore present (all columns info from 2019 data):

crbirding_observations_2019 %>%
  filter(!observation_reference %in% 
           crbirding_observations$observation_reference) %>%
  select(-contains("melder"))
## filter: removed 134,917 rows (>99%), 117 rows remaining
## select: dropped 3 variables (melder, melder_email, melder_ringersnummer)

Save in same_ref_ids the observation reference of observations present in both data.frames:

same_ref_ids <- 
  crbirding_observations %>%
  filter(observation_reference %in% crbirding_observations_2019$observation_reference) %>%
  pull(observation_reference)
## filter: removed 15,745 rows (10%), 134,917 rows remaining

7.4.1 Changed geospatial information

Details of observations with changes in latitude (observation_lat) and/or longitude (observation_lng):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "observation_lat",
                   "observation_lng")) %>%
  select(observation_reference, observation_lat, observation_lng) %>%
  rename(observation_lat_2019 = observation_lat,
         observation_lng_2019 = observation_lng) %>%
  left_join(crbirding_observations %>%
              select(observation_reference,
                     observation_lat,
                     observation_lng),
            by = "observation_reference") %>%
  select(observation_reference,
         observation_lat_2019,
         observation_lat,
         observation_lng_2019,
         observation_lng)
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x    11,216
##            > rows only in y  ( 26,961)
##            > matched rows    (123,701)
##            >                 =========
##            > rows total        11,216
## select: dropped 22 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed 2 variables (observation_lat_2019, observation_lng_2019)
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added 2 columns (observation_lat, observation_lng)
##            > rows only in x         0
##            > rows only in y  (139,446)
##            > matched rows      11,216
##            >                 =========
##            > rows total        11,216
## select: columns reordered (observation_reference, observation_lat_2019, observation_lat, observation_lng_2019, observation_lng)

Notice how these are observations where the longitude direction was not mapped properly during export of 2019 (see issue #105).

Details of observations with changes in location (column observation_location):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "observation_location")) %>%
  select(observation_reference, observation_location) %>%
  rename(observation_location_2019 = observation_location) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, observation_location),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x       147
##            > rows only in y  ( 15,892)
##            > matched rows    (134,770)
##            >                 =========
##            > rows total           147
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_location_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_location)
##            > rows only in x         0
##            > rows only in y  (150,515)
##            > matched rows         147
##            >                 =========
##            > rows total           147

7.4.2 Changed temporal information

Details of observations with changes in temporal information (column observation_date):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "observation_date")) %>%
  select(observation_reference, observation_date) %>%
  rename(observation_date_2019 = observation_date) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, observation_date),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x         5
##            > rows only in y  ( 15,750)
##            > matched rows    (134,912)
##            >                 =========
##            > rows total             5
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_date_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_date)
##            > rows only in x         0
##            > rows only in y  (150,657)
##            > matched rows           5
##            >                 =========
##            > rows total             5

7.4.3 Changed user information

Details of observations with changes in user detail(no details about users shown, just user_reference):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "user_reference",
                   "melder",
                   "melder_email")) %>%
  select(observation_reference, user_reference) %>%
  rename(user_reference_2019 = user_reference) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, user_reference),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x        71
##            > rows only in y  ( 15,816)
##            > matched rows    (134,846)
##            >                 =========
##            > rows total            71
## select: dropped 23 variables (user_id, bird_id, bird_reference, observation_id, observation_date, …)
## rename: renamed one variable (user_reference_2019)
## select: dropped 24 variables (user_id, bird_id, bird_reference, observation_id, observation_date, …)
## left_join: added one column (user_reference)
##            > rows only in x         0
##            > rows only in y  (150,591)
##            > matched rows          71
##            >                 =========
##            > rows total            71

Most user_reference are the same as in 2019: it means that changes are in the columns hidden for privacy reasons. These user ids are indeed the ones stored above in variable ids_users_with_changes in the Users section.

7.4.4 Changed metal ring information

Details of observations with changes in metal ring information (columns MRI and ring_number):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "MRI",
                   "ring_number")) %>%
  select(observation_reference, MRI, ring_number) %>%
  rename(MRI_2019 = MRI,
         ring_number_2019 = ring_number) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, MRI, ring_number),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x         5
##            > rows only in y  ( 15,750)
##            > matched rows    (134,912)
##            >                 =========
##            > rows total             5
## select: dropped 22 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed 2 variables (MRI_2019, ring_number_2019)
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added 2 columns (MRI, ring_number)
##            > rows only in x         0
##            > rows only in y  (150,657)
##            > matched rows           5
##            >                 =========
##            > rows total             5

7.4.5 Changed condition

First we rename column condition of crbirding_observation_2019 to new column name observation_condition:

crbirding_observations_2019 <-
  crbirding_observations_2019 %>%
  rename(observation_condition = condition)
## rename: renamed one variable (observation_condition)

Details of observations with changes in condition:

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "observation_condition")) %>%
  select(observation_reference, observation_condition) %>%
  rename(observation_condition_2019 = observation_condition) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, observation_condition),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x   134,917
##            > rows only in y  (150,662)
##            > matched rows    (      0)
##            >                 =========
##            > rows total       134,917
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_condition_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_condition)
##            > rows only in x         0
##            > rows only in y  ( 15,745)
##            > matched rows     134,917
##            >                 =========
##            > rows total       134,917

A lot of changes expected as the mapping codes used for observation_condition changed.

7.4.6 Changed status

First we rename column status of crbirding_observation_2019 to new column name status_full_grown_bird:

crbirding_observations_2019 <-
  crbirding_observations_2019 %>%
  rename(status_full_grown_bird = status)
## rename: renamed one variable (status_full_grown_bird)

Details of observations with changes in status (column status_full_grown_bird):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "status_full_grown_bird")) %>%
  select(observation_reference, status_full_grown_bird) %>%
  rename(status_full_grown_bird_2019 = status_full_grown_bird) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, status_full_grown_bird),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x        21
##            > rows only in y  ( 15,766)
##            > matched rows    (134,896)
##            >                 =========
##            > rows total            21
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (status_full_grown_bird_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (status_full_grown_bird)
##            > rows only in x         0
##            > rows only in y  (150,641)
##            > matched rows          21
##            >                 =========
##            > rows total            21

7.4.7 Changed bird age

Details of observations with changes in bird age at time of observation (column bird_age_obs):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "bird_age_obs")) %>%
  select(observation_reference, bird_age_obs) %>%
  rename(bird_age_obs_2019 = bird_age_obs) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, bird_age_obs),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x        21
##            > rows only in y  ( 15,766)
##            > matched rows    (134,896)
##            >                 =========
##            > rows total            21
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (bird_age_obs_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (bird_age_obs)
##            > rows only in x         0
##            > rows only in y  (150,641)
##            > matched rows          21
##            >                 =========
##            > rows total            21

7.4.8 Changed bird sex

Details of observations with changes in bird sex at time of observation (column bird_sex):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "bird_sex")) %>%
  select(observation_reference, bird_sex) %>%
  rename(bird_sex_2019 = bird_sex) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, bird_sex),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x         0
##            > rows only in y  ( 15,745)
##            > matched rows    (134,917)
##            >                 =========
##            > rows total             0
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (bird_sex_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (bird_sex)
##            > rows only in x         0
##            > rows only in y  (150,662)
##            > matched rows           0
##            >                 =========
##            > rows total             0

7.4.9 Changed bird ring position

Details of observations with changes in bird ring position (column bird_ring_position) or direction (bird_ring_direction):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference", 
                   "bird_ring_position",
                   "bird_ring_direction")) %>%
  select(observation_reference,
         bird_ring_position, 
         bird_ring_direction) %>%
  rename(bird_ring_position_2019 = bird_ring_position,
         bird_ring_direction_2019 = bird_ring_direction) %>%
  left_join(crbirding_observations %>%
              select(observation_reference,
                     bird_ring_position,
                     bird_ring_direction),
            by = "observation_reference") %>%
  select(observation_reference,
         bird_ring_position_2019,
         bird_ring_position,
         bird_ring_direction_2019,
         bird_ring_direction)
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x         0
##            > rows only in y  ( 15,745)
##            > matched rows    (134,917)
##            >                 =========
##            > rows total             0
## select: dropped 22 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed 2 variables (bird_ring_position_2019, bird_ring_direction_2019)
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added 2 columns (bird_ring_position, bird_ring_direction)
##            > rows only in x         0
##            > rows only in y  (150,662)
##            > matched rows           0
##            >                 =========
##            > rows total             0
## select: columns reordered (observation_reference, bird_ring_position_2019, bird_ring_position, bird_ring_direction_2019, bird_ring_direction)

7.4.10 Changed notes

Details of observations with changes in observation notes (column observation_notes):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "observation_notes")) %>%
  select(observation_reference, observation_notes) %>%
  rename(observation_notes_2019 = observation_notes) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, observation_notes),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x   39,847
##            > rows only in y  (55,592)
##            > matched rows    (95,070)
##            >                 ========
##            > rows total       39,847
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_notes_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_notes)
##            > rows only in x         0
##            > rows only in y  (110,815)
##            > matched rows      39,847
##            >                 =========
##            > rows total        39,847

7.4.11 Changed capture information

Details of observations with changes in capture information (column observation_is_capture):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("observation_reference",
                   "observation_is_capture")) %>%
  select(observation_reference, observation_is_capture) %>%
  rename(observation_is_capture_2019 = observation_is_capture) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, observation_is_capture),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x         0
##            > rows only in y  ( 15,745)
##            > matched rows    (134,917)
##            >                 =========
##            > rows total             0
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_is_capture_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_is_capture)
##            > rows only in x         0
##            > rows only in y  (150,662)
##            > matched rows           0
##            >                 =========
##            > rows total             0

7.4.12 Changed bird reference

Details of observations with changes in bird reference (column bird_reference):

crbirding_observations_2019 %>%
  filter(observation_reference %in% same_ref_ids) %>%
  anti_join(crbirding_observations,
            by = c("bird_reference",
                   "observation_reference")) %>%
  select(observation_reference, bird_reference) %>%
  rename(bird_reference_2019 = bird_reference) %>%
  left_join(crbirding_observations %>%
              select(observation_reference, bird_reference),
            by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
##            > rows only in x        33
##            > rows only in y  ( 15,778)
##            > matched rows    (134,884)
##            >                 =========
##            > rows total            33
## select: dropped 23 variables (user_id, user_reference, bird_id, observation_id, observation_date, …)
## rename: renamed one variable (bird_reference_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, observation_id, observation_date, …)
## left_join: added one column (bird_reference)
##            > rows only in x         0
##            > rows only in y  (150,629)
##            > matched rows          33
##            >                 =========
##            > rows total            33

The huge number of changes of bird_reference has been discussed in previous session related to crbirding_birds.

7.4.13 Save observation data with updated bird_reference

We save observation data, crbirding_observations, with the updated bird unique identifiers, bird_reference:

crbirding_observations <- 
  crbirding_observations %>%
  mutate(
    observation_date = as.Date(observation_date)) %>%
  write_csv(path = here::here("data",
                              "processed",
                              "crbirding_observations.csv"),
  na = ""
)
## mutate: converted 'observation_date' from double to Date (0 new NA)