7 Compare to data transfer of 2019 and preserve IDs
SOVON asks us to preserve IDs used in mapped data as sent in April 2019.
7.1 Read data
First, we need to read the data we sent them in April 2019.
crbirding_users_2019 <- read_csv(here::here("data",
"processed",
"crbirding_users_20190410.csv"),
col_types = cols(
.default = col_character(),
user_id = col_logical(),
user_reference = col_number()),
na = "")
crbirding_birds_2019 <- read_csv(here::here("data",
"processed",
"crbirding_birds_20190410.csv"),
col_types = cols(
.default = col_character(),
bird_id = col_logical(),
bird_reference = col_number(),
bird_bto = col_logical(),
bird_birth_year = col_logical(),
bird_date_begin = col_datetime(
format = "%d-%m-%Y"),
bird_date_end = col_datetime(
format = "%d-%m-%Y")),
na = "")
crbirding_observations_2019 <- read_csv(here::here(
"data",
"processed",
"crbirding_observations_20190410.csv"),
col_types = cols(
.default = col_character(),
user_id = col_logical(),
user_reference = col_number(),
bird_id = col_logical(),
bird_reference = col_number(),
observation_id = col_logical(),
observation_reference = col_number(),
observation_date = col_datetime(
format = "%d-%m-%Y"),
observation_time = col_logical(),
check_bird = col_logical(),
MRI = col_number(),
melder_ringersnummer = col_logical(),
condition = col_number()
),
na = "")
We read also the data we mapped just now.
crbirding_users <- read_csv(here::here("data",
"processed",
"crbirding_users.csv"),
col_types = cols(
.default = col_character(),
user_id = col_logical(),
user_reference = col_number()),
na = "")
crbirding_birds <- read_csv(here::here("data",
"processed",
"crbirding_birds.csv"),
col_types = cols(
.default = col_character(),
bird_id = col_logical(),
bird_reference = col_number(),
bird_bto = col_logical(),
bird_birth_year = col_logical(),
bird_date_begin = col_datetime(
format = ""),
bird_date_end = col_datetime(
format = "")),
na = "")
crbirding_observations <- read_csv(here::here(
"data",
"processed",
"crbirding_observations.csv"),
col_types = cols(
.default = col_character(),
user_id = col_logical(),
user_reference = col_number(),
bird_id = col_logical(),
bird_reference = col_number(),
observation_id = col_logical(),
observation_reference = col_number(),
observation_date = col_datetime(
format = ""),
observation_time = col_logical(),
check_bird = col_logical(),
MRI = col_number(),
melder_ringersnummer = col_logical(),
observation_condition = col_number()
),
na = "")
To improve the comparison of bird data, we read also raw observations:
obs_and_acts <- read_tsv(
here::here("data", "interim", "obs_and_actions.tsv"),
col_types = cols(
.default = col_character(),
sovon_bird_reference = col_double(),
Nummer = col_double(),
Datum = col_datetime(format = ""),
WaarnemerNummer = col_double(),
PlaatsLengtegraadGraden = col_double(),
PlaatsLengtegraadMinuten = col_double(),
PlaatsLengtegraadSeconden = col_double(),
PlaatsBreedtegraadGraden = col_double(),
PlaatsBreedtegraadMinuten = col_double(),
PlaatsBreedtegraadSeconden = col_double(),
BevestigingDatum = col_datetime(format = ""),
AanmaakDatum = col_datetime(format = ""),
WijzigDatum = col_datetime(format = "")
)
)
7.2 Users
Number of new users based on new user_reference
(email and other private info not shown):
crbirding_users %>%
filter(!user_reference %in% crbirding_users_2019$user_reference) %>%
select(user_reference, user_country, user_role)
## filter: removed 1,861 rows (91%), 178 rows remaining
## select: dropped 8 variables (user_id, user_email, user_first_name, user_last_name, user_address, …)
Old user_reference
IDs not anymore present (email and other private info not shown):
crbirding_users_2019 %>%
filter(!user_reference %in% crbirding_users$user_reference) %>%
select(user_reference, user_country, user_role)
## filter: removed 1,861 rows (>99%), one row remaining
## select: dropped 8 variables (user_id, user_email, user_first_name, user_last_name, user_address, …)
Users from 2019 where some changes have been applied:
same_ref_ids <-
crbirding_users %>%
filter(user_reference %in% crbirding_users_2019$user_reference) %>%
pull(user_reference)
## filter: removed 178 rows (9%), 1,861 rows remaining
ids_users_with_changes <-
crbirding_users_2019 %>%
filter(user_reference %in% same_ref_ids) %>%
anti_join(crbirding_users,
by = names(crbirding_users)) %>%
pull(user_reference)
## filter: removed one row (<1%), 1,861 rows remaining
## anti_join: added no columns
## > rows only in x 7
## > rows only in y ( 185)
## > matched rows (1,854)
## > =======
## > rows total 7
## [1] 880 529 1609 1913 712 1786 542
7.3 Map birds
Before mapping birds, we have to modify the color rings in crbirding_birds_2019
by applying the syntax discussed in #107 and #67. SOVON asks to follow this syntax: ring color + hyphen (-
) + inscription without inscription color, e.g. W-EAAM
instead of WN(EAAM)
.
First, check that all color rings from 2019 have two letters before bracket (
:
crbirding_birds_2019 %>%
filter(is.na(bird_shorthand) |
str_sub(bird_shorthand, start = 3, end = 3) == "(") %>%
nrow == nrow(crbirding_birds_2019)
## filter: no rows removed
## [1] TRUE
We proceed then to modify the color rings:
crbirding_birds_2019 <-
crbirding_birds_2019 %>%
mutate(bird_shorthand = if_else(
!is.na(bird_shorthand),
str_c(str_sub(bird_shorthand, start = 1, end = 1),
"-",
str_extract(bird_shorthand, pattern = "(?<=\\()(.*?)(?=\\))")),
bird_shorthand
))
## mutate: changed 10,126 values (93%) of 'bird_shorthand' (0 new NA)
Preview:
crbirding_birds_2019 %>%
select(bird_shorthand) %>%
filter(!is.na(bird_shorthand)) %>%
head(n = 100)
## select: dropped 16 variables (bird_id, bird_reference, bird_euring, bird_bto, bird_scheme, …)
## filter: removed 724 rows (7%), 10,126 rows remaining
Now we can proceed mapping birds data.
7.3.1 Map bird_reference
In INBO ring table, birds were uniquely identified by their very first ring (data were spread and untidy). We added a bird_reference
field to gather information about multiple rings and link them to the same bird. This means that this field cannot be used to compare the actual INBO data with the data of 2019. We have to compare data by the very first color ring.
As the dot for readibility is only preserved for the very last ring, it can be better to compare data by removing it from bird shorthand. We create an auxiliary column called inscription
for both crbirding_birds
and crbirding_birds_2019
:
crbirding_birds_2019 <-
crbirding_birds_2019 %>%
mutate(inscription = if_else(!is.na(bird_shorthand),
str_extract(
str_remove(bird_shorthand, "\\."),
"(?<=\\-)[A-Z0-9]*"),
NA_character_))
## mutate: new variable 'inscription' (character) with 10,014 unique values and 7% NA
crbirding_birds <-
crbirding_birds %>%
mutate(inscription = if_else(!is.na(bird_shorthand),
str_extract(
str_remove(bird_shorthand, "\\."),
"(?<=\\-)[A-Z0-9]*"),
NA_character_))
## mutate: new variable 'inscription' (character) with 11,486 unique values and 6% NA
We add the first color ring inscription as additional column, first_color_ring
:
crbirding_birds_first_color_ring <-
crbirding_birds %>%
select(bird_reference, inscription, bird_date_begin) %>%
group_by(bird_reference) %>%
filter(!is.na(inscription)) %>%
mutate(first_color_ring = if_else(
bird_date_begin == min(bird_date_begin, na.rm = TRUE),
inscription,
NA_character_)) %>%
ungroup() %>%
filter(!is.na(first_color_ring)) %>%
distinct(bird_reference, first_color_ring) %>%
right_join(crbirding_birds, by = c("bird_reference"))
## select: dropped 15 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## group_by: one grouping variable (bird_reference)
## filter (grouped): removed 762 rows (6%), 11,614 rows remaining
## mutate (grouped): new variable 'first_color_ring' (character) with 11,137 unique values and 4% NA
## ungroup: no grouping variables
## filter: removed 478 rows (4%), 11,136 rows remaining
## distinct: no rows removed
## right_join: added 17 columns (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## > rows only in x ( 0)
## > rows only in y 0
## > matched rows 12,376
## > ========
## > rows total 12,376
All birds should get a valid first_color_ring
column. Exceptions:
## filter: removed all rows (100%)
We do the same with ring data from 2019:
crbirding_birds_first_color_ring_2019 <-
crbirding_birds_2019 %>%
select(bird_reference, inscription, bird_date_begin) %>%
group_by(bird_reference) %>%
filter(!is.na(inscription)) %>%
mutate(first_color_ring = if_else(
bird_date_begin == min(bird_date_begin,na.rm = TRUE),
inscription,
NA_character_)) %>%
ungroup() %>%
filter(!is.na(first_color_ring)) %>%
distinct(bird_reference, first_color_ring) %>%
right_join(crbirding_birds_2019, by = c("bird_reference"))
## select: dropped 15 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## group_by: one grouping variable (bird_reference)
## filter (grouped): removed 724 rows (7%), 10,126 rows remaining
## mutate (grouped): new variable 'first_color_ring' (character) with 9,686 unique values and 4% NA
## ungroup: no grouping variables
## filter: removed 441 rows (4%), 9,685 rows remaining
## distinct: no rows removed
## right_join: added 17 columns (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## > rows only in x ( 0)
## > rows only in y 0
## > matched rows 10,850
## > ========
## > rows total 10,850
Even in this case all birds should get a valid first_color_ring
column. Exceptions:
## filter: removed all rows (100%)
Now we can map the new bird_reference
to the bird_reference
of 2019:
crbirding_birds_mapping_bird_reference <-
crbirding_birds_first_color_ring %>%
distinct(bird_reference, first_color_ring) %>%
left_join(crbirding_birds_first_color_ring_2019 %>%
distinct(bird_reference, first_color_ring) %>%
rename(bird_reference_2019 = bird_reference),
by = "first_color_ring") %>%
select(first_color_ring, everything())
## distinct: removed 1,240 rows (10%), 11,136 rows remaining
## distinct: removed 1,165 rows (11%), 9,685 rows remaining
## rename: renamed one variable (bird_reference_2019)
## left_join: added one column (bird_reference_2019)
## > rows only in x 1,504
## > rows only in y ( 53)
## > matched rows 9,632
## > ========
## > rows total 11,136
## select: columns reordered (first_color_ring, bird_reference, bird_reference_2019)
We add bird_reference_2019
to crbirding_birds
and crbirding_observations
:
crbirding_birds <-
crbirding_birds %>%
left_join((crbirding_birds_mapping_bird_reference %>%
select(bird_reference, bird_reference_2019)),
by = "bird_reference") %>%
select(bird_id, bird_reference, bird_reference_2019, everything())
## select: dropped one variable (first_color_ring)
## left_join: added one column (bird_reference_2019)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 12,376
## > ========
## > rows total 12,376
## select: columns reordered (bird_id, bird_reference, bird_reference_2019, bird_euring, bird_bto, …)
and crbirding_observations
:
crbirding_observations <-
crbirding_observations %>%
left_join((crbirding_birds_mapping_bird_reference %>%
select(bird_reference, bird_reference_2019)),
by = "bird_reference")
## select: dropped one variable (first_color_ring)
## left_join: added one column (bird_reference_2019)
## > rows only in x 0
## > rows only in y ( 0)
## > matched rows 150,662
## > =========
## > rows total 150,662
Number of birds present in 2019 but with bird_reference
not equal to bird_reference_2019
:
crbirding_birds %>%
filter(!is.na(bird_reference_2019)) %>%
filter(bird_reference != bird_reference_2019) %>%
nrow
## filter: removed 1,538 rows (12%), 10,838 rows remaining
## filter: removed 1,530 rows (14%), 9,308 rows remaining
## [1] 9308
This is not surprising as bird_reference
has been defined as a progressive number, so it is sufficient the change of one bird reference to modify all the other references coming after.
First change appears at:
crbirding_birds %>%
filter(!is.na(bird_reference_2019)) %>%
filter(bird_reference != bird_reference_2019) %>%
filter(bird_reference == min(bird_reference)) %>%
select(bird_reference, bird_reference_2019, bird_shorthand)
## filter: removed 1,538 rows (12%), 10,838 rows remaining
## filter: removed 1,530 rows (14%), 9,308 rows remaining
## filter: removed 9,306 rows (>99%), 2 rows remaining
## select: dropped 16 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
7.3.2 New bird_reference
Some birds were not present in data transfer of April 2019, so the bird_reference
cannot be mapped to bird_reference_2019
.
This case includes:
- New birds: birds ringed for the very first time after the previous data transfer in April 2019.
- Birds with modified first color ring: birds whose very first color ring has been corrected or added. This case is due to the fact that the first color ring is the real unique identifier in INBO ring table
tblKleurring
crbirding_birds %>%
anti_join(crbirding_birds_2019,
by = c("bird_reference_2019" = "bird_reference"))
## anti_join: added no columns
## > rows only in x 1,538
## > rows only in y ( 54)
## > matched rows (10,838)
## > ========
## > rows total 1,538
Based on column bird_date_begin
(see above) we can see that most but not all rings have been placed after 2019-04-10, date of previous export. It can also occur that there are old rings which have been added to database after that date, so they are actually new rings, database speaking. So, to check which rings have been entered before 2019-04-10 we use column AanmaakDatum
in raw observation data:
supected_new_birds_before_export_date <-
crbirding_birds %>%
filter(is.na(bird_reference_2019)) %>%
left_join(crbirding_birds_first_color_ring %>%
distinct(bird_reference, first_color_ring),
by = "bird_reference") %>%
select(bird_reference, first_color_ring) %>%
left_join(obs_and_acts,
by = c("first_color_ring" = "KleurringNummer")) %>%
select(bird_reference, first_color_ring, AanmaakDatum) %>%
group_by(bird_reference, first_color_ring) %>%
filter(AanmaakDatum == min(AanmaakDatum)) %>%
ungroup() %>%
filter(AanmaakDatum < as_datetime("2019-04-10")) %>%
left_join(crbirding_birds,
by = c("bird_reference"))
## filter: removed 10,838 rows (88%), 1,538 rows remaining
## distinct: removed 1,240 rows (10%), 11,136 rows remaining
## left_join: added one column (first_color_ring)
## > rows only in x 0
## > rows only in y (9,632)
## > matched rows 1,538
## > =======
## > rows total 1,538
## select: dropped 18 variables (bird_id, bird_reference_2019, bird_euring, bird_bto, bird_shorthand, …)
## left_join: added 41 columns (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## > rows only in x 4
## > rows only in y (145,639)
## > matched rows 5,245 (includes duplicates)
## > =========
## > rows total 5,249
## select: dropped 40 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## group_by: 2 grouping variables (bird_reference, first_color_ring)
## filter (grouped): removed 3,633 rows (69%), 1,616 rows remaining
## ungroup: no grouping variables
## filter: removed 1,614 rows (>99%), 2 rows remaining
## left_join: added 18 columns (bird_id, bird_reference_2019, bird_euring, bird_bto, bird_shorthand, …)
## > rows only in x 0
## > rows only in y (12,374)
## > matched rows 2
## > ========
## > rows total 2
To better retrieve potential matches to birds from 2019’s export, we check birds from 2019 not included in actual data:
crbirding_birds_first_color_ring_2019 %>%
anti_join(crbirding_birds_first_color_ring,
by = "first_color_ring")
## anti_join: added no columns
## > rows only in x 54
## > rows only in y ( 1,538)
## > matched rows (10,796)
## > ========
## > rows total 54
As expected most of these rings are the so called virtual rings that we have decided to not include in new export (see #81-issuecomment-698964036).
If we exclude them from this overview, the following birds remain:
crbirding_birds_first_color_ring_2019 %>%
filter(str_sub(first_color_ring, start = 3, end = 3) != "B") %>%
anti_join(crbirding_birds_first_color_ring,
by = "first_color_ring")
## filter: removed 67 rows (1%), 10,783 rows remaining
## anti_join: added no columns
## > rows only in x 4
## > rows only in y ( 1,555)
## > matched rows (10,779)
## > ========
## > rows total 4
The ring ALOB
is not the first ring in the new data, but it replaces the ring LPAV
. We match it and assign bird_reference_2019
to this ring:
## filter: removed 12,375 rows (>99%), one row remaining
bird_reference_ALOB_2019 <-
crbirding_birds_2019 %>%
filter(inscription == "ALOB") %>%
pull(bird_reference)
## filter: removed 10,849 rows (>99%), one row remaining
crbirding_birds <-
crbirding_birds %>%
mutate(bird_reference_2019 = if_else(bird_reference == bird_reference_ALOB,
bird_reference_ALOB_2019,
bird_reference_2019))
## mutate: changed 2 values (<1%) of 'bird_reference_2019' (0 new NA)
crbirding_observations <-
crbirding_observations %>%
mutate(bird_reference_2019 = if_else(bird_reference == bird_reference_ALOB,
bird_reference_ALOB_2019,
bird_reference_2019))
## mutate: changed 30 values (<1%) of 'bird_reference_2019' (0 new NA)
The ring EAU
has been removed from the database at the preprocessing stage (see section about the removal of some rings).
Based on metal ring (bird_ring_number
) information, date of ringing (bird_date_begin
) and other fields we can see that the ring FMOV is the same as ring FMOU from 2019 export:
all(
crbirding_birds_2019 %>%
filter(bird_shorthand == "B-FM.OU") %>%
select(bird_date_begin, bird_ring_number, bird_age_ringing, bird_ring_position) %>%
pull() ==
(crbirding_birds %>%
filter(bird_shorthand == "B-FM.OV") %>%
select(bird_date_begin, bird_ring_number, bird_age_ringing, bird_ring_position) %>%
pull()))
## filter: removed 10,849 rows (>99%), one row remaining
## select: dropped 14 variables (bird_id, bird_reference, bird_euring, bird_bto, bird_shorthand, …)
## filter: removed 12,375 rows (>99%), one row remaining
## select: dropped 15 variables (bird_id, bird_reference, bird_reference_2019, bird_euring, bird_bto, …)
## [1] TRUE
So, we can assign bird_reference_2019
to this ring:
## filter: removed 12,375 rows (>99%), one row remaining
bird_reference_FMOU_2019 <-
crbirding_birds_2019 %>%
filter(inscription == "FMOU") %>%
pull(bird_reference)
## filter: removed 10,849 rows (>99%), one row remaining
crbirding_birds <-
crbirding_birds %>%
mutate(bird_reference_2019 = if_else(bird_reference == bird_reference_FMOV,
bird_reference_FMOU_2019,
bird_reference_2019))
## mutate: changed one value (<1%) of 'bird_reference_2019' (1 fewer NA)
crbirding_observations <-
crbirding_observations %>%
mutate(bird_reference_2019 = if_else(bird_reference == bird_reference_FMOV,
bird_reference_FMOU_2019,
bird_reference_2019))
## mutate: changed one value (<1%) of 'bird_reference_2019' (1 fewer NA)
Show changes in column bird_reference_2019
:
crbirding_birds %>%
filter(bird_reference %in% c(bird_reference_ALOB, bird_reference_FMOV)) %>%
select(bird_reference, bird_reference_2019, bird_shorthand)
## filter: removed 12,373 rows (>99%), 3 rows remaining
## select: dropped 16 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
7.3.3 Use bird_reference
from 2019 as primary key
To ease the work of SOVON It team and as discussed in issue 128 we reuse the old bird references (bird_reference_2019
) as primary key in crbirding_birds
where present, being careful to change the foreign key in crbirding_observations
as well.
We do it by using a helping column, new_bird_reference
filled with old bird references where possible and with the new ones plus 20000 to ensure unicity:
crbirding_birds <-
crbirding_birds %>%
mutate(new_bird_reference = if_else(!is.na(bird_reference_2019),
bird_reference_2019,
bird_reference + 20000))
## mutate: new variable 'new_bird_reference' (double) with 11,136 unique values and 0% NA
crbirding_observations <-
crbirding_observations %>%
mutate(new_bird_reference = if_else(!is.na(bird_reference_2019),
bird_reference_2019,
bird_reference + 20000))
## mutate: new variable 'new_bird_reference' (double) with 11,136 unique values and 0% NA
Is new_bird_reference
unique for both birds and observations?
nrow(crbirding_birds %>%
distinct(bird_reference, new_bird_reference)) ==
nrow(crbirding_birds %>%
distinct(bird_reference))
## distinct: removed 1,240 rows (10%), 11,136 rows remaining
## distinct: removed 1,240 rows (10%), 11,136 rows remaining
## [1] TRUE
nrow(crbirding_observations %>%
distinct(bird_reference, new_bird_reference)) ==
nrow(crbirding_observations %>%
distinct(bird_reference))
## distinct: removed 139,526 rows (93%), 11,136 rows remaining
## distinct: removed 139,526 rows (93%), 11,136 rows remaining
## [1] TRUE
Again, double checking that new_bird_reference
is unique:
nrow(crbirding_observations %>%
distinct(bird_reference, new_bird_reference)) ==
nrow(crbirding_observations %>%
distinct(bird_reference))
## distinct: removed 139,526 rows (93%), 11,136 rows remaining
## distinct: removed 139,526 rows (93%), 11,136 rows remaining
## [1] TRUE
Final check: are all bird reference ids in new_bird_reference
in crbirding_observations
in crbirding_birds
as well?
crbirding_observations %>%
filter(!new_bird_reference %in% crbirding_birds$new_bird_reference) %>%
nrow() == 0
## filter: removed all rows (100%)
## [1] TRUE
If FALSE
, it means some observations have a broken link to birds and action is needed to solve this bug.
If TRUE
, we can safely copy the values of new_bird_reference
to bird_reference
and remove the help columns bird_reference_2019
and inscription
:
crbirding_birds <-
crbirding_birds %>%
select(-c(bird_reference, bird_reference_2019, inscription)) %>%
rename(bird_reference = new_bird_reference) %>%
# reorder columns
select(bird_id, bird_reference, everything())
## select: dropped 3 variables (bird_reference, bird_reference_2019, inscription)
## rename: renamed one variable (bird_reference)
## select: columns reordered (bird_id, bird_reference, bird_euring, bird_bto, bird_shorthand, …)
crbirding_observations <-
crbirding_observations %>%
select(-bird_reference) %>%
rename(bird_reference = new_bird_reference) %>%
# reorder columns
select(user_id,
user_reference,
bird_id,
bird_reference,
everything())
## select: dropped one variable (bird_reference)
## rename: renamed one variable (bird_reference)
## select: columns reordered (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## select: dropped one variable (inscription)
7.3.4 Changed bird_euring
crbirding_birds %>%
# remove new rings
filter(bird_reference - 20000 < 0) %>%
anti_join(crbirding_birds_2019,
by = c("bird_reference",
"bird_euring")) %>%
select(bird_reference, bird_euring) %>%
left_join(crbirding_birds_2019 %>%
rename(bird_euring_2019 = bird_euring) %>%
select(bird_reference, bird_euring_2019),
by = ("bird_reference"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
## > rows only in x 9
## > rows only in y ( 61)
## > matched rows (10,830)
## > ========
## > rows total 9
## select: dropped 15 variables (bird_id, bird_bto, bird_shorthand, bird_scheme, bird_ring_number, …)
## rename: renamed one variable (bird_euring_2019)
## select: dropped 15 variables (bird_id, bird_bto, bird_shorthand, bird_scheme, bird_ring_number, …)
## left_join: added one column (bird_euring_2019)
## > rows only in x 0
## > rows only in y (10,841)
## > matched rows 9
## > ========
## > rows total 9
7.3.5 Changed bird_shorthand
Birds with changed ring history:
bird_ref_changed_shorthand <-
crbirding_birds %>%
# remove new rings
filter(bird_reference - 20000 < 0) %>%
select(bird_reference, bird_shorthand) %>%
anti_join(crbirding_birds_2019,
by = c("bird_reference", "bird_shorthand")) %>%
distinct(bird_reference)
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## select: dropped 15 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
## anti_join: added no columns
## > rows only in x 50
## > rows only in y ( 77)
## > matched rows (10,789)
## > ========
## > rows total 50
## distinct: removed 21 rows (42%), 29 rows remaining
bird_shorthand
history in crbirding_birds
:
crbirding_birds %>%
filter(bird_reference %in% bird_ref_changed_shorthand$bird_reference) %>%
select(bird_reference,
bird_shorthand,
bird_date_begin,
bird_date_end) %>%
arrange(bird_reference)
## filter: removed 12,314 rows (99%), 62 rows remaining
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
Values of bird_shorthand
in crbirding_birds_2019
:
crbirding_birds_2019 %>%
filter(bird_reference %in% bird_ref_changed_shorthand$bird_reference) %>%
select(bird_reference,
bird_shorthand,
bird_date_begin,
bird_date_end) %>%
# add "_2019" to make sure we are speaking of 2019's export data
rename_all(paste0, "_2019") %>%
arrange(bird_reference_2019)
## filter: removed 10,813 rows (>99%), 37 rows remaining
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
## rename_all: renamed 4 variables (bird_reference_2019, bird_shorthand_2019, bird_date_begin_2019, bird_date_end_2019)
Most of these changes are due to change the color rings.
7.3.6 Changed bird_scheme
Rings with changed bird_scheme
:
changed_bird_scheme <-
crbirding_birds %>%
# remove new rings
filter(bird_reference - 20000 < 0) %>%
anti_join(crbirding_birds_2019,
by = c("bird_reference", "bird_scheme")) %>%
select(bird_reference, bird_reference,
bird_scheme, bird_date_begin, bird_date_end) %>%
left_join(crbirding_birds_2019 %>%
rename(bird_scheme_2019 = bird_scheme) %>%
select(bird_reference,
bird_scheme_2019,
bird_date_begin,
bird_date_end),
by = (c("bird_reference", "bird_date_begin", "bird_date_end"))) %>%
filter(!is.na(bird_scheme_2019) | !is.na(bird_scheme)) %>%
select(bird_reference,
bird_scheme,
bird_scheme_2019,
everything())
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
## > rows only in x 4,401
## > rows only in y (4,412)
## > matched rows (6,438)
## > =======
## > rows total 4,401
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_ring_number, …)
## rename: renamed one variable (bird_scheme_2019)
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_ring_number, …)
## left_join: added one column (bird_scheme_2019)
## > rows only in x 53
## > rows only in y (6,502)
## > matched rows 4,348
## > =======
## > rows total 4,401
## filter: removed 13 rows (<1%), 4,388 rows remaining
## select: columns reordered (bird_reference, bird_scheme, bird_scheme_2019, bird_date_begin, bird_date_end)
Mapping values:
## group_by: 2 grouping variables (bird_scheme, bird_scheme_2019)
## count: now 4 rows and 3 columns, 2 group variables remaining (bird_scheme, bird_scheme_2019)
7.3.7 Changed bird_ring_number
Rings with changed bird_ring_number
:
changed_bird_ring_number <-
crbirding_birds %>%
# remove new rings
filter(bird_reference - 20000 < 0) %>%
anti_join(crbirding_birds_2019,
by = c("bird_reference", "bird_ring_number")) %>%
select(bird_reference, bird_ring_number, bird_date_begin, bird_date_end) %>%
left_join(crbirding_birds_2019 %>%
rename(bird_ring_number_2019 = bird_ring_number) %>%
select(bird_reference,
bird_ring_number_2019,
bird_date_begin,
bird_date_end),
by = (c("bird_reference", "bird_date_begin", "bird_date_end"))) %>%
filter(!is.na(bird_ring_number_2019) | !is.na(bird_ring_number)) %>%
select(contains("reference"),
contains("number"),
contains("date"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
## > rows only in x 110
## > rows only in y ( 145)
## > matched rows (10,729)
## > ========
## > rows total 110
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## rename: renamed one variable (bird_ring_number_2019)
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## left_join: added one column (bird_ring_number_2019)
## > rows only in x 25
## > rows only in y (10,765)
## > matched rows 85
## > ========
## > rows total 110
## filter: removed 15 rows (14%), 95 rows remaining
## select: columns reordered (bird_reference, bird_ring_number, bird_ring_number_2019, bird_date_begin, bird_date_end)
7.3.8 Changed bird_name
Rings with changed bird_name
:
changed_bird_name <-
crbirding_birds %>%
# remove new rings
filter(bird_reference - 20000 < 0) %>%
anti_join(crbirding_birds_2019,
by = c("bird_reference", "bird_name")) %>%
select(bird_reference,
bird_name,
bird_date_begin,
bird_date_end) %>%
left_join(crbirding_birds_2019 %>%
rename(bird_name_2019 = bird_name) %>%
select(bird_reference,
bird_name_2019,
bird_date_begin,
bird_date_end),
by = (c("bird_reference", "bird_date_begin", "bird_date_end"))) %>%
filter(!is.na(bird_name_2019) | !is.na(bird_name)) %>%
select(bird_reference,
contains("name"),
contains("date"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
## > rows only in x 2
## > rows only in y ( 53)
## > matched rows (10,837)
## > ========
## > rows total 2
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## rename: renamed one variable (bird_name_2019)
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## left_join: added one column (bird_name_2019)
## > rows only in x 2
## > rows only in y (10,850)
## > matched rows 0
## > ========
## > rows total 2
## filter: no rows removed
## select: columns reordered (bird_reference, bird_name, bird_name_2019, bird_date_begin, bird_date_end)
7.3.9 Changed bird_sex
Rings with changed bird_sex
:
changed_bird_sex <-
crbirding_birds %>%
# remove new rings
filter(bird_reference - 20000 < 0) %>%
anti_join(crbirding_birds_2019,
by = c("bird_reference", "bird_sex")) %>%
select(bird_reference,
bird_sex,
bird_date_begin,
bird_date_end) %>%
left_join(crbirding_birds_2019 %>%
rename(bird_sex_2019 = bird_sex) %>%
select(bird_reference,
bird_sex_2019,
bird_date_begin,
bird_date_end),
by = (c("bird_reference", "bird_date_begin", "bird_date_end"))) %>%
filter(!is.na(bird_sex_2019) | !is.na(bird_sex)) %>%
select(contains("reference"),
contains("sex"),
contains("date"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## anti_join: added no columns
## > rows only in x 55
## > rows only in y ( 105)
## > matched rows (10,784)
## > ========
## > rows total 55
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## rename: renamed one variable (bird_sex_2019)
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## left_join: added one column (bird_sex_2019)
## > rows only in x 4
## > rows only in y (10,799)
## > matched rows 51
## > ========
## > rows total 55
## filter: no rows removed
## select: columns reordered (bird_reference, bird_sex, bird_sex_2019, bird_date_begin, bird_date_end)
7.3.10 Changed bird_date_begin
Rings with changed bird_date_begin
excluding the birds with new rings:
same_rings <-
crbirding_birds %>%
# remove new rings
filter(bird_reference - 20000 < 0) %>%
select(bird_reference,
bird_shorthand,
bird_date_begin,
bird_date_end) %>%
inner_join(crbirding_birds_2019 %>%
select(bird_reference,
bird_date_begin_2019 = bird_date_begin,
bird_shorthand,
bird_date_end_2019 = bird_date_end),
by = c("bird_reference", "bird_shorthand"))
## filter: removed 1,537 rows (12%), 10,839 rows remaining
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
## select: renamed 2 variables (bird_date_begin_2019, bird_date_end_2019) and dropped 13 variables
## inner_join: added 2 columns (bird_date_begin_2019, bird_date_end_2019)
## > rows only in x ( 50)
## > rows only in y ( 77)
## > matched rows 11,032 (includes duplicates)
## > ========
## > rows total 11,032
changed_bird_date_begin_end <-
same_rings %>%
group_by(bird_reference) %>%
filter(!bird_date_begin %in% bird_date_begin_2019 |
!bird_date_end %in% bird_date_end_2019) %>%
filter(!is.na(bird_date_begin) &
!is.na(bird_date_begin_2019) &
!is.na(bird_date_end) &
!is.na(bird_date_end_2019)) %>%
distinct(bird_reference)
## group_by: one grouping variable (bird_reference)
## filter (grouped): removed 10,954 rows (99%), 78 rows remaining
## filter (grouped): removed 77 rows (99%), one row remaining
## distinct (grouped): no rows removed
Actual color ring history:
changed_bird_date_begin_end %>%
left_join(crbirding_birds,
by = "bird_reference") %>%
select(bird_reference, bird_shorthand, bird_date_begin, bird_date_end)
## left_join: added 16 columns (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## > rows only in x 0
## > rows only in y (12,373)
## > matched rows 3 (includes duplicates)
## > ========
## > rows total 3
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
Color ring history data from 2019:
changed_bird_date_begin_end %>%
left_join(crbirding_birds_2019,
by = "bird_reference") %>%
select(bird_reference, bird_shorthand, bird_date_begin, bird_date_end)
## left_join: added 16 columns (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## > rows only in x 0
## > rows only in y (10,846)
## > matched rows 4 (includes duplicates)
## > ========
## > rows total 4
## select: dropped 13 variables (bird_id, bird_euring, bird_bto, bird_scheme, bird_ring_number, …)
7.3.11 Save bird data with updated bird_reference
We save crbirding_birds
and crbirding_observations
with the updated unique identifiers in bird_reference
:
# save bird data
crbirding_birds <-
crbirding_birds %>%
mutate(
bird_date_begin = as.Date(bird_date_begin),
bird_date_end = as.Date(bird_date_end)) %>%
write_csv(path = here::here("data",
"processed",
"crbirding_birds.csv"),
na = ""
)
## mutate: converted 'bird_date_begin' from double to Date (0 new NA)
## converted 'bird_date_end' from double to Date (0 new NA)
7.4 Map observations
Number of new observations based on new observation_reference
(email and other private info not shown):
crbirding_observations %>%
filter(!observation_reference %in%
crbirding_observations_2019$observation_reference) %>%
nrow()
## filter: removed 134,917 rows (90%), 15,745 rows remaining
## [1] 15745
Old observations not anymore present (all columns info from 2019 data):
crbirding_observations_2019 %>%
filter(!observation_reference %in%
crbirding_observations$observation_reference) %>%
select(-contains("melder"))
## filter: removed 134,917 rows (>99%), 117 rows remaining
## select: dropped 3 variables (melder, melder_email, melder_ringersnummer)
Save in same_ref_ids
the observation reference of observations present in both data.frames:
same_ref_ids <-
crbirding_observations %>%
filter(observation_reference %in% crbirding_observations_2019$observation_reference) %>%
pull(observation_reference)
## filter: removed 15,745 rows (10%), 134,917 rows remaining
7.4.1 Changed geospatial information
Details of observations with changes in latitude (observation_lat
) and/or longitude (observation_lng
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"observation_lat",
"observation_lng")) %>%
select(observation_reference, observation_lat, observation_lng) %>%
rename(observation_lat_2019 = observation_lat,
observation_lng_2019 = observation_lng) %>%
left_join(crbirding_observations %>%
select(observation_reference,
observation_lat,
observation_lng),
by = "observation_reference") %>%
select(observation_reference,
observation_lat_2019,
observation_lat,
observation_lng_2019,
observation_lng)
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 11,216
## > rows only in y ( 26,961)
## > matched rows (123,701)
## > =========
## > rows total 11,216
## select: dropped 22 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed 2 variables (observation_lat_2019, observation_lng_2019)
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added 2 columns (observation_lat, observation_lng)
## > rows only in x 0
## > rows only in y (139,446)
## > matched rows 11,216
## > =========
## > rows total 11,216
## select: columns reordered (observation_reference, observation_lat_2019, observation_lat, observation_lng_2019, observation_lng)
Notice how these are observations where the longitude direction was not mapped properly during export of 2019 (see issue #105).
Details of observations with changes in location (column observation_location
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"observation_location")) %>%
select(observation_reference, observation_location) %>%
rename(observation_location_2019 = observation_location) %>%
left_join(crbirding_observations %>%
select(observation_reference, observation_location),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 147
## > rows only in y ( 15,892)
## > matched rows (134,770)
## > =========
## > rows total 147
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_location_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_location)
## > rows only in x 0
## > rows only in y (150,515)
## > matched rows 147
## > =========
## > rows total 147
7.4.2 Changed temporal information
Details of observations with changes in temporal information (column observation_date
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"observation_date")) %>%
select(observation_reference, observation_date) %>%
rename(observation_date_2019 = observation_date) %>%
left_join(crbirding_observations %>%
select(observation_reference, observation_date),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 5
## > rows only in y ( 15,750)
## > matched rows (134,912)
## > =========
## > rows total 5
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_date_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_date)
## > rows only in x 0
## > rows only in y (150,657)
## > matched rows 5
## > =========
## > rows total 5
7.4.3 Changed user information
Details of observations with changes in user detail(no details about users shown, just user_reference
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"user_reference",
"melder",
"melder_email")) %>%
select(observation_reference, user_reference) %>%
rename(user_reference_2019 = user_reference) %>%
left_join(crbirding_observations %>%
select(observation_reference, user_reference),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 71
## > rows only in y ( 15,816)
## > matched rows (134,846)
## > =========
## > rows total 71
## select: dropped 23 variables (user_id, bird_id, bird_reference, observation_id, observation_date, …)
## rename: renamed one variable (user_reference_2019)
## select: dropped 24 variables (user_id, bird_id, bird_reference, observation_id, observation_date, …)
## left_join: added one column (user_reference)
## > rows only in x 0
## > rows only in y (150,591)
## > matched rows 71
## > =========
## > rows total 71
Most user_reference
are the same as in 2019: it means that changes are in the columns hidden for privacy reasons. These user ids are indeed the ones stored above in variable ids_users_with_changes
in the Users section.
7.4.4 Changed metal ring information
Details of observations with changes in metal ring information (columns MRI
and ring_number
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"MRI",
"ring_number")) %>%
select(observation_reference, MRI, ring_number) %>%
rename(MRI_2019 = MRI,
ring_number_2019 = ring_number) %>%
left_join(crbirding_observations %>%
select(observation_reference, MRI, ring_number),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 5
## > rows only in y ( 15,750)
## > matched rows (134,912)
## > =========
## > rows total 5
## select: dropped 22 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed 2 variables (MRI_2019, ring_number_2019)
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added 2 columns (MRI, ring_number)
## > rows only in x 0
## > rows only in y (150,657)
## > matched rows 5
## > =========
## > rows total 5
7.4.5 Changed condition
First we rename column condition
of crbirding_observation_2019
to new column name observation_condition
:
crbirding_observations_2019 <-
crbirding_observations_2019 %>%
rename(observation_condition = condition)
## rename: renamed one variable (observation_condition)
Details of observations with changes in condition:
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"observation_condition")) %>%
select(observation_reference, observation_condition) %>%
rename(observation_condition_2019 = observation_condition) %>%
left_join(crbirding_observations %>%
select(observation_reference, observation_condition),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 134,917
## > rows only in y (150,662)
## > matched rows ( 0)
## > =========
## > rows total 134,917
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_condition_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_condition)
## > rows only in x 0
## > rows only in y ( 15,745)
## > matched rows 134,917
## > =========
## > rows total 134,917
A lot of changes expected as the mapping codes used for observation_condition changed.
7.4.6 Changed status
First we rename column status
of crbirding_observation_2019
to new column name status_full_grown_bird
:
crbirding_observations_2019 <-
crbirding_observations_2019 %>%
rename(status_full_grown_bird = status)
## rename: renamed one variable (status_full_grown_bird)
Details of observations with changes in status (column status_full_grown_bird
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"status_full_grown_bird")) %>%
select(observation_reference, status_full_grown_bird) %>%
rename(status_full_grown_bird_2019 = status_full_grown_bird) %>%
left_join(crbirding_observations %>%
select(observation_reference, status_full_grown_bird),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 21
## > rows only in y ( 15,766)
## > matched rows (134,896)
## > =========
## > rows total 21
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (status_full_grown_bird_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (status_full_grown_bird)
## > rows only in x 0
## > rows only in y (150,641)
## > matched rows 21
## > =========
## > rows total 21
7.4.7 Changed bird age
Details of observations with changes in bird age at time of observation (column bird_age_obs
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"bird_age_obs")) %>%
select(observation_reference, bird_age_obs) %>%
rename(bird_age_obs_2019 = bird_age_obs) %>%
left_join(crbirding_observations %>%
select(observation_reference, bird_age_obs),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 21
## > rows only in y ( 15,766)
## > matched rows (134,896)
## > =========
## > rows total 21
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (bird_age_obs_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (bird_age_obs)
## > rows only in x 0
## > rows only in y (150,641)
## > matched rows 21
## > =========
## > rows total 21
7.4.8 Changed bird sex
Details of observations with changes in bird sex at time of observation (column bird_sex
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"bird_sex")) %>%
select(observation_reference, bird_sex) %>%
rename(bird_sex_2019 = bird_sex) %>%
left_join(crbirding_observations %>%
select(observation_reference, bird_sex),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 0
## > rows only in y ( 15,745)
## > matched rows (134,917)
## > =========
## > rows total 0
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (bird_sex_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (bird_sex)
## > rows only in x 0
## > rows only in y (150,662)
## > matched rows 0
## > =========
## > rows total 0
7.4.9 Changed bird ring position
Details of observations with changes in bird ring position (column bird_ring_position
) or direction (bird_ring_direction
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"bird_ring_position",
"bird_ring_direction")) %>%
select(observation_reference,
bird_ring_position,
bird_ring_direction) %>%
rename(bird_ring_position_2019 = bird_ring_position,
bird_ring_direction_2019 = bird_ring_direction) %>%
left_join(crbirding_observations %>%
select(observation_reference,
bird_ring_position,
bird_ring_direction),
by = "observation_reference") %>%
select(observation_reference,
bird_ring_position_2019,
bird_ring_position,
bird_ring_direction_2019,
bird_ring_direction)
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 0
## > rows only in y ( 15,745)
## > matched rows (134,917)
## > =========
## > rows total 0
## select: dropped 22 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed 2 variables (bird_ring_position_2019, bird_ring_direction_2019)
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added 2 columns (bird_ring_position, bird_ring_direction)
## > rows only in x 0
## > rows only in y (150,662)
## > matched rows 0
## > =========
## > rows total 0
## select: columns reordered (observation_reference, bird_ring_position_2019, bird_ring_position, bird_ring_direction_2019, bird_ring_direction)
7.4.10 Changed notes
Details of observations with changes in observation notes (column observation_notes
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"observation_notes")) %>%
select(observation_reference, observation_notes) %>%
rename(observation_notes_2019 = observation_notes) %>%
left_join(crbirding_observations %>%
select(observation_reference, observation_notes),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 39,847
## > rows only in y (55,592)
## > matched rows (95,070)
## > ========
## > rows total 39,847
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_notes_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_notes)
## > rows only in x 0
## > rows only in y (110,815)
## > matched rows 39,847
## > =========
## > rows total 39,847
7.4.11 Changed capture information
Details of observations with changes in capture information (column observation_is_capture
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("observation_reference",
"observation_is_capture")) %>%
select(observation_reference, observation_is_capture) %>%
rename(observation_is_capture_2019 = observation_is_capture) %>%
left_join(crbirding_observations %>%
select(observation_reference, observation_is_capture),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 0
## > rows only in y ( 15,745)
## > matched rows (134,917)
## > =========
## > rows total 0
## select: dropped 23 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## rename: renamed one variable (observation_is_capture_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, bird_reference, observation_id, …)
## left_join: added one column (observation_is_capture)
## > rows only in x 0
## > rows only in y (150,662)
## > matched rows 0
## > =========
## > rows total 0
7.4.12 Changed bird reference
Details of observations with changes in bird reference (column bird_reference
):
crbirding_observations_2019 %>%
filter(observation_reference %in% same_ref_ids) %>%
anti_join(crbirding_observations,
by = c("bird_reference",
"observation_reference")) %>%
select(observation_reference, bird_reference) %>%
rename(bird_reference_2019 = bird_reference) %>%
left_join(crbirding_observations %>%
select(observation_reference, bird_reference),
by = "observation_reference")
## filter: removed 117 rows (<1%), 134,917 rows remaining
## anti_join: added no columns
## > rows only in x 33
## > rows only in y ( 15,778)
## > matched rows (134,884)
## > =========
## > rows total 33
## select: dropped 23 variables (user_id, user_reference, bird_id, observation_id, observation_date, …)
## rename: renamed one variable (bird_reference_2019)
## select: dropped 24 variables (user_id, user_reference, bird_id, observation_id, observation_date, …)
## left_join: added one column (bird_reference)
## > rows only in x 0
## > rows only in y (150,629)
## > matched rows 33
## > =========
## > rows total 33
The huge number of changes of bird_reference
has been discussed in previous session related to crbirding_birds
.
7.4.13 Save observation data with updated bird_reference
We save observation data, crbirding_observations
, with the updated bird unique identifiers, bird_reference
:
crbirding_observations <-
crbirding_observations %>%
mutate(
observation_date = as.Date(observation_date)) %>%
write_csv(path = here::here("data",
"processed",
"crbirding_observations.csv"),
na = ""
)
## mutate: converted 'observation_date' from double to Date (0 new NA)