6 Map observation data
6.1 Read data
6.1.1 Read temporary observation data
We start from the temporary observation data saved as TSV file in data\interim
:
obs_and_acts <- read_tsv(
here::here("data", "interim", "obs_and_actions.tsv"),
col_types = cols(
.default = col_character(),
sovon_bird_reference = col_double(),
Nummer = col_double(),
Datum = col_datetime(format = ""),
WaarnemerNummer = col_double(),
PlaatsLengtegraadGraden = col_double(),
PlaatsLengtegraadMinuten = col_double(),
PlaatsLengtegraadSeconden = col_double(),
PlaatsBreedtegraadGraden = col_double(),
PlaatsBreedtegraadMinuten = col_double(),
PlaatsBreedtegraadSeconden = col_double(),
BevestigingDatum = col_datetime(format = ""),
AanmaakDatum = col_datetime(format = ""),
WijzigDatum = col_datetime(format = "")
)
)
6.1.2 Read action data
Import action codes and meaning:
## Parsed with column specification:
## cols(
## Code = col_character(),
## Beschrijving = col_character(),
## BeschrijvingUK = col_character(),
## CodeExtern = col_character(),
## CodeExternUK = col_character(),
## Kolom = col_character()
## )
6.1.3 Read processed ring data
We import finalized ring data from ./data/processed
:
crbirding_birds <- read_csv(
here::here("data", "processed", "crbirding_birds.csv"),
col_types = cols(
.default = col_character(),
bird_id = col_logical(),
bird_reference = col_double(),
bird_bto = col_logical(),
bird_birth_year = col_logical(),
bird_date_begin = col_datetime(format = ""),
bird_date_end = col_datetime(format = "")
)
)
6.1.4 Read temporary ring data
We will need some columns from original INBO ring data. We import the temporary ring data as well:
6.1.5 Read ring position data
We import mapping of ring position and inscription reading direction:
6.1.6 Read processed user data
We import finalized user data from ./data/processed
:
6.2 Map color observation data
6.2.1 Extract action codes
Actions present in obs_and_acts
:
## [1] "rngkl" "rngme" "klgev" "br" "vang" "dood" "klweg" "veld" "meweg"
## [10] "me" "vangl" "ziek"
Combinations of actions present in observations:
## select: dropped 30 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## distinct: removed 150,632 rows (>99%), 30 rows remaining
6.2.2 Observation ID
The field sovon_observation_id
is left to SOVON:
## mutate: new variable 'sovon_observation_id' (logical) with one unique value and 100% NA
6.2.3 Observation reference
The observation reference is an unique identifier assigned to each observation. This field exists already: Nummer
.
## mutate: new variable 'sovon_observation_reference' (double) with 150,662 unique values and 0% NA
6.2.4 Observation date
The date is saved in column Datum
. We copy it in required column sovon_observation_date
:
## mutate: new variable 'sovon_observation_date' (double) with 7,121 unique values and 0% NA
6.2.5 Observation time
There is no observation time in obs_and_acts
. NA
is given to sovon_observation_time
:
## mutate: new variable 'sovon_observation_time' (logical) with one unique value and 100% NA
6.2.6 Observation latitude
Observations with missing or partially missing latitude information:
obs_missing_latitude_longitude <-
obs_and_acts %>%
filter(is.na(PlaatsBreedtegraadGraden) |
is.na(PlaatsBreedtegraadMinuten) |
is.na(PlaatsBreedtegraadSeconden) |
is.na(PlaatsLengtegraadGraden) |
is.na(PlaatsLengtegraadMinuten) |
is.na(PlaatsLengtegraadSeconden)) %>%
select(
Nummer,
KleurringNummer,
Datum,
starts_with("PlaatsBreedtegraad"),
starts_with("PlaatsLengtegraad"),
PlaatsGemeente,
PlaatsToponym
)
## filter: removed 150,654 rows (>99%), 8 rows remaining
## select: dropped 33 variables (sovon_bird_reference, EuringCode, LeeftijdCode, KleurringPlaats, MetaalringNummer_obs, …)
The absolute observation latitude should be converted to decimal degrees:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_lat = case_when(
!is.na(PlaatsBreedtegraadGraden) &
!is.na(PlaatsBreedtegraadMinuten) &
!is.na(PlaatsBreedtegraadSeconden) ~ conv_unit(
str_c(PlaatsBreedtegraadGraden,
PlaatsBreedtegraadMinuten,
PlaatsBreedtegraadSeconden,
sep = " "
),
from = "deg_min_sec",
to = "dec_deg"
),
!is.na(PlaatsBreedtegraadGraden) &
!is.na(PlaatsBreedtegraadMinuten) &
is.na(PlaatsBreedtegraadSeconden) ~ conv_unit(
str_c(PlaatsBreedtegraadGraden,
PlaatsBreedtegraadMinuten,
0,
sep = " "
),
from = "deg_min_sec",
to = "dec_deg"
),
!is.na(PlaatsBreedtegraadGraden) &
is.na(PlaatsBreedtegraadMinuten) &
is.na(PlaatsBreedtegraadSeconden) ~ conv_unit(
str_c(PlaatsBreedtegraadGraden, 0, 0, sep = " "),
from = "deg_min_sec",
to = "dec_deg"
),
TRUE ~ NA_character_
)
)
## mutate: new variable 'sovon_observation_lat' (character) with 660 unique values and <1% NA
By field PlaatsBreedtegraadRichtingCode
we can know whether the observation took place in the northern or southern hemisphere. In case of southern hemisphere, the latitude value should be negative.
Values of PlaatsBreedtegraadRichtingCode
and number of observations:
## group_by: one grouping variable (PlaatsBreedtegraadRichtingCode)
## count: now 2 rows and 2 columns, one group variable remaining (PlaatsBreedtegraadRichtingCode)
If present, the observations with valid PlaatsBreedtegraadGraden
but without PlaatsBreedtegraadRichtingCode
should be further evaluated:
obs_and_acts %>%
filter(is.na(PlaatsBreedtegraadRichtingCode) &
!is.na(PlaatsBreedtegraadGraden)) %>%
select(Nummer, starts_with("PlaatsBreedte"), PlaatsGemeente, PlaatsToponym)
## filter: removed all rows (100%)
## select: dropped 40 variables (sovon_bird_reference, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)
Assign negative latitude if PlaatsBreedtegraadRichtingCode
is equal to S
:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_lat = case_when(
PlaatsBreedtegraadRichtingCode == "N" ~ sovon_observation_lat,
PlaatsBreedtegraadRichtingCode == "S" & !is.na(sovon_observation_lat) ~
paste0("-", sovon_observation_lat),
TRUE ~ sovon_observation_lat
)
)
## mutate: no changes
6.2.7 Observation longitude
Absolute value of the observation longitude should be converted to decimal degrees:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_lng = case_when(
!is.na(PlaatsLengtegraadGraden) &
!is.na(PlaatsLengtegraadMinuten) &
!is.na(PlaatsLengtegraadSeconden) ~ conv_unit(
str_c(PlaatsLengtegraadGraden,
PlaatsLengtegraadMinuten,
PlaatsLengtegraadSeconden,
sep = " "
),
from = "deg_min_sec",
to = "dec_deg"
),
!is.na(PlaatsLengtegraadGraden) &
!is.na(PlaatsLengtegraadMinuten) &
is.na(PlaatsLengtegraadSeconden) ~ conv_unit(
str_c(PlaatsLengtegraadGraden,
PlaatsLengtegraadMinuten,
0,
sep = " "
),
from = "deg_min_sec",
to = "dec_deg"
),
!is.na(PlaatsLengtegraadGraden) &
is.na(PlaatsLengtegraadMinuten) &
is.na(PlaatsLengtegraadSeconden) ~ conv_unit(
str_c(PlaatsLengtegraadGraden, 0, 0, sep = " "),
from = "deg_min_sec",
to = "dec_deg"
),
TRUE ~ NA_character_
)
)
## mutate: new variable 'sovon_observation_lng' (character) with 596 unique values and <1% NA
By field PlaatsLengtegraadRichtingCode
we can know whether the observation took place in the western or eastern hemisphere. In case of western hemisphere, the longitude value should be negative.
Values of PlaatsLengtegraadRichtingCode
and number of observations:
## group_by: one grouping variable (PlaatsLengtegraadRichtingCode)
## count: now 3 rows and 2 columns, one group variable remaining (PlaatsLengtegraadRichtingCode)
If present, the observations with valid PlaatsLengtegraadGraden
but without PlaatsLengtegraadRichtingCode
should be further evaluated:
obs_and_acts %>%
filter(is.na(PlaatsLengtegraadRichtingCode) &
!is.na(PlaatsLengtegraadGraden)) %>%
select(Nummer, starts_with("PlaatsLengte"), PlaatsGemeente, PlaatsToponym)
## filter: removed all rows (100%)
## select: dropped 41 variables (sovon_bird_reference, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)
Assign negative longitude if PlaatsLengtegraadRichtingCode
is equal to W
:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_lng = case_when(
PlaatsLengtegraadRichtingCode == "E" ~ sovon_observation_lng,
PlaatsLengtegraadRichtingCode == "W" & !is.na(sovon_observation_lng) ~ paste0("-", sovon_observation_lng),
TRUE ~ sovon_observation_lng
)
)
## mutate: changed 12,183 values (8%) of 'sovon_observation_lng' (0 new NA)
6.2.8 Observation location
Aggregate information about observation location. We follow the following structure: PlaatsGemeente
[+ ,
+ PlaatsToponym
[+ :
+ PlaatsToponymDetail
]]:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_location = ifelse(!is.na(PlaatsToponym),
str_c(PlaatsGemeente,
PlaatsToponym,
sep = ", "
),
paste(PlaatsGemeente)
)
) %>%
mutate(
sovon_observation_location = ifelse(!is.na(PlaatsToponymDetail),
str_c(sovon_observation_location,
PlaatsToponymDetail,
sep = ": "
),
paste(sovon_observation_location)
)
)
## mutate: new variable 'sovon_observation_location' (character) with 2,227 unique values and 0% NA
## mutate: changed 20,283 values (13%) of 'sovon_observation_location' (0 new NA)
6.2.9 Check bird
The field sovon_check_bird
is provided by SOVON. NA
is given:
## mutate: new variable 'sovon_check_bird' (logical) with one unique value and 100% NA
6.2.10 User ID
The field sovon_user_id
is left to SOVON:
## mutate: new variable 'sovon_user_id' (logical) with one unique value and 100% NA
6.2.11 User reference
The field sovon_user_reference
links observations to users and it is equal to field user_reference
in user data:
## mutate: new variable 'sovon_user_reference' (double) with 2,010 unique values and 0% NA
6.2.12 Observer
The field sovon_melder
is equal to user_first_name
and user_last_name
in users
:
obs_and_acts <-
obs_and_acts %>%
left_join(
crbirding_users %>%
select(user_reference, user_first_name, user_last_name),
by = c("WaarnemerNummer" = "user_reference")
) %>%
mutate(sovon_melder = case_when(
is.na(user_first_name) & !is.na(user_last_name) ~ user_last_name,
!is.na(user_first_name) & is.na(user_last_name) ~ user_first_name,
!is.na(user_first_name) & !is.na(user_last_name) ~ str_c(user_first_name, user_last_name, sep = " "),
is.na(user_first_name) & is.na(user_last_name) ~ NA_character_
)) %>%
select(-c(user_first_name, user_last_name))
## select: dropped 8 variables (user_id, user_email, user_address, user_postal_code, user_place, …)
## left_join: added 2 columns (user_first_name, user_last_name)
## > rows only in x 0
## > rows only in y ( 29)
## > matched rows 150,662
## > =========
## > rows total 150,662
## mutate: new variable 'sovon_melder' (character) with 2,008 unique values and 0% NA
## select: dropped 2 variables (user_first_name, user_last_name)
6.2.13 Observer e-mail
The field sovon_melder_email
is equal to user_email
in users
:
obs_and_acts <-
obs_and_acts %>%
left_join(
crbirding_users %>%
select(user_reference, user_email),
by = c("WaarnemerNummer" = "user_reference")
) %>%
mutate(sovon_melder_email = user_email)
## select: dropped 9 variables (user_id, user_first_name, user_last_name, user_address, user_postal_code, …)
## left_join: added one column (user_email)
## > rows only in x 0
## > rows only in y ( 29)
## > matched rows 150,662
## > =========
## > rows total 150,662
## mutate: new variable 'sovon_melder_email' (character) with 1,200 unique values and 62% NA
6.2.14 Reporter - ringer number
The field sovon_melder_ringersnummer
is left to SOVON:
## mutate: new variable 'sovon_melder_ringersnummer' (logical) with one unique value and 100% NA
6.2.15 Ring number
The field sovon_ring_number
contains information about the metal ring number, in the same format as in field bird_ring_number
of crbirding_birds
. As explained in the mapping of field bird_ring_number
of crbirding_birds
, INBO database is a color ring database. This field is not accurately mapped and we will leave it empty.
## mutate: new variable 'sovon_ring_number' (character) with one unique value and 100% NA
6.2.16 Observation status
Adding the status (based on EURING) information arises by the need of mapping the observations with action code br
and vang
:
## filter: removed 11 rows (85%), 2 rows remaining
The actions br
and vang
refer to status breeding
in EURING system. Based on the scheme at page 16 of EURING Exchange Code 2000+ document and based on discussion in this issue, we define the following mapping for field status_full_grown_bird
:
obs_and_acts <-
obs_and_acts %>%
mutate(sovon_status_full_grown_bird = case_when(
!is.na(br) | !is.na(vang) ~ "N",
LeeftijdCode == "PU" ~ "-",
TRUE ~ "U"
))
## mutate: new variable 'sovon_status_full_grown_bird' (character) with 3 unique values and 0% NA
6.2.17 Observation condition
The SOVON field observation_condition
is NOT mapped following the EURING Exchange Code 2000+ document. SOVON experts provided us the following list of conditions (see #126):
1
: healthy2
: sick/wounded3
: dead (less than week)4
: dead (more than week)5
: dead (unknown if freshly dead)6
: released after rehabilitation
So, we do an initial mapping of following actions as follows (in order of priority):
dood
,klgev
,megev
:observation_condition
=5
ziek
(or observations of bird shelter workers):observation_condition
=2
rngkl
,rngme
,vang
,vangl
,veld
,me
,meweg
,klweg
,br
:observation_condition
=1
obs_and_acts <-
obs_and_acts %>%
mutate(sovon_observation_condition = case_when(
dood == "dood" ~ 5,
klgev == "klgev" ~ 5,
ziek == "ziek" |
sovon_user_reference %in% workers_bird_shelters$user_reference ~ 2,
!is.na(rngkl) | !is.na(rngme) | !is.na(vang) | !is.na(vangl) |
!is.na(veld) | !is.na(me) | !is.na(meweg) | !is.na(klweg) | !is.na(br) ~ 1
))
## mutate: new variable 'sovon_observation_condition' (double) with 3 unique values and 0% NA
Releasing after rehabilitation can be detected by the notes in field Opmerking
for the observations of bird shelter workers. This is due to the fact that some of these observations are linked to ringing actions, other are linked to veld
actions thus making the detection of release by actions troublesome.
Assign condition value 6
to the release after rehabilitation:
obs_and_acts <-
obs_and_acts %>%
mutate(sovon_observation_condition = if_else(
# observation of a bird shelter worker
sovon_user_reference %in% workers_bird_shelters$user_reference &
# bird is not dead
is.na(dood) &
# no empty note
!is.na(Opmerking) &
# note mentions the release in Dutch
str_detect(Opmerking, pattern = "gelost|vrijlating") &
# note does not mention the unrelease
!str_detect(Opmerking, pattern = "niet gelost"),
6, sovon_observation_condition
))
## mutate: changed 1,293 values (1%) of 'sovon_observation_condition' (0 new NA)
Overview of notes and actions of observations with sovon_observation_condition
= 6 (released after rehabilitation):
obs_and_acts %>%
filter(sovon_observation_condition == 6) %>%
select(
Opmerking,
acts
) %>%
select_if(function(x) any(!is.na(x))) %>%
distinct()
## filter: removed 149,369 rows (99%), 1,293 rows remaining
## select: dropped 46 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## select_if: dropped 6 variables (klgev, br, vang, dood, meweg, …)
## distinct: removed 1,243 rows (96%), 50 rows remaining
Preview of mapping of sovon_observation_condition
:
obs_and_acts %>%
select(
sovon_observation_reference,
sovon_bird_reference,
sovon_observation_date,
sovon_observation_condition,
acts
) %>%
head(n = 100)
## select: dropped 43 variables (Nummer, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)
6.2.18 MRI: metal ring information
Metal ring information is an integer between 0 and 9. A table can be found at page 8 of EURING Exchange Code 2000+ document.
However, SOVON uses this field to map the observations referring to the applying of any kind of ring, either color or metal, as their database is purely focussed on color rings and due to a missing field in the EURING Exchange Code 2000+ specific for color rings.
INBO experts explained us that all color rings are applied on tarsus, MRI: 2, while metal ring position is on tarsus or above or unknown, MRI: 1. However, the value 2 is not allowed by SOVON (see comment in issue 47). Values allowed: 1, 4 and 5, where 4 is used for adding a color and/or metal ring, while 5 for changing
Mapping decision rules:
- Default: MRI 4
- Very first ringing of a bird (no matter if
rngme
only,rngkl
only orrngme
+rngkl
):MRI
1 - Changing a metal and/or color ring: MRI 5
This last condition holds true even if the color ring is changed while adding a metal ring or viceversa.
We initialize sovon_MRI
by assigning default value 4:
## mutate: new variable 'sovon_MRI' (double) with one unique value and 0% NA
Very first ringing gets sovon_MRI
= 1:
obs_and_acts <-
obs_and_acts %>%
group_by(sovon_bird_reference) %>%
mutate(sovon_MRI = if_else(
Datum == min(Datum) & (rngkl == "rngkl" | rngme == "rngme"),
1, sovon_MRI
)) %>%
arrange(sovon_bird_reference, Datum) %>%
ungroup()
## group_by: one grouping variable (sovon_bird_reference)
## mutate (grouped): changed 11,199 values (7%) of 'sovon_MRI' (70 new NA)
## ungroup: no grouping variables
Preview:
obs_and_acts %>%
filter(sovon_MRI == 1) %>%
select(
sovon_observation_reference,
sovon_bird_reference,
sovon_observation_date,
sovon_MRI,
acts
) %>%
select_if(function(x) any(!is.na(x))) %>%
head(n = 100)
## filter: removed 139,533 rows (93%), 11,129 rows remaining
## select: dropped 44 variables (Nummer, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)
## select_if: dropped 9 variables (klgev, br, vang, dood, klweg, …)
Adding a color ring while metal ring is alrady present is mapped as 4, which is the default value. Same for adding a metal ring while color ring already present.
Based on crbirding_birds
MRI is 5 if previous bird_shorthand
is not empty and different from actual bird_shorthand
and no klweg
action is coupled to the observation.
As said in previous chapter while mapping ring data, we don’t have track of changing metal rings, only loosing it (actions meweg
). We then assume to add a new one at next catching action. But adding a metal ring while not changing color ring means MRI equal to 4, default value, if color ring is changed then it is 5: this demonstrates that MRI value 5 depends only on value of bird_shorthand
as only color rings can be changed in our mapping. If the ringer, while catching, realizes the absence of color ring (klweg
), then the color ring is added, not changed, i.e. MRI equal 4 as metal ring is still present.If metal ring is absent as well, then it would be impossible to identify the bird and it would count as a new bird with an observation coupled to rngkl
+ rngme
actions.
MRI_5_bird_ref_and_dates <-
crbirding_birds %>%
group_by(bird_reference) %>%
mutate(previous_bird_shorthand = lag(bird_shorthand)) %>%
filter(!is.na(previous_bird_shorthand) &
previous_bird_shorthand != bird_shorthand) %>%
select(bird_reference, bird_date_begin) %>%
ungroup() %>%
mutate(set_MRI_5 = 5)
## group_by: one grouping variable (bird_reference)
## mutate (grouped): new variable 'previous_bird_shorthand' (character) with 530 unique values and 95% NA
## filter (grouped): removed 12,038 rows (97%), 338 rows remaining
## select: dropped 16 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## ungroup: no grouping variables
## mutate: new variable 'set_MRI_5' (double) with one unique value and 0% NA
obs_and_acts <-
obs_and_acts %>%
left_join(MRI_5_bird_ref_and_dates,
by = c(
"sovon_bird_reference" = "bird_reference",
"Datum" = "bird_date_begin"
)
) %>%
mutate(sovon_MRI = if_else(!is.na(set_MRI_5) & is.na(klweg) & is.na(veld),
set_MRI_5,
sovon_MRI
))
## left_join: added one column (set_MRI_5)
## > rows only in x 150,320
## > rows only in y ( 0)
## > matched rows 342
## > =========
## > rows total 150,662
## mutate: changed 331 values (<1%) of 'sovon_MRI' (0 new NA)
Preview changes:
obs_and_acts %>%
filter(sovon_MRI == 5) %>%
select(
sovon_observation_reference,
sovon_bird_reference,
sovon_observation_date,
sovon_MRI,
acts
) %>%
select_if(function(x) any(!is.na(x)))
## filter: removed 150,331 rows (>99%), 331 rows remaining
## select: dropped 45 variables (Nummer, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)
## select_if: dropped 7 variables (rngkl, klgev, br, dood, klweg, …)
The presence of action ziek
in combination with sovon_MRI
5 is due to the three rings discussed in previous chapter: FHOV
, YCAF
, FAAG
.
6.2.19 Bird age
The field sovon_bird_age_obs
should be filled with the age of the bird as mentioned by the observer. This field has been created in previous chapter:
## distinct: removed 150,654 rows (>99%), 8 rows remaining
6.2.20 Bird sex
The field sovon_bird_sex
should be filled with the sex of the bird as mentioned by the observer. This field is not present in obs_and_acts
, so we assign value U
(unknown):
## mutate: new variable 'sovon_bird_sex' (character) with one unique value and 0% NA
6.2.21 Capture
The field sovon_observation_is_capture
can be filled by evaluating the link of observations to actions vang
, vangl
, rngkl
or rngme
:
obs_and_acts <-
obs_and_acts %>%
mutate(sovon_observation_is_capture = if_else(
!is.na(vang) | !is.na(vangl) | !is.na(rngkl) | !is.na(rngme), "Y", "N"
))
## mutate: new variable 'sovon_observation_is_capture' (character) with 2 unique values and 0% NA
6.2.22 Bird ID
The field sovon_bird_id
is left to SOVON:
## mutate: new variable 'sovon_bird_id' (logical) with one unique value and 100% NA
6.2.23 Bird ring position and inscription direction
Similarly to the mapping in ring data, we map the ring position and inscription reading direction in observations:
obs_and_acts <-
obs_and_acts %>%
left_join(ring_position_table %>%
select(Code, sovon_bird_ring_position),
by = c("KleurringPlaats" = "Code")
) %>%
left_join(ring_position_table %>%
select(Code, sovon_bird_ring_direction),
by = c("KleurringPlaats" = "Code")
)
## select: dropped 4 variables (sovon_bird_ring_direction, Beschrijving, BeschrijvingUK, Aktief)
## left_join: added one column (sovon_bird_ring_position)
## > rows only in x 133,695
## > rows only in y ( 2)
## > matched rows 16,967
## > =========
## > rows total 150,662
## select: dropped 4 variables (sovon_bird_ring_position, Beschrijving, BeschrijvingUK, Aktief)
## left_join: added one column (sovon_bird_ring_direction)
## > rows only in x 133,695
## > rows only in y ( 2)
## > matched rows 16,967
## > =========
## > rows total 150,662
Effects of the mapping:
## distinct: removed 150,655 rows (>99%), 7 rows remaining
6.2.24 Observation notes
We copy the notes in Opmerking
to SOVON field sovon_observation_notes
:
## mutate: new variable 'sovon_observation_notes' (character) with 11,032 unique values and 70% NA
We add a dot at the end of the notes if not present already:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_notes =
if_else(
!is.na(sovon_observation_notes) & str_length(sovon_observation_notes) > 0,
if_else(str_sub(sovon_observation_notes, -1) != ".",
str_c(sovon_observation_notes, ".", sep = ""),
sovon_observation_notes
),
sovon_observation_notes
)
)
## mutate: changed 44,426 values (29%) of 'sovon_observation_notes' (0 new NA)
We also add the prefix "INBO original notes: "
:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_notes =
if_else(
!is.na(sovon_observation_notes) &
str_length(sovon_observation_notes) > 0,
str_c("INBO original notes: ", sovon_observation_notes, sep = " "),
sovon_observation_notes
)
)
## mutate: changed 44,590 values (30%) of 'sovon_observation_notes' (0 new NA)
We add the note no_color_ring.
to observations linked to action klweg
and/or me
:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_notes = if_else(
is.na(klweg) & is.na(me),
sovon_observation_notes,
if_else(is.na(sovon_observation_notes),
"no_color_ring.",
str_c("no_color_ring.", sovon_observation_notes, sep = " ")
)
)
)
## mutate: changed 392 values (<1%) of 'sovon_observation_notes' (302 fewer NA)
Preview:
obs_and_acts %>%
filter(!is.na(klweg) | !is.na(me)) %>%
select(, klweg, me, sovon_observation_notes) %>%
head()
## filter: removed 150,270 rows (>99%), 392 rows remaining
## select: dropped 64 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
We add the note no_metal_ring.
to observations linked to action meweg
:
obs_and_acts <-
obs_and_acts %>%
mutate(
sovon_observation_notes = if_else(
!is.na(meweg),
if_else(is.na(sovon_observation_notes),
"no_metal_ring.",
paste("no_metal_ring.", sovon_observation_notes, sep = " ")
),
sovon_observation_notes
)
)
## mutate: changed 125 values (<1%) of 'sovon_observation_notes' (87 fewer NA)
Preview:
obs_and_acts %>%
filter(!is.na(meweg)) %>%
select(sovon_observation_notes) %>%
distinct() %>%
head()
## filter: removed 150,537 rows (>99%), 125 rows remaining
## select: dropped 66 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## distinct: removed 100 rows (80%), 25 rows remaining
6.3 Save final observation data
Select the required columns, starting with prefix sovon_
:
## select: dropped 42 variables (Nummer, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)
Remove prefix sovon_
:
Remove time and timezone information from bird_date_begin
and bird_date_end
:
crbirding_observations <-
crbirding_observations %>%
mutate(observation_date = as.Date(observation_date))
## mutate: converted 'observation_date' from double to Date (0 new NA)
The desired order of columns in crbirding_observations
:
cr_obs_cols <- c(
"user_id", "user_reference", "bird_id", "bird_reference", "observation_id",
"observation_reference", "observation_date", "observation_time",
"observation_lat", "observation_lng", "observation_location",
"observation_is_capture", "observation_notes", "check_bird", "MRI", "melder",
"melder_email", "melder_ringersnummer", "ring_number", "observation_condition", "status_full_grown_bird",
"bird_age_obs", "bird_sex", "bird_ring_position", "bird_ring_direction"
)
Are all required columns present?
all(cr_obs_cols %in% names(crbirding_observations)) &
length(cr_obs_cols) == ncol(crbirding_observations)
## [1] TRUE
Set column order:
## select: columns reordered (user_id, user_reference, bird_id, bird_reference, observation_id, …)
Preview data (without personal observer data for privcay reasons):
## select: dropped 2 variables (melder, melder_email)
Save to text file (comma separated value):