6 Map observation data

6.1 Read data

6.1.1 Read temporary observation data

We start from the temporary observation data saved as TSV file in data\interim:

obs_and_acts <- read_tsv(
  here::here("data", "interim", "obs_and_actions.tsv"),
  col_types = cols(
    .default = col_character(),
    sovon_bird_reference = col_double(),
    Nummer = col_double(),
    Datum = col_datetime(format = ""),
    WaarnemerNummer = col_double(),
    PlaatsLengtegraadGraden = col_double(),
    PlaatsLengtegraadMinuten = col_double(),
    PlaatsLengtegraadSeconden = col_double(),
    PlaatsBreedtegraadGraden = col_double(),
    PlaatsBreedtegraadMinuten = col_double(),
    PlaatsBreedtegraadSeconden = col_double(),
    BevestigingDatum = col_datetime(format = ""),
    AanmaakDatum = col_datetime(format = ""),
    WijzigDatum = col_datetime(format = "")
  )
)

6.1.2 Read action data

Import action codes and meaning:

actions_meaning <- read_tsv(here::here("data", "input", "actions_meaning.tsv"))
## Parsed with column specification:
## cols(
##   Code = col_character(),
##   Beschrijving = col_character(),
##   BeschrijvingUK = col_character(),
##   CodeExtern = col_character(),
##   CodeExternUK = col_character(),
##   Kolom = col_character()
## )

6.1.3 Read processed ring data

We import finalized ring data from ./data/processed:

crbirding_birds <- read_csv(
  here::here("data", "processed", "crbirding_birds.csv"),
  col_types = cols(
    .default = col_character(),
    bird_id = col_logical(),
    bird_reference = col_double(),
    bird_bto = col_logical(),
    bird_birth_year = col_logical(),
    bird_date_begin = col_datetime(format = ""),
    bird_date_end = col_datetime(format = "")
  )
)

6.1.4 Read temporary ring data

We will need some columns from original INBO ring data. We import the temporary ring data as well:

birds <- read_tsv(
  here::here("data", "interim", "birds.tsv"),
  col_types = cols(
    .default = col_character()
  )
)

6.1.5 Read ring position data

We import mapping of ring position and inscription reading direction:

ring_position_table <- read_tsv(
  here::here("data", "interim", "ring_position_table.tsv"),
  na = "",
  col_types = cols(
    .default = col_character(),
    Aktief = col_logical()
  )
)

6.1.6 Read processed user data

We import finalized user data from ./data/processed:

crbirding_users <- read_csv(
  here::here("data", "processed", "crbirding_users.csv"),
  col_types = cols(
    .default = col_character(),
    user_id = col_logical(),
    user_reference = col_double()
  )
)

6.1.7 Read list workers in bird shelters

workers_bird_shelters <- read_tsv(
  here::here("data", "input", "workers_in_bird_shelters.tsv"),
  col_types = cols(
    .default = col_character(),
    user_id = col_logical(),
    user_reference = col_double()
  )
)

6.2 Map color observation data

6.2.1 Extract action codes

Actions present in obs_and_acts :

acts <- actions_meaning$Code
acts <- acts[acts %in% names(obs_and_acts)]
acts
##  [1] "rngkl" "rngme" "klgev" "br"    "vang"  "dood"  "klweg" "veld"  "meweg"
## [10] "me"    "vangl" "ziek"

Combinations of actions present in observations:

action_combinations <-
  obs_and_acts %>%
  select(acts) %>%
  distinct()
## select: dropped 30 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## distinct: removed 150,632 rows (>99%), 30 rows remaining
action_combinations

6.2.2 Observation ID

The field sovon_observation_id is left to SOVON:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_observation_id = NA)
## mutate: new variable 'sovon_observation_id' (logical) with one unique value and 100% NA

6.2.3 Observation reference

The observation reference is an unique identifier assigned to each observation. This field exists already: Nummer.

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_observation_reference = Nummer)
## mutate: new variable 'sovon_observation_reference' (double) with 150,662 unique values and 0% NA

6.2.4 Observation date

The date is saved in column Datum. We copy it in required column sovon_observation_date:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_observation_date = Datum)
## mutate: new variable 'sovon_observation_date' (double) with 7,121 unique values and 0% NA

6.2.5 Observation time

There is no observation time in obs_and_acts. NA is given to sovon_observation_time:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_observation_time = NA)
## mutate: new variable 'sovon_observation_time' (logical) with one unique value and 100% NA

6.2.6 Observation latitude

Observations with missing or partially missing latitude information:

obs_missing_latitude_longitude <-
  obs_and_acts %>%
  filter(is.na(PlaatsBreedtegraadGraden) |
    is.na(PlaatsBreedtegraadMinuten) |
    is.na(PlaatsBreedtegraadSeconden) |
    is.na(PlaatsLengtegraadGraden) |
    is.na(PlaatsLengtegraadMinuten) |
    is.na(PlaatsLengtegraadSeconden)) %>%
  select(
    Nummer,
    KleurringNummer,
    Datum,
    starts_with("PlaatsBreedtegraad"),
    starts_with("PlaatsLengtegraad"),
    PlaatsGemeente,
    PlaatsToponym
  )
## filter: removed 150,654 rows (>99%), 8 rows remaining
## select: dropped 33 variables (sovon_bird_reference, EuringCode, LeeftijdCode, KleurringPlaats, MetaalringNummer_obs, …)
obs_missing_latitude_longitude

The absolute observation latitude should be converted to decimal degrees:

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_lat = case_when(
      !is.na(PlaatsBreedtegraadGraden) &
        !is.na(PlaatsBreedtegraadMinuten) &
        !is.na(PlaatsBreedtegraadSeconden) ~ conv_unit(
        str_c(PlaatsBreedtegraadGraden,
          PlaatsBreedtegraadMinuten,
          PlaatsBreedtegraadSeconden,
          sep = " "
        ),
        from = "deg_min_sec",
        to = "dec_deg"
      ),
      !is.na(PlaatsBreedtegraadGraden) &
        !is.na(PlaatsBreedtegraadMinuten) &
        is.na(PlaatsBreedtegraadSeconden) ~ conv_unit(
        str_c(PlaatsBreedtegraadGraden,
          PlaatsBreedtegraadMinuten,
          0,
          sep = " "
        ),
        from = "deg_min_sec",
        to = "dec_deg"
      ),
      !is.na(PlaatsBreedtegraadGraden) &
        is.na(PlaatsBreedtegraadMinuten) &
        is.na(PlaatsBreedtegraadSeconden) ~ conv_unit(
        str_c(PlaatsBreedtegraadGraden, 0, 0, sep = " "),
        from = "deg_min_sec",
        to = "dec_deg"
      ),
      TRUE ~ NA_character_
    )
  )
## mutate: new variable 'sovon_observation_lat' (character) with 660 unique values and <1% NA

By field PlaatsBreedtegraadRichtingCode we can know whether the observation took place in the northern or southern hemisphere. In case of southern hemisphere, the latitude value should be negative.

Values of PlaatsBreedtegraadRichtingCode and number of observations:

obs_and_acts %>%
  group_by(PlaatsBreedtegraadRichtingCode) %>%
  count()
## group_by: one grouping variable (PlaatsBreedtegraadRichtingCode)
## count: now 2 rows and 2 columns, one group variable remaining (PlaatsBreedtegraadRichtingCode)

If present, the observations with valid PlaatsBreedtegraadGraden but without PlaatsBreedtegraadRichtingCode should be further evaluated:

obs_and_acts %>%
  filter(is.na(PlaatsBreedtegraadRichtingCode) &
    !is.na(PlaatsBreedtegraadGraden)) %>%
  select(Nummer, starts_with("PlaatsBreedte"), PlaatsGemeente, PlaatsToponym)
## filter: removed all rows (100%)
## select: dropped 40 variables (sovon_bird_reference, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)

Assign negative latitude if PlaatsBreedtegraadRichtingCode is equal to S:

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_lat = case_when(
      PlaatsBreedtegraadRichtingCode == "N" ~ sovon_observation_lat,
      PlaatsBreedtegraadRichtingCode == "S" & !is.na(sovon_observation_lat) ~
      paste0("-", sovon_observation_lat),
      TRUE ~ sovon_observation_lat
    )
  )
## mutate: no changes

6.2.7 Observation longitude

Absolute value of the observation longitude should be converted to decimal degrees:

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_lng = case_when(
      !is.na(PlaatsLengtegraadGraden) &
        !is.na(PlaatsLengtegraadMinuten) &
        !is.na(PlaatsLengtegraadSeconden) ~ conv_unit(
        str_c(PlaatsLengtegraadGraden,
          PlaatsLengtegraadMinuten,
          PlaatsLengtegraadSeconden,
          sep = " "
        ),
        from = "deg_min_sec",
        to = "dec_deg"
      ),
      !is.na(PlaatsLengtegraadGraden) &
        !is.na(PlaatsLengtegraadMinuten) &
        is.na(PlaatsLengtegraadSeconden) ~ conv_unit(
        str_c(PlaatsLengtegraadGraden,
          PlaatsLengtegraadMinuten,
          0,
          sep = " "
        ),
        from = "deg_min_sec",
        to = "dec_deg"
      ),
      !is.na(PlaatsLengtegraadGraden) &
        is.na(PlaatsLengtegraadMinuten) &
        is.na(PlaatsLengtegraadSeconden) ~ conv_unit(
        str_c(PlaatsLengtegraadGraden, 0, 0, sep = " "),
        from = "deg_min_sec",
        to = "dec_deg"
      ),
      TRUE ~ NA_character_
    )
  )
## mutate: new variable 'sovon_observation_lng' (character) with 596 unique values and <1% NA

By field PlaatsLengtegraadRichtingCode we can know whether the observation took place in the western or eastern hemisphere. In case of western hemisphere, the longitude value should be negative.

Values of PlaatsLengtegraadRichtingCode and number of observations:

obs_and_acts %>%
  group_by(PlaatsLengtegraadRichtingCode) %>%
  count()
## group_by: one grouping variable (PlaatsLengtegraadRichtingCode)
## count: now 3 rows and 2 columns, one group variable remaining (PlaatsLengtegraadRichtingCode)

If present, the observations with valid PlaatsLengtegraadGraden but without PlaatsLengtegraadRichtingCode should be further evaluated:

obs_and_acts %>%
  filter(is.na(PlaatsLengtegraadRichtingCode) &
    !is.na(PlaatsLengtegraadGraden)) %>%
  select(Nummer, starts_with("PlaatsLengte"), PlaatsGemeente, PlaatsToponym)
## filter: removed all rows (100%)
## select: dropped 41 variables (sovon_bird_reference, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)

Assign negative longitude if PlaatsLengtegraadRichtingCode is equal to W:

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_lng = case_when(
      PlaatsLengtegraadRichtingCode == "E" ~ sovon_observation_lng,
      PlaatsLengtegraadRichtingCode == "W" & !is.na(sovon_observation_lng) ~ paste0("-", sovon_observation_lng),
      TRUE ~ sovon_observation_lng
    )
  )
## mutate: changed 12,183 values (8%) of 'sovon_observation_lng' (0 new NA)

6.2.8 Observation location

Aggregate information about observation location. We follow the following structure: PlaatsGemeente [+ , + PlaatsToponym [+ : + PlaatsToponymDetail]]:

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_location = ifelse(!is.na(PlaatsToponym),
      str_c(PlaatsGemeente,
        PlaatsToponym,
        sep = ", "
      ),
      paste(PlaatsGemeente)
    )
  ) %>%
  mutate(
    sovon_observation_location = ifelse(!is.na(PlaatsToponymDetail),
      str_c(sovon_observation_location,
        PlaatsToponymDetail,
        sep = ": "
      ),
      paste(sovon_observation_location)
    )
  )
## mutate: new variable 'sovon_observation_location' (character) with 2,227 unique values and 0% NA
## mutate: changed 20,283 values (13%) of 'sovon_observation_location' (0 new NA)

6.2.9 Check bird

The field sovon_check_bird is provided by SOVON. NA is given:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_check_bird = NA)
## mutate: new variable 'sovon_check_bird' (logical) with one unique value and 100% NA

6.2.10 User ID

The field sovon_user_id is left to SOVON:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_user_id = NA)
## mutate: new variable 'sovon_user_id' (logical) with one unique value and 100% NA

6.2.11 User reference

The field sovon_user_reference links observations to users and it is equal to field user_reference in user data:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_user_reference = WaarnemerNummer)
## mutate: new variable 'sovon_user_reference' (double) with 2,010 unique values and 0% NA

6.2.12 Observer

The field sovon_melder is equal to user_first_name and user_last_name in users:

obs_and_acts <-
  obs_and_acts %>%
  left_join(
    crbirding_users %>%
      select(user_reference, user_first_name, user_last_name),
    by = c("WaarnemerNummer" = "user_reference")
  ) %>%
  mutate(sovon_melder = case_when(
    is.na(user_first_name) & !is.na(user_last_name) ~ user_last_name,
    !is.na(user_first_name) & is.na(user_last_name) ~ user_first_name,
    !is.na(user_first_name) & !is.na(user_last_name) ~ str_c(user_first_name, user_last_name, sep = " "),
    is.na(user_first_name) & is.na(user_last_name) ~ NA_character_
  )) %>%
  select(-c(user_first_name, user_last_name))
## select: dropped 8 variables (user_id, user_email, user_address, user_postal_code, user_place, …)
## left_join: added 2 columns (user_first_name, user_last_name)
##            > rows only in x         0
##            > rows only in y  (     29)
##            > matched rows     150,662
##            >                 =========
##            > rows total       150,662
## mutate: new variable 'sovon_melder' (character) with 2,008 unique values and 0% NA
## select: dropped 2 variables (user_first_name, user_last_name)

6.2.13 Observer e-mail

The field sovon_melder_email is equal to user_email in users:

obs_and_acts <-
  obs_and_acts %>%
  left_join(
    crbirding_users %>%
      select(user_reference, user_email),
    by = c("WaarnemerNummer" = "user_reference")
  ) %>%
  mutate(sovon_melder_email = user_email)
## select: dropped 9 variables (user_id, user_first_name, user_last_name, user_address, user_postal_code, …)
## left_join: added one column (user_email)
##            > rows only in x         0
##            > rows only in y  (     29)
##            > matched rows     150,662
##            >                 =========
##            > rows total       150,662
## mutate: new variable 'sovon_melder_email' (character) with 1,200 unique values and 62% NA

6.2.14 Reporter - ringer number

The field sovon_melder_ringersnummer is left to SOVON:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_melder_ringersnummer = NA)
## mutate: new variable 'sovon_melder_ringersnummer' (logical) with one unique value and 100% NA

6.2.15 Ring number

The field sovon_ring_number contains information about the metal ring number, in the same format as in field bird_ring_number of crbirding_birds. As explained in the mapping of field bird_ring_number of crbirding_birds, INBO database is a color ring database. This field is not accurately mapped and we will leave it empty.

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_ring_number = NA_character_)
## mutate: new variable 'sovon_ring_number' (character) with one unique value and 100% NA

6.2.16 Observation status

Adding the status (based on EURING) information arises by the need of mapping the observations with action code br and vang:

actions_meaning %>%
  filter(Code %in% c("br", "vang"))
## filter: removed 11 rows (85%), 2 rows remaining

The actions br and vang refer to status breeding in EURING system. Based on the scheme at page 16 of EURING Exchange Code 2000+ document and based on discussion in this issue, we define the following mapping for field status_full_grown_bird:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_status_full_grown_bird = case_when(
    !is.na(br) | !is.na(vang) ~ "N",
    LeeftijdCode == "PU" ~ "-",
    TRUE ~ "U"
  ))
## mutate: new variable 'sovon_status_full_grown_bird' (character) with 3 unique values and 0% NA

6.2.17 Observation condition

The SOVON field observation_condition is NOT mapped following the EURING Exchange Code 2000+ document. SOVON experts provided us the following list of conditions (see #126):

  • 1: healthy
  • 2: sick/wounded
  • 3: dead (less than week)
  • 4: dead (more than week)
  • 5: dead (unknown if freshly dead)
  • 6: released after rehabilitation

So, we do an initial mapping of following actions as follows (in order of priority):

  1. dood, klgev, megev: observation_condition = 5
  2. ziek (or observations of bird shelter workers): observation_condition = 2
  3. rngkl, rngme, vang, vangl, veld, me, meweg, klweg, br: observation_condition = 1
obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_observation_condition = case_when(
    dood == "dood" ~ 5,
    klgev == "klgev" ~ 5,
    ziek == "ziek" | 
      sovon_user_reference %in% workers_bird_shelters$user_reference ~ 2,
    !is.na(rngkl) | !is.na(rngme) | !is.na(vang) | !is.na(vangl) |
    !is.na(veld) | !is.na(me) | !is.na(meweg) | !is.na(klweg) | !is.na(br) ~ 1
  ))
## mutate: new variable 'sovon_observation_condition' (double) with 3 unique values and 0% NA

Releasing after rehabilitation can be detected by the notes in field Opmerking for the observations of bird shelter workers. This is due to the fact that some of these observations are linked to ringing actions, other are linked to veld actions thus making the detection of release by actions troublesome.

Assign condition value 6 to the release after rehabilitation:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_observation_condition = if_else(
    # observation of a bird shelter worker
    sovon_user_reference %in% workers_bird_shelters$user_reference & 
      # bird is not dead
      is.na(dood) & 
      # no empty note
      !is.na(Opmerking) & 
      # note mentions the release in Dutch
      str_detect(Opmerking, pattern = "gelost|vrijlating") & 
      # note does not mention the unrelease
      !str_detect(Opmerking, pattern = "niet gelost"),
    6, sovon_observation_condition
  ))
## mutate: changed 1,293 values (1%) of 'sovon_observation_condition' (0 new NA)

Overview of notes and actions of observations with sovon_observation_condition = 6 (released after rehabilitation):

obs_and_acts %>%
  filter(sovon_observation_condition == 6) %>%
  select(
    Opmerking,
    acts
  ) %>%
  select_if(function(x) any(!is.na(x))) %>%
  distinct()
## filter: removed 149,369 rows (99%), 1,293 rows remaining
## select: dropped 46 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## select_if: dropped 6 variables (klgev, br, vang, dood, meweg, …)
## distinct: removed 1,243 rows (96%), 50 rows remaining

Preview of mapping of sovon_observation_condition:

obs_and_acts %>%
  select(
    sovon_observation_reference,
    sovon_bird_reference,
    sovon_observation_date,
    sovon_observation_condition,
    acts
  ) %>%
  head(n = 100)
## select: dropped 43 variables (Nummer, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)

6.2.18 MRI: metal ring information

Metal ring information is an integer between 0 and 9. A table can be found at page 8 of EURING Exchange Code 2000+ document.

However, SOVON uses this field to map the observations referring to the applying of any kind of ring, either color or metal, as their database is purely focussed on color rings and due to a missing field in the EURING Exchange Code 2000+ specific for color rings.

INBO experts explained us that all color rings are applied on tarsus, MRI: 2, while metal ring position is on tarsus or above or unknown, MRI: 1. However, the value 2 is not allowed by SOVON (see comment in issue 47). Values allowed: 1, 4 and 5, where 4 is used for adding a color and/or metal ring, while 5 for changing

Mapping decision rules:

  1. Default: MRI 4
  2. Very first ringing of a bird (no matter if rngme only, rngkl only or rngme + rngkl): MRI 1
  3. Changing a metal and/or color ring: MRI 5

This last condition holds true even if the color ring is changed while adding a metal ring or viceversa.

We initialize sovon_MRI by assigning default value 4:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_MRI = 4)
## mutate: new variable 'sovon_MRI' (double) with one unique value and 0% NA

Very first ringing gets sovon_MRI = 1:

obs_and_acts <-
  obs_and_acts %>%
  group_by(sovon_bird_reference) %>%
  mutate(sovon_MRI = if_else(
    Datum == min(Datum) & (rngkl == "rngkl" | rngme == "rngme"),
    1, sovon_MRI
  )) %>%
  arrange(sovon_bird_reference, Datum) %>%
  ungroup()
## group_by: one grouping variable (sovon_bird_reference)
## mutate (grouped): changed 11,199 values (7%) of 'sovon_MRI' (70 new NA)
## ungroup: no grouping variables

Preview:

obs_and_acts %>%
  filter(sovon_MRI == 1) %>%
  select(
    sovon_observation_reference,
    sovon_bird_reference,
    sovon_observation_date,
    sovon_MRI,
    acts
  ) %>%
  select_if(function(x) any(!is.na(x))) %>%
  head(n = 100)
## filter: removed 139,533 rows (93%), 11,129 rows remaining
## select: dropped 44 variables (Nummer, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)
## select_if: dropped 9 variables (klgev, br, vang, dood, klweg, …)

Adding a color ring while metal ring is alrady present is mapped as 4, which is the default value. Same for adding a metal ring while color ring already present.

Based on crbirding_birds MRI is 5 if previous bird_shorthand is not empty and different from actual bird_shorthand and no klweg action is coupled to the observation. As said in previous chapter while mapping ring data, we don’t have track of changing metal rings, only loosing it (actions meweg). We then assume to add a new one at next catching action. But adding a metal ring while not changing color ring means MRI equal to 4, default value, if color ring is changed then it is 5: this demonstrates that MRI value 5 depends only on value of bird_shorthand as only color rings can be changed in our mapping. If the ringer, while catching, realizes the absence of color ring (klweg), then the color ring is added, not changed, i.e. MRI equal 4 as metal ring is still present.If metal ring is absent as well, then it would be impossible to identify the bird and it would count as a new bird with an observation coupled to rngkl + rngme actions.

MRI_5_bird_ref_and_dates <-
  crbirding_birds %>%
  group_by(bird_reference) %>%
  mutate(previous_bird_shorthand = lag(bird_shorthand)) %>%
  filter(!is.na(previous_bird_shorthand) &
    previous_bird_shorthand != bird_shorthand) %>%
  select(bird_reference, bird_date_begin) %>%
  ungroup() %>%
  mutate(set_MRI_5 = 5)
## group_by: one grouping variable (bird_reference)
## mutate (grouped): new variable 'previous_bird_shorthand' (character) with 530 unique values and 95% NA
## filter (grouped): removed 12,038 rows (97%), 338 rows remaining
## select: dropped 16 variables (bird_id, bird_euring, bird_bto, bird_shorthand, bird_scheme, …)
## ungroup: no grouping variables
## mutate: new variable 'set_MRI_5' (double) with one unique value and 0% NA
obs_and_acts <-
  obs_and_acts %>%
  left_join(MRI_5_bird_ref_and_dates,
    by = c(
      "sovon_bird_reference" = "bird_reference",
      "Datum" = "bird_date_begin"
    )
  ) %>%
  mutate(sovon_MRI = if_else(!is.na(set_MRI_5) & is.na(klweg) & is.na(veld),
    set_MRI_5,
    sovon_MRI
  ))
## left_join: added one column (set_MRI_5)
##            > rows only in x   150,320
##            > rows only in y  (      0)
##            > matched rows         342
##            >                 =========
##            > rows total       150,662
## mutate: changed 331 values (<1%) of 'sovon_MRI' (0 new NA)

Preview changes:

obs_and_acts %>%
  filter(sovon_MRI == 5) %>%
  select(
    sovon_observation_reference,
    sovon_bird_reference,
    sovon_observation_date,
    sovon_MRI,
    acts
  ) %>%
  select_if(function(x) any(!is.na(x)))
## filter: removed 150,331 rows (>99%), 331 rows remaining
## select: dropped 45 variables (Nummer, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)
## select_if: dropped 7 variables (rngkl, klgev, br, dood, klweg, …)

The presence of action ziek in combination with sovon_MRI 5 is due to the three rings discussed in previous chapter: FHOV, YCAF, FAAG.

6.2.19 Bird age

The field sovon_bird_age_obs should be filled with the age of the bird as mentioned by the observer. This field has been created in previous chapter:

obs_and_acts %>%
  distinct(sovon_bird_age_obs)
## distinct: removed 150,654 rows (>99%), 8 rows remaining

6.2.20 Bird sex

The field sovon_bird_sex should be filled with the sex of the bird as mentioned by the observer. This field is not present in obs_and_acts, so we assign value U (unknown):

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_bird_sex = "U")
## mutate: new variable 'sovon_bird_sex' (character) with one unique value and 0% NA

6.2.21 Capture

The field sovon_observation_is_capture can be filled by evaluating the link of observations to actions vang, vangl, rngkl or rngme:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_observation_is_capture = if_else(
    !is.na(vang) | !is.na(vangl) | !is.na(rngkl) | !is.na(rngme), "Y", "N"
  ))
## mutate: new variable 'sovon_observation_is_capture' (character) with 2 unique values and 0% NA

6.2.22 Bird ID

The field sovon_bird_id is left to SOVON:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_bird_id = NA)
## mutate: new variable 'sovon_bird_id' (logical) with one unique value and 100% NA

6.2.23 Bird ring position and inscription direction

Similarly to the mapping in ring data, we map the ring position and inscription reading direction in observations:

obs_and_acts <-
  obs_and_acts %>%
  left_join(ring_position_table %>%
    select(Code, sovon_bird_ring_position),
  by = c("KleurringPlaats" = "Code")
  ) %>%
  left_join(ring_position_table %>%
    select(Code, sovon_bird_ring_direction),
  by = c("KleurringPlaats" = "Code")
  )
## select: dropped 4 variables (sovon_bird_ring_direction, Beschrijving, BeschrijvingUK, Aktief)
## left_join: added one column (sovon_bird_ring_position)
##            > rows only in x   133,695
##            > rows only in y  (      2)
##            > matched rows      16,967
##            >                 =========
##            > rows total       150,662
## select: dropped 4 variables (sovon_bird_ring_position, Beschrijving, BeschrijvingUK, Aktief)
## left_join: added one column (sovon_bird_ring_direction)
##            > rows only in x   133,695
##            > rows only in y  (      2)
##            > matched rows      16,967
##            >                 =========
##            > rows total       150,662

Effects of the mapping:

obs_and_acts %>%
  distinct(KleurringPlaats, sovon_bird_ring_position, sovon_bird_ring_direction)
## distinct: removed 150,655 rows (>99%), 7 rows remaining

6.2.24 Observation notes

We copy the notes in Opmerking to SOVON field sovon_observation_notes:

obs_and_acts <-
  obs_and_acts %>%
  mutate(sovon_observation_notes = Opmerking)
## mutate: new variable 'sovon_observation_notes' (character) with 11,032 unique values and 70% NA

We add a dot at the end of the notes if not present already:

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_notes =
      if_else(
        !is.na(sovon_observation_notes) & str_length(sovon_observation_notes) > 0,
        if_else(str_sub(sovon_observation_notes, -1) != ".",
          str_c(sovon_observation_notes, ".", sep = ""),
          sovon_observation_notes
        ),
        sovon_observation_notes
      )
  )
## mutate: changed 44,426 values (29%) of 'sovon_observation_notes' (0 new NA)

We also add the prefix "INBO original notes: ":

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_notes =
      if_else(
        !is.na(sovon_observation_notes) &
          str_length(sovon_observation_notes) > 0,
        str_c("INBO original notes: ", sovon_observation_notes, sep = " "),
        sovon_observation_notes
      )
  )
## mutate: changed 44,590 values (30%) of 'sovon_observation_notes' (0 new NA)

We add the note no_color_ring. to observations linked to action klweg and/or me:

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_notes = if_else(
      is.na(klweg) & is.na(me),
      sovon_observation_notes,
      if_else(is.na(sovon_observation_notes),
        "no_color_ring.",
        str_c("no_color_ring.", sovon_observation_notes, sep = " ")
      )
    )
  )
## mutate: changed 392 values (<1%) of 'sovon_observation_notes' (302 fewer NA)

Preview:

obs_and_acts %>%
  filter(!is.na(klweg) | !is.na(me)) %>%
  select(, klweg, me, sovon_observation_notes) %>%
  head()
## filter: removed 150,270 rows (>99%), 392 rows remaining
## select: dropped 64 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)

We add the note no_metal_ring. to observations linked to action meweg:

obs_and_acts <-
  obs_and_acts %>%
  mutate(
    sovon_observation_notes = if_else(
      !is.na(meweg),
      if_else(is.na(sovon_observation_notes),
        "no_metal_ring.",
        paste("no_metal_ring.", sovon_observation_notes, sep = " ")
      ),
      sovon_observation_notes
    )
  )
## mutate: changed 125 values (<1%) of 'sovon_observation_notes' (87 fewer NA)

Preview:

obs_and_acts %>%
  filter(!is.na(meweg)) %>%
  select(sovon_observation_notes) %>%
  distinct() %>%
  head()
## filter: removed 150,537 rows (>99%), 125 rows remaining
## select: dropped 66 variables (sovon_bird_reference, Nummer, Datum, EuringCode, LeeftijdCode, …)
## distinct: removed 100 rows (80%), 25 rows remaining

6.3 Save final observation data

Select the required columns, starting with prefix sovon_:

crbirding_observations <-
  obs_and_acts %>%
  select(starts_with("sovon"))
## select: dropped 42 variables (Nummer, Datum, EuringCode, LeeftijdCode, KleurringNummer, …)

Remove prefix sovon_:

names(crbirding_observations) <- str_remove_all(names(crbirding_observations), pattern = "sovon_")

Remove time and timezone information from bird_date_begin and bird_date_end:

crbirding_observations <-
  crbirding_observations %>%
  mutate(observation_date = as.Date(observation_date))
## mutate: converted 'observation_date' from double to Date (0 new NA)

The desired order of columns in crbirding_observations:

cr_obs_cols <- c(
  "user_id", "user_reference", "bird_id", "bird_reference", "observation_id",
  "observation_reference", "observation_date", "observation_time",
  "observation_lat", "observation_lng", "observation_location",
  "observation_is_capture", "observation_notes", "check_bird", "MRI", "melder",
  "melder_email", "melder_ringersnummer", "ring_number", "observation_condition", "status_full_grown_bird",
  "bird_age_obs", "bird_sex", "bird_ring_position", "bird_ring_direction"
)

Are all required columns present?

all(cr_obs_cols %in% names(crbirding_observations)) &
  length(cr_obs_cols) == ncol(crbirding_observations)
## [1] TRUE

Set column order:

crbirding_observations <-
  crbirding_observations %>%
  select(all_of(cr_obs_cols))
## select: columns reordered (user_id, user_reference, bird_id, bird_reference, observation_id, …)

Preview data (without personal observer data for privcay reasons):

crbirding_observations %>%
  select(-c(melder, melder_email)) %>%
  head(n = 10)
## select: dropped 2 variables (melder, melder_email)

Save to text file (comma separated value):

write_csv(
  crbirding_observations,
  path = here::here("data", "processed", "crbirding_observations.csv"),
  na = ""
)