galah::search_taxa()
R/make_unmatched_overrides.R
make_unmatched_overrides.Rd
For an unmatched 'name', try to find a match via: rgbif::name_usage();
rgbif::name_backbone(); and removal of any characters in 'name' after an
'x' or 'X' (i.e. treat hybrids as just the first taxon). Using
rgbif::name_backbone() allows fuzzy matching to fix spelling errors. Any
results are passed back to galah::search_taxa() to retrieve an
override to use for that 'name' (so long as the rgbif result is not identical
to 'name'). Any 'name' still completely unmatched is just given the override
'name' so it will not be lost from downstream processes but will not have any
associated taxonomic information.
make_unmatched_overrides(
df,
taxa_col = "original_name",
taxonomy,
target_rank = "species",
hybrids = FALSE,
include_unmatched = TRUE,
results_file = tempfile(fileext = ".parquet"),
remove_taxa = c("bold:", "unverified", "undetermined", "unidentified", "annual herb",
"annual grass", "incertae sedis", "\\?", "another\\s", "not naturalised in sa",
"annual tussock grass", "*no id", "spec\\.", "\\s\\-\\-\\s.*",
"\\ssp\\.", "\\sspec\\.", "\\ssp$", "\\ssp\\d", "\\ssp\\s",
"\\sspp\\.", "\\sspp\\s", "\\sspp$", "dead", "unknown", "\\sgroup$",
"\\sspecies$", "aquatic grass", "hybrid", "\\scultivar$", "\\scomplex$",
"\\ssect\\.", "\\ss\\.\\sstr\\.", "\\(includes\\s"),
tri_strings = c("\\sssp\\s", "\\sssp\\.", "\\svar\\s", "\\svar\\.",
"\\ssubsp\\.", "\\ssubspecies", "\\sform\\)", "\\sform\\s",
"\\sf\\.", "\\srace\\s", "\\srace\\)", "\\sp\\.v\\.")
)
Dataframe of biological records
Character. Name of column in df containing the taxonomic entities for which a match is desired.
Result of call to make_taxonomy()
Character. Level within envClean::lurank$rank to target.
Logical. Create overrides for hybrids (e.g. original names with 'x')?
Logical. Create overrides for taxa not matched via gbif using their original names?
File path to write results of searches. Previous results files are reused to avoid redoing time-consuming searches for taxa that are not matched via gbif and so are not written to the taxonomy file in make_taxonomy.
Character. Regular expressions. Taxa for which tolower(taxa_col) matches any element of remove_taxa will not be searched or have overrides constructed.
Character. Taxa names with these strings that indicate a trinomial will not be included as a binomial override (i.e. avoids the use_species column in the overrides being populated with trinomial names).
Tibble in appropriate form to pass to the overrides argument of make_taxonomy().
# setup
# library("envClean")
temp_file <- tempfile()
taxa_df <- tibble::tibble(taxa = c("Charadrius rubricollis"
, "Thinornis cucullatus"
, "Melithreptus gularis laetior"
, "Melithreptus gularis gularis"
, "Eucalyptus viminalis"
, "Eucalyptus viminalis cygnetensis"
, "Eucalyptus"
, "Charadrius mongolus all subspecies"
, "Bettongia lesueur Barrow and Boodie Islands subspecies"
, "Lagorchestes hirsutus Central Australian subspecies"
, "Perameles gunnii Victorian subspecies"
, "Pterostylis sp. Rock ledges (pl. 185, Bates & Weber 1990)"
, "Spyridium glabrisepalum"
, "Spyridium eriocephalum var. glabrisepalum"
, "Petrogale lateralis (MacDonnell Ranges race)"
, "Gehyra montium (revised)"
, "Korthalsella japonica f. japonica"
, "Galaxias sp. nov. 'Hunter'"
, "Some rubbish"
, "Senna artemisioides subsp x artemisioides"
, "Halosarcia sp. (NC)"
, "TERMITOIDAE sp." # 'epifamily'
)
)
# make taxonomy (returns list and writes taxonomy_file)
taxonomy <- make_taxonomy(df = taxa_df
, taxa_col = "taxa"
, taxonomy_file = temp_file
, needed_ranks = c("kingdom", "genus", "species", "subspecies")
)
#> Joining with `by = join_by(original_name)`
#> Matched 20 of 22 taxonomic search terms in selected atlas (Australia).
#> 2 unmatched search terms:
#> • "Galaxias sp. nov. Hunter", "Some rubbish"
#>
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d81c9d7718.parquet
#> The following were completely unmatched: Galaxias sp. nov. Hunter and Some rubbish. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Warning: There was 1 warning in `dplyr::filter()`.
#> ℹ In argument: `returned_rank == min(returned_rank)`.
#> Caused by warning in `min()`:
#> ! no non-missing arguments to min; returning Inf
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
taxonomy$raw
#> # A tibble: 22 × 20
#> original_name search_term scientific_name scientific_name_auth…¹
#> <chr> <chr> <chr> <chr>
#> 1 Bettongia lesueur Barrow … Bettongia … Bettongia lesu… NA
#> 2 Charadrius mongolus all s… Charadrius… Charadrius (Ch… Pallas, 1776
#> 3 Charadrius rubricollis Charadrius… Phalaropus lob… (Linnaeus, 1758)
#> 4 Eucalyptus Eucalyptus Eucalyptus L'Hér.
#> 5 Eucalyptus viminalis Eucalyptus… Eucalyptus vim… Labill.
#> 6 Eucalyptus viminalis cygn… Eucalyptus… Eucalyptus vim… Boomsma
#> 7 Galaxias sp. nov. Hunter Galaxias s… NA NA
#> 8 Gehyra montium (revised) Gehyra mon… Gehyra montium Storr, 1982
#> 9 Halosarcia sp. (NC) Halosarcia… Tecticornia Hook.f.
#> 10 Korthalsella japonica f. … Korthalsel… Korthalsella j… (Thunb.) Engl.
#> # ℹ 12 more rows
#> # ℹ abbreviated name: ¹scientific_name_authorship
#> # ℹ 16 more variables: taxon_concept_id <chr>, rank <ord>, match_type <chr>,
#> # kingdom <chr>, phylum <chr>, class <chr>, order <chr>, family <chr>,
#> # genus <chr>, species <chr>, vernacular_name <chr>, stamp <dttm>,
#> # subspecies <chr>, rank_adj <ord>, original_is_tri <lgl>,
#> # original_is_bi <lgl>
taxonomy$kingdom
#> $lutaxa
#> # A tibble: 0 × 7
#> # ℹ 7 variables: original_name <chr>, match_type <chr>, matched_rank <ord>,
#> # returned_rank <ord>, taxa <chr>, original_is_tri <lgl>,
#> # original_is_bi <lgl>
#>
#> $taxonomy
#> # A tibble: 0 × 2
#> # ℹ 2 variables: taxa <chr>, kingdom <chr>
#>
taxonomy$genus
#> $lutaxa
#> # A tibble: 20 × 7
#> original_name match_type matched_rank returned_rank taxa original_is_tri
#> <chr> <chr> <ord> <ord> <chr> <lgl>
#> 1 Bettongia lesueu… exactMatch subspecies genus Bett… TRUE
#> 2 Charadrius mongo… higherMat… species genus Char… FALSE
#> 3 Charadrius rubri… exactMatch species genus Phal… FALSE
#> 4 Eucalyptus exactMatch genus genus Euca… FALSE
#> 5 Eucalyptus vimin… exactMatch species genus Euca… FALSE
#> 6 Eucalyptus vimin… exactMatch subspecies genus Euca… TRUE
#> 7 Gehyra montium (… canonical… species genus Gehy… FALSE
#> 8 Halosarcia sp. … exactMatch genus genus Tect… FALSE
#> 9 Korthalsella jap… higherMat… species genus Kort… TRUE
#> 10 Lagorchestes hir… canonical… species genus Lago… TRUE
#> 11 Melithreptus gul… exactMatch subspecies genus Meli… TRUE
#> 12 Melithreptus gul… exactMatch subspecies genus Meli… TRUE
#> 13 Perameles gunnii… exactMatch subspecies genus Pera… TRUE
#> 14 Petrogale latera… canonical… species genus Petr… TRUE
#> 15 Pterostylis sp. … exactMatch genus genus Pter… FALSE
#> 16 Senna artemisioi… higherMat… species genus Senna FALSE
#> 17 Spyridium erioce… exactMatch variety genus Spyr… TRUE
#> 18 Spyridium glabri… exactMatch variety genus Spyr… TRUE
#> 19 TERMITOIDAE sp. exactMatch NA order Blat… FALSE
#> 20 Thinornis cucull… exactMatch species genus Thin… FALSE
#> # ℹ 1 more variable: original_is_bi <lgl>
#>
#> $taxonomy
#> # A tibble: 16 × 7
#> taxa kingdom phylum class order family genus
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Bettongia Animalia Chordata Mammalia Diprotodontia Potoroi… Bett…
#> 2 Charadrius Animalia Chordata Aves Charadriiformes Charadr… Char…
#> 3 Phalaropus Animalia Chordata Aves Charadriiformes Scolopa… Phal…
#> 4 Eucalyptus Plantae Charophyta Equisetopsida Myrtales Myrtace… Euca…
#> 5 Gehyra Animalia Chordata Reptilia Squamata Gekkoni… Gehy…
#> 6 Tecticornia Plantae Charophyta Equisetopsida Caryophyllales Chenopo… Tect…
#> 7 Korthalsella Plantae Charophyta Equisetopsida Santalales Santala… Kort…
#> 8 Lagorchestes Animalia Chordata Mammalia Diprotodontia Macropo… Lago…
#> 9 Melithreptus Animalia Chordata Aves Passeriformes Melipha… Meli…
#> 10 Perameles Animalia Chordata Mammalia Peramelemorphia Peramel… Pera…
#> 11 Petrogale Animalia Chordata Mammalia Diprotodontia Macropo… Petr…
#> 12 Pterostylis Plantae Charophyta Equisetopsida Asparagales Orchida… Pter…
#> 13 Senna Plantae Charophyta Equisetopsida Fabales Fabaceae Senna
#> 14 Spyridium Plantae Charophyta Equisetopsida Rosales Rhamnac… Spyr…
#> 15 Blattodea Animalia Arthropoda Insecta Blattodea NA NA
#> 16 Thinornis Animalia Chordata Aves Charadriiformes Charadr… Thin…
#>
taxonomy$species
#> $lutaxa
#> # A tibble: 20 × 7
#> original_name match_type matched_rank returned_rank taxa original_is_tri
#> <chr> <chr> <ord> <ord> <chr> <lgl>
#> 1 Bettongia lesueu… exactMatch subspecies species Bett… TRUE
#> 2 Charadrius mongo… higherMat… species species Char… FALSE
#> 3 Charadrius rubri… exactMatch species species Phal… FALSE
#> 4 Eucalyptus exactMatch genus genus Euca… FALSE
#> 5 Eucalyptus vimin… exactMatch species species Euca… FALSE
#> 6 Eucalyptus vimin… exactMatch subspecies species Euca… TRUE
#> 7 Gehyra montium (… canonical… species species Gehy… FALSE
#> 8 Halosarcia sp. … exactMatch genus genus Tect… FALSE
#> 9 Korthalsella jap… higherMat… species species Kort… TRUE
#> 10 Lagorchestes hir… canonical… species species Lago… TRUE
#> 11 Melithreptus gul… exactMatch subspecies species Meli… TRUE
#> 12 Melithreptus gul… exactMatch subspecies species Meli… TRUE
#> 13 Perameles gunnii… exactMatch subspecies species Pera… TRUE
#> 14 Petrogale latera… canonical… species species Petr… TRUE
#> 15 Pterostylis sp. … exactMatch genus genus Pter… FALSE
#> 16 Senna artemisioi… higherMat… species species Senn… FALSE
#> 17 Spyridium erioce… exactMatch variety species Spyr… TRUE
#> 18 Spyridium glabri… exactMatch variety species Spyr… TRUE
#> 19 TERMITOIDAE sp. exactMatch NA order Blat… FALSE
#> 20 Thinornis cucull… exactMatch species species Thin… FALSE
#> # ℹ 1 more variable: original_is_bi <lgl>
#>
#> $taxonomy
#> # A tibble: 17 × 8
#> taxa kingdom phylum class order family genus species
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Bettongia lesueur Animalia Chordata Mammal… Dipr… Potor… Bett… Betton…
#> 2 Charadrius mongolus Animalia Chordata Aves Char… Chara… Char… Charad…
#> 3 Phalaropus lobatus Animalia Chordata Aves Char… Scolo… Phal… Phalar…
#> 4 Eucalyptus Plantae Charophyta Equise… Myrt… Myrta… Euca… NA
#> 5 Eucalyptus viminalis Plantae Charophyta Equise… Myrt… Myrta… Euca… Eucaly…
#> 6 Gehyra montium Animalia Chordata Reptil… Squa… Gekko… Gehy… Gehyra…
#> 7 Tecticornia Plantae Charophyta Equise… Cary… Cheno… Tect… NA
#> 8 Korthalsella japonica Plantae Charophyta Equise… Sant… Santa… Kort… Kortha…
#> 9 Lagorchestes hirsutus Animalia Chordata Mammal… Dipr… Macro… Lago… Lagorc…
#> 10 Melithreptus gularis Animalia Chordata Aves Pass… Melip… Meli… Melith…
#> 11 Perameles gunnii Animalia Chordata Mammal… Pera… Peram… Pera… Perame…
#> 12 Petrogale lateralis Animalia Chordata Mammal… Dipr… Macro… Petr… Petrog…
#> 13 Pterostylis Plantae Charophyta Equise… Aspa… Orchi… Pter… NA
#> 14 Senna artemisioides Plantae Charophyta Equise… Faba… Fabac… Senna Senna …
#> 15 Spyridium eriocephalum Plantae Charophyta Equise… Rosa… Rhamn… Spyr… Spyrid…
#> 16 Blattodea Animalia Arthropoda Insecta Blat… NA NA NA
#> 17 Thinornis cucullatus Animalia Chordata Aves Char… Chara… Thin… Thinor…
#>
taxonomy$subspecies
#> $lutaxa
#> # A tibble: 20 × 7
#> original_name match_type matched_rank returned_rank taxa original_is_tri
#> <chr> <chr> <ord> <ord> <chr> <lgl>
#> 1 Bettongia lesueu… exactMatch subspecies subspecies Bett… TRUE
#> 2 Charadrius mongo… higherMat… species species Char… FALSE
#> 3 Charadrius rubri… exactMatch species species Phal… FALSE
#> 4 Eucalyptus exactMatch genus genus Euca… FALSE
#> 5 Eucalyptus vimin… exactMatch species species Euca… FALSE
#> 6 Eucalyptus vimin… exactMatch subspecies subspecies Euca… TRUE
#> 7 Gehyra montium (… canonical… species species Gehy… FALSE
#> 8 Halosarcia sp. … exactMatch genus genus Tect… FALSE
#> 9 Korthalsella jap… higherMat… species species Kort… TRUE
#> 10 Lagorchestes hir… canonical… species species Lago… TRUE
#> 11 Melithreptus gul… exactMatch subspecies subspecies Meli… TRUE
#> 12 Melithreptus gul… exactMatch subspecies subspecies Meli… TRUE
#> 13 Perameles gunnii… exactMatch subspecies subspecies Pera… TRUE
#> 14 Petrogale latera… canonical… species species Petr… TRUE
#> 15 Pterostylis sp. … exactMatch genus genus Pter… FALSE
#> 16 Senna artemisioi… higherMat… species species Senn… FALSE
#> 17 Spyridium erioce… exactMatch variety subspecies Spyr… TRUE
#> 18 Spyridium glabri… exactMatch variety subspecies Spyr… TRUE
#> 19 TERMITOIDAE sp. exactMatch NA order Blat… FALSE
#> 20 Thinornis cucull… exactMatch species species Thin… FALSE
#> # ℹ 1 more variable: original_is_bi <lgl>
#>
#> $taxonomy
#> # A tibble: 19 × 9
#> taxa kingdom phylum class order family genus species subspecies
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Bettongia lesueur… Animal… Chord… Mamm… Dipr… Potor… Bett… Betton… Bettongia…
#> 2 Charadrius mongol… Animal… Chord… Aves Char… Chara… Char… Charad… NA
#> 3 Phalaropus lobatus Animal… Chord… Aves Char… Scolo… Phal… Phalar… NA
#> 4 Eucalyptus Plantae Charo… Equi… Myrt… Myrta… Euca… NA NA
#> 5 Eucalyptus vimina… Plantae Charo… Equi… Myrt… Myrta… Euca… Eucaly… NA
#> 6 Eucalyptus vimina… Plantae Charo… Equi… Myrt… Myrta… Euca… Eucaly… Eucalyptu…
#> 7 Gehyra montium Animal… Chord… Rept… Squa… Gekko… Gehy… Gehyra… NA
#> 8 Tecticornia Plantae Charo… Equi… Cary… Cheno… Tect… NA NA
#> 9 Korthalsella japo… Plantae Charo… Equi… Sant… Santa… Kort… Kortha… NA
#> 10 Lagorchestes hirs… Animal… Chord… Mamm… Dipr… Macro… Lago… Lagorc… NA
#> 11 Melithreptus gula… Animal… Chord… Aves Pass… Melip… Meli… Melith… Melithrep…
#> 12 Melithreptus gula… Animal… Chord… Aves Pass… Melip… Meli… Melith… Melithrep…
#> 13 Perameles gunnii … Animal… Chord… Mamm… Pera… Peram… Pera… Perame… Perameles…
#> 14 Petrogale lateral… Animal… Chord… Mamm… Dipr… Macro… Petr… Petrog… NA
#> 15 Pterostylis Plantae Charo… Equi… Aspa… Orchi… Pter… NA NA
#> 16 Senna artemisioid… Plantae Charo… Equi… Faba… Fabac… Senna Senna … NA
#> 17 Spyridium eriocep… Plantae Charo… Equi… Rosa… Rhamn… Spyr… Spyrid… Spyridium…
#> 18 Blattodea Animal… Arthr… Inse… Blat… NA NA NA NA
#> 19 Thinornis cuculla… Animal… Chord… Aves Char… Chara… Thin… Thinor… NA
#>
# query more taxa (results are added to taxonomy_file but only the new taxa are returned; default `limit = TRUE`)
more_taxa <- tibble::tibble(original_name = c("Amytornis whitei"
, "Amytornis striatus"
, "Amytornis modestus (North, 1902)"
, "Amytornis modestus modestus"
, "Amytornis modestus cowarie"
)
)
make_taxonomy(df = more_taxa
, taxonomy_file = temp_file
, needed_ranks = c("species")
)
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d81c9d7718.parquet
#> The following were completely unmatched: Galaxias sp. nov. Hunter and Some rubbish. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> $raw
#> # A tibble: 5 × 20
#> original_name search_term scientific_name scientific_name_auth…¹
#> <chr> <chr> <chr> <chr>
#> 1 Amytornis modestus (North,… Amytornis … Amytornis (Amy… (North, 1902)
#> 2 Amytornis modestus cowarie Amytornis … Amytornis (Amy… Black, 2016
#> 3 Amytornis modestus modestus Amytornis … Amytornis (Amy… (North, 1902)
#> 4 Amytornis striatus Amytornis … Amytornis (Mag… (Gould, 1840)
#> 5 Amytornis whitei Amytornis … Amytornis (Mag… Mathews, 1910
#> # ℹ abbreviated name: ¹scientific_name_authorship
#> # ℹ 16 more variables: taxon_concept_id <chr>, rank <ord>, match_type <chr>,
#> # kingdom <chr>, phylum <chr>, class <chr>, order <chr>, family <chr>,
#> # genus <chr>, species <chr>, vernacular_name <chr>, stamp <dttm>,
#> # subspecies <chr>, rank_adj <ord>, original_is_tri <lgl>,
#> # original_is_bi <lgl>
#>
#> $species
#> $species$lutaxa
#> # A tibble: 5 × 7
#> original_name match_type matched_rank returned_rank taxa original_is_tri
#> <chr> <chr> <ord> <ord> <chr> <lgl>
#> 1 Amytornis modestu… canonical… species species Amyt… FALSE
#> 2 Amytornis modestu… exactMatch subspecies species Amyt… TRUE
#> 3 Amytornis modestu… exactMatch subspecies species Amyt… TRUE
#> 4 Amytornis striatus exactMatch species species Amyt… FALSE
#> 5 Amytornis whitei exactMatch species species Amyt… FALSE
#> # ℹ 1 more variable: original_is_bi <lgl>
#>
#> $species$taxonomy
#> # A tibble: 3 × 8
#> taxa kingdom phylum class order family genus species
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Amytornis modestus Animalia Chordata Aves Passeriformes Maluri… Amyt… Amytor…
#> 2 Amytornis striatus Animalia Chordata Aves Passeriformes Maluri… Amyt… Amytor…
#> 3 Amytornis whitei Animalia Chordata Aves Passeriformes Maluri… Amyt… Amytor…
#>
#>
# no dataframe supplied - all results in taxonomy_file returned
make_taxonomy(taxonomy_file = temp_file
, needed_ranks = c("subspecies")
)
#> Joining with `by = join_by(original_name)`
#> $raw
#> original_name
#> 1 Amytornis modestus (North, 1902)
#> 2 Amytornis modestus cowarie
#> 3 Amytornis modestus modestus
#> 4 Amytornis striatus
#> 5 Amytornis whitei
#> 6 Bettongia lesueur Barrow and Boodie Islands subspecies
#> 7 Charadrius mongolus all subspecies
#> 8 Charadrius rubricollis
#> 9 Eucalyptus
#> 10 Eucalyptus viminalis
#> 11 Eucalyptus viminalis cygnetensis
#> 12 Galaxias sp. nov. Hunter
#> 13 Gehyra montium (revised)
#> 14 Halosarcia sp. (NC)
#> 15 Korthalsella japonica f. japonica
#> 16 Lagorchestes hirsutus Central Australian subspecies
#> 17 Melithreptus gularis gularis
#> 18 Melithreptus gularis laetior
#> 19 Perameles gunnii Victorian subspecies
#> 20 Petrogale lateralis (MacDonnell Ranges race)
#> 21 Pterostylis sp. Rock ledges (pl. 185, Bates & Weber 1990)
#> 22 Senna artemisioides subsp x artemisioides
#> 23 Some rubbish
#> 24 Spyridium eriocephalum var. glabrisepalum
#> 25 Spyridium glabrisepalum
#> 26 TERMITOIDAE sp.
#> 27 Thinornis cucullatus
#> search_term
#> 1 Amytornis modestus (North, 1902)
#> 2 Amytornis modestus cowarie
#> 3 Amytornis modestus modestus
#> 4 Amytornis striatus
#> 5 Amytornis whitei
#> 6 Bettongia lesueur Barrow and Boodie Islands subspecies
#> 7 Charadrius mongolus all subspecies
#> 8 Charadrius rubricollis
#> 9 Eucalyptus
#> 10 Eucalyptus viminalis
#> 11 Eucalyptus viminalis cygnetensis
#> 12 Galaxias sp. nov. Hunter
#> 13 Gehyra montium (revised)
#> 14 Halosarcia sp.
#> 15 Korthalsella japonica f. japonica
#> 16 Lagorchestes hirsutus Central Australian subspecies
#> 17 Melithreptus gularis gularis
#> 18 Melithreptus gularis laetior
#> 19 Perameles gunnii Victorian subspecies
#> 20 Petrogale lateralis (MacDonnell Ranges race)
#> 21 Pterostylis sp. Rock ledges (pl. 185, Bates & Weber 1990)
#> 22 Senna artemisioides subsp
#> 23 Some rubbish
#> 24 Spyridium eriocephalum var. glabrisepalum
#> 25 Spyridium glabrisepalum
#> 26 TERMITOIDAE
#> 27 Thinornis cucullatus
#> scientific_name
#> 1 Amytornis (Amytornis) modestus
#> 2 Amytornis (Amytornis) modestus cowarie
#> 3 Amytornis (Amytornis) modestus modestus
#> 4 Amytornis (Magnamytis) striatus
#> 5 Amytornis (Magnamytis) whitei
#> 6 Bettongia lesueur Barrow and Boodie Islands subspecies
#> 7 Charadrius (Charadrius) mongolus
#> 8 Phalaropus lobatus
#> 9 Eucalyptus
#> 10 Eucalyptus viminalis
#> 11 Eucalyptus viminalis subsp. cygnetensis
#> 12 <NA>
#> 13 Gehyra montium
#> 14 Tecticornia
#> 15 Korthalsella japonica
#> 16 Lagorchestes hirsutus
#> 17 Melithreptus (Eidopsarus) gularis gularis
#> 18 Melithreptus (Eidopsarus) gularis laetior
#> 19 Perameles gunnii Victorian subspecies
#> 20 Petrogale lateralis
#> 21 Pterostylis
#> 22 Senna artemisioides
#> 23 <NA>
#> 24 Spyridium eriocephalum var. glabrisepalum
#> 25 Spyridium eriocephalum var. glabrisepalum
#> 26 Termitoidae
#> 27 Thinornis cucullatus
#> scientific_name_authorship
#> 1 (North, 1902)
#> 2 Black, 2016
#> 3 (North, 1902)
#> 4 (Gould, 1840)
#> 5 Mathews, 1910
#> 6 <NA>
#> 7 Pallas, 1776
#> 8 (Linnaeus, 1758)
#> 9 L'Hér.
#> 10 Labill.
#> 11 Boomsma
#> 12 <NA>
#> 13 Storr, 1982
#> 14 Hook.f.
#> 15 (Thunb.) Engl.
#> 16 Gould, 1844
#> 17 (Gould, 1837)
#> 18 Gould, 1875
#> 19 <NA>
#> 20 Gould, 1842
#> 21 R.Br.
#> 22 (Gaudich. ex DC.) Randell
#> 23 <NA>
#> 24 J.M.Black
#> 25 J.M.Black
#> 26 <NA>
#> 27 (Vieillot, 1818)
#> taxon_concept_id
#> 1 https://biodiversity.org.au/afd/taxa/cfb67754-ed3f-4612-92bf-4753d23d49f0
#> 2 https://biodiversity.org.au/afd/taxa/a2736af9-dba4-4469-8439-9c367b1abb9f
#> 3 https://biodiversity.org.au/afd/taxa/6c201f56-6aba-47a7-95ce-4d0ba55cd55e
#> 4 https://biodiversity.org.au/afd/taxa/5b31a114-e88b-4b78-a2fe-c2fdc306dbdc
#> 5 https://biodiversity.org.au/afd/taxa/85b1ccbd-eefd-41b6-870b-cdbe452639cb
#> 6 ALA_DR22912_1262
#> 7 https://biodiversity.org.au/afd/taxa/fe772d84-8028-44b8-8530-c30a9112157e
#> 8 https://biodiversity.org.au/afd/taxa/9f562f99-aac8-4b16-b3c9-04a7eea35269
#> 9 https://id.biodiversity.org.au/taxon/apni/51738743
#> 10 https://id.biodiversity.org.au/taxon/apni/51290421
#> 11 https://id.biodiversity.org.au/node/apni/2914880
#> 12 <NA>
#> 13 https://biodiversity.org.au/afd/taxa/b84bd290-03f0-46d6-9cf7-e83600c60699
#> 14 https://id.biodiversity.org.au/taxon/apni/51433264
#> 15 https://id.biodiversity.org.au/node/apni/2916672
#> 16 https://biodiversity.org.au/afd/taxa/6e3213e1-c33b-45ae-9e05-6019211dd454
#> 17 https://biodiversity.org.au/afd/taxa/5ae49193-9e02-4a8d-a573-112cbf35c350
#> 18 https://biodiversity.org.au/afd/taxa/8742561c-c7e4-445c-97b6-ac63db433f6c
#> 19 ALA_DR22912_428
#> 20 https://biodiversity.org.au/afd/taxa/a610443a-5830-4d3c-a34a-092bf3adcb96
#> 21 https://id.biodiversity.org.au/taxon/apni/51726566
#> 22 https://id.biodiversity.org.au/taxon/apni/51285233
#> 23 <NA>
#> 24 https://id.biodiversity.org.au/node/apni/2887920
#> 25 https://id.biodiversity.org.au/node/apni/2887920
#> 26 https://biodiversity.org.au/afd/taxa/bf0614d9-26df-43f0-bfe8-b6d16d5d640a
#> 27 https://biodiversity.org.au/afd/taxa/1ebf8ec6-9aee-4b58-8789-0be73eb97b2f
#> rank match_type kingdom phylum class order
#> 1 species canonicalMatch Animalia Chordata Aves Passeriformes
#> 2 subspecies exactMatch Animalia Chordata Aves Passeriformes
#> 3 subspecies exactMatch Animalia Chordata Aves Passeriformes
#> 4 species exactMatch Animalia Chordata Aves Passeriformes
#> 5 species exactMatch Animalia Chordata Aves Passeriformes
#> 6 subspecies exactMatch Animalia Chordata Mammalia Diprotodontia
#> 7 species higherMatch Animalia Chordata Aves Charadriiformes
#> 8 species exactMatch Animalia Chordata Aves Charadriiformes
#> 9 genus exactMatch Plantae Charophyta Equisetopsida Myrtales
#> 10 species exactMatch Plantae Charophyta Equisetopsida Myrtales
#> 11 subspecies exactMatch Plantae Charophyta Equisetopsida Myrtales
#> 12 <NA> <NA> <NA> <NA> <NA> <NA>
#> 13 species canonicalMatch Animalia Chordata Reptilia Squamata
#> 14 genus exactMatch Plantae Charophyta Equisetopsida Caryophyllales
#> 15 species higherMatch Plantae Charophyta Equisetopsida Santalales
#> 16 species canonicalMatch Animalia Chordata Mammalia Diprotodontia
#> 17 subspecies exactMatch Animalia Chordata Aves Passeriformes
#> 18 subspecies exactMatch Animalia Chordata Aves Passeriformes
#> 19 subspecies exactMatch Animalia Chordata Mammalia Peramelemorphia
#> 20 species canonicalMatch Animalia Chordata Mammalia Diprotodontia
#> 21 genus exactMatch Plantae Charophyta Equisetopsida Asparagales
#> 22 species higherMatch Plantae Charophyta Equisetopsida Fabales
#> 23 <NA> <NA> <NA> <NA> <NA> <NA>
#> 24 variety exactMatch Plantae Charophyta Equisetopsida Rosales
#> 25 variety exactMatch Plantae Charophyta Equisetopsida Rosales
#> 26 <NA> exactMatch Animalia Arthropoda Insecta Blattodea
#> 27 species exactMatch Animalia Chordata Aves Charadriiformes
#> family genus species
#> 1 Maluridae Amytornis Amytornis modestus
#> 2 Maluridae Amytornis Amytornis modestus
#> 3 Maluridae Amytornis Amytornis modestus
#> 4 Maluridae Amytornis Amytornis striatus
#> 5 Maluridae Amytornis Amytornis whitei
#> 6 Potoroidae Bettongia Bettongia lesueur
#> 7 Charadriidae Charadrius Charadrius mongolus
#> 8 Scolopacidae Phalaropus Phalaropus lobatus
#> 9 Myrtaceae Eucalyptus <NA>
#> 10 Myrtaceae Eucalyptus Eucalyptus viminalis
#> 11 Myrtaceae Eucalyptus Eucalyptus viminalis
#> 12 <NA> <NA> <NA>
#> 13 Gekkonidae Gehyra Gehyra montium
#> 14 Chenopodiaceae Tecticornia <NA>
#> 15 Santalaceae Korthalsella Korthalsella japonica
#> 16 Macropodidae Lagorchestes Lagorchestes hirsutus
#> 17 Meliphagidae Melithreptus Melithreptus gularis
#> 18 Meliphagidae Melithreptus Melithreptus gularis
#> 19 Peramelidae Perameles Perameles gunnii
#> 20 Macropodidae Petrogale Petrogale lateralis
#> 21 Orchidaceae Pterostylis <NA>
#> 22 Fabaceae Senna Senna artemisioides
#> 23 <NA> <NA> <NA>
#> 24 Rhamnaceae Spyridium Spyridium eriocephalum
#> 25 Rhamnaceae Spyridium Spyridium eriocephalum
#> 26 <NA> <NA> <NA>
#> 27 Charadriidae Thinornis Thinornis cucullatus
#> vernacular_name stamp
#> 1 Thick-billed Grasswren 2025-06-17 07:25:46
#> 2 Cowarie Thick-billed Grasswren 2025-06-17 07:25:46
#> 3 Macdonnell Ranges Thick-billed Grasswren 2025-06-17 07:25:46
#> 4 Striated Grasswren 2025-06-17 07:25:46
#> 5 <NA> 2025-06-17 07:25:46
#> 6 Burrowing Bettong (barrow And Boodie Islands) 2025-06-17 07:25:44
#> 7 Lesser Sand Plover 2025-06-17 07:25:44
#> 8 Red-necked Phalarope 2025-06-17 07:25:44
#> 9 Studley Park Gum 2025-06-17 07:25:44
#> 10 Ribbon Gum 2025-06-17 07:25:44
#> 11 Rough-barked Manna-gum 2025-06-17 07:25:44
#> 12 <NA> 2025-06-17 07:25:44
#> 13 Centralian Dtella 2025-06-17 07:25:44
#> 14 Samphires 2025-06-17 07:25:44
#> 15 Jointed Mistletoe 2025-06-17 07:25:44
#> 16 Rufous Hare-wallaby 2025-06-17 07:25:44
#> 17 Black-chinned Honeyeater (eastern) 2025-06-17 07:25:44
#> 18 Golden-backed Honeyeater 2025-06-17 07:25:44
#> 19 Eastern Barred Bandicoot (mainland) 2025-06-17 07:25:44
#> 20 Black-footed Rock-wallaby 2025-06-17 07:25:44
#> 21 Greenhoods 2025-06-17 07:25:44
#> 22 Desert Cassia 2025-06-17 07:25:44
#> 23 <NA> 2025-06-17 07:25:44
#> 24 Macgillivray Spyridium 2025-06-17 07:25:44
#> 25 Macgillivray Spyridium 2025-06-17 07:25:44
#> 26 White Ants 2025-06-17 07:25:44
#> 27 Hooded Plover 2025-06-17 07:25:44
#> subspecies rank_adj
#> 1 <NA> species
#> 2 Amytornis modestus cowarie subspecies
#> 3 Amytornis modestus modestus subspecies
#> 4 <NA> species
#> 5 <NA> species
#> 6 Bettongia lesueur Barrow and Boodie Islands subspecies subspecies
#> 7 <NA> species
#> 8 <NA> species
#> 9 <NA> genus
#> 10 <NA> species
#> 11 Eucalyptus viminalis subsp. cygnetensis subspecies
#> 12 <NA> <NA>
#> 13 <NA> species
#> 14 <NA> genus
#> 15 <NA> species
#> 16 <NA> species
#> 17 Melithreptus gularis gularis subspecies
#> 18 Melithreptus gularis laetior subspecies
#> 19 Perameles gunnii Victorian subspecies subspecies
#> 20 <NA> species
#> 21 <NA> genus
#> 22 <NA> species
#> 23 <NA> <NA>
#> 24 Spyridium eriocephalum var. glabrisepalum subspecies
#> 25 Spyridium eriocephalum var. glabrisepalum subspecies
#> 26 <NA> <NA>
#> 27 <NA> species
#> original_is_tri original_is_bi
#> 1 FALSE TRUE
#> 2 TRUE FALSE
#> 3 TRUE FALSE
#> 4 FALSE TRUE
#> 5 FALSE TRUE
#> 6 TRUE FALSE
#> 7 FALSE TRUE
#> 8 FALSE TRUE
#> 9 FALSE FALSE
#> 10 FALSE TRUE
#> 11 TRUE FALSE
#> 12 FALSE TRUE
#> 13 FALSE TRUE
#> 14 FALSE FALSE
#> 15 TRUE FALSE
#> 16 TRUE FALSE
#> 17 TRUE FALSE
#> 18 TRUE FALSE
#> 19 TRUE FALSE
#> 20 TRUE FALSE
#> 21 FALSE TRUE
#> 22 FALSE TRUE
#> 23 FALSE TRUE
#> 24 TRUE FALSE
#> 25 TRUE FALSE
#> 26 FALSE FALSE
#> 27 FALSE TRUE
#>
#> $subspecies
#> $subspecies$lutaxa
#> # A tibble: 25 × 7
#> original_name match_type matched_rank returned_rank taxa original_is_tri
#> <chr> <chr> <fct> <ord> <chr> <lgl>
#> 1 Amytornis modest… canonical… species species Amyt… FALSE
#> 2 Amytornis modest… exactMatch subspecies subspecies Amyt… TRUE
#> 3 Amytornis modest… exactMatch subspecies subspecies Amyt… TRUE
#> 4 Amytornis striat… exactMatch species species Amyt… FALSE
#> 5 Amytornis whitei exactMatch species species Amyt… FALSE
#> 6 Bettongia lesueu… exactMatch subspecies subspecies Bett… TRUE
#> 7 Charadrius mongo… higherMat… species species Char… FALSE
#> 8 Charadrius rubri… exactMatch species species Phal… FALSE
#> 9 Eucalyptus exactMatch genus genus Euca… FALSE
#> 10 Eucalyptus vimin… exactMatch species species Euca… FALSE
#> # ℹ 15 more rows
#> # ℹ 1 more variable: original_is_bi <lgl>
#>
#> $subspecies$taxonomy
#> # A tibble: 24 × 9
#> taxa kingdom phylum class order family genus species subspecies
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 Amytornis modestus Animal… Chord… Aves Pass… Malur… Amyt… Amytor… NA
#> 2 Amytornis modestu… Animal… Chord… Aves Pass… Malur… Amyt… Amytor… Amytornis…
#> 3 Amytornis modestu… Animal… Chord… Aves Pass… Malur… Amyt… Amytor… Amytornis…
#> 4 Amytornis striatus Animal… Chord… Aves Pass… Malur… Amyt… Amytor… NA
#> 5 Amytornis whitei Animal… Chord… Aves Pass… Malur… Amyt… Amytor… NA
#> 6 Bettongia lesueur… Animal… Chord… Mamm… Dipr… Potor… Bett… Betton… Bettongia…
#> 7 Charadrius mongol… Animal… Chord… Aves Char… Chara… Char… Charad… NA
#> 8 Phalaropus lobatus Animal… Chord… Aves Char… Scolo… Phal… Phalar… NA
#> 9 Eucalyptus Plantae Charo… Equi… Myrt… Myrta… Euca… NA NA
#> 10 Eucalyptus vimina… Plantae Charo… Equi… Myrt… Myrta… Euca… Eucaly… NA
#> # ℹ 14 more rows
#>
#>
# Try automatic overrides
auto_overrides <- make_unmatched_overrides(df = taxa_df
, taxa_col = "taxa"
, taxonomy = taxonomy
, target_rank = "species"
)
#> Joining with `by = join_by(original_name)`
#> Warning: There was 1 warning in `dplyr::mutate()`.
#> ℹ In argument: `res = purrr::map(...)`.
#> Caused by warning:
#> ! Unknown or uninitialised column: `scientificName`.
#> Joining with `by = join_by(original_name)`
#> Error in dplyr::select(dplyr::filter(dplyr::bind_rows(mget(ls(pattern = "^unmatched_")), .id = "note"), !is.na(!!rlang::ensym(taxa_col))), !!rlang::ensym(taxa_col), tidyr::any_of(tidyr::matches(unique(c(target_rank, "species")))), rank, scientific_name, kingdom, note): Can't select columns that don't exist.
#> ✖ Column `rank` doesn't exist.
# overrides
overrides <- envClean::taxonomy_overrides
# C. rubricollis binned to Phalaropus lobatus at species level!
taxonomy <- make_taxonomy(df = overrides
, taxonomy_file = temp_file
, needed_ranks = c("species", "subspecies")
)
#> Joining with `by = join_by(original_name)`
#> Matched 22 of 27 taxonomic search terms in selected atlas (Australia).
#> 5 unmatched search terms:
#> • "Aptenodes patagonicus", "Corybas", "Corybas" + 2 more
#>
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d81c9d7718.parquet
#> The following were completely unmatched: Aptenodes patagonicus, Corybas X dentatus, Galaxias sp. nov. Hunter, Puffinis gravis, Puffinus griseus, Some rubbish and Corybas x dentatus. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
taxonomy$species$lutaxa %>%
dplyr::filter(grepl("rubricollis", original_name))
#> # A tibble: 2 × 7
#> original_name match_type matched_rank returned_rank taxa original_is_tri
#> <chr> <chr> <ord> <ord> <chr> <lgl>
#> 1 Charadrius rubric… exactMatch species species Phal… FALSE
#> 2 Thinornis rubrico… higherMat… species species Thin… TRUE
#> # ℹ 1 more variable: original_is_bi <lgl>
# add in override - C. rubricollis is binned to T. cucullatus at species level
taxonomy <- make_taxonomy(df = overrides
, taxonomy_file = temp_file
, needed_ranks = c("species", "subspecies")
, overrides = overrides
)
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name, returned_rank)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d81c9d7718.parquet
#> The following were completely unmatched: Galaxias sp. nov. Hunter and Some rubbish. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
taxonomy$species$lutaxa %>%
dplyr::filter(grepl("rubricollis", original_name))
#> # A tibble: 2 × 8
#> original_name match_type matched_rank returned_rank taxa original_is_tri
#> <chr> <chr> <ord> <ord> <chr> <lgl>
#> 1 Charadrius rubric… exactMatch species species Thin… FALSE
#> 2 Thinornis rubrico… exactMatch subspecies species Thin… TRUE
#> # ℹ 2 more variables: original_is_bi <lgl>, override <lgl>
# tweak_species example
make_taxonomy(df = tibble::tibble(original_name = "Acacia sp. Small Red-leaved Wattle (J.B.Williams 95033)")
, tweak_species = FALSE
)$raw %>%
dplyr::select(original_name, scientific_name, species)
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d813fcb92.parquet
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> # A tibble: 1 × 3
#> original_name scientific_name species
#> <chr> <chr> <chr>
#> 1 Acacia sp. Small Red-leaved Wattle (J.B.Williams 9503… Acacia sp. Sma… NA
make_taxonomy(df = tibble::tibble(original_name = "Acacia sp. Small Red-leaved Wattle (J.B.Williams 95033)")
, tweak_species = TRUE
)$raw %>%
dplyr::select(original_name, scientific_name, species)
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d86c6e272.parquet
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> # A tibble: 1 × 3
#> original_name scientific_name species
#> <chr> <chr> <chr>
#> 1 Acacia sp. Small Red-leaved Wattle (J.B.Williams 9503… Acacia sp. Sma… NA
# clean up
rm(taxonomy)
unlist(paste0(temp_file, ".parquet"))
#> [1] "C:/temp/nige\\RtmpEBQxW6\\file34d81c9d7718.parquet"