For an unmatched 'name', try to find a match via: rgbif::name_usage(); rgbif::name_backbone(); and removal of any characters in 'name' after an 'x' or 'X' (i.e. treat hybrids as just the first taxon). Using rgbif::name_backbone() allows fuzzy matching to fix spelling errors. Any results are passed back to galah::search_taxa() to retrieve an override to use for that 'name' (so long as the rgbif result is not identical to 'name'). Any 'name' that is still completely unmatched is given itself as its override, so it is not lost from downstream processes, but it will not have any associated taxonomic information.

make_unmatched_overrides(
  df,
  taxa_col = "original_name",
  taxonomy,
  target_rank = "species",
  hybrids = FALSE,
  include_unmatched = TRUE,
  results_file = tempfile(fileext = ".parquet"),
  remove_taxa = c("bold:", "unverified", "undetermined", "unidentified", "annual herb",
    "annual grass", "incertae sedis", "\\?", "another\\s", "not naturalised in sa",
    "annual tussock grass", "*no id", "spec\\.", "\\s\\-\\-\\s.*",
    "\\ssp\\.", "\\sspec\\.", "\\ssp$", "\\ssp\\d", "\\ssp\\s",
    "\\sspp\\.", "\\sspp\\s", "\\sspp$", "dead", "unknown", "\\sgroup$",
    "\\sspecies$", "aquatic grass", "hybrid", "\\scultivar$", "\\scomplex$",
    "\\ssect\\.", "\\ss\\.\\sstr\\.", "\\(includes\\s"),
  tri_strings = c("\\sssp\\s", "\\sssp\\.", "\\svar\\s", "\\svar\\.",
    "\\ssubsp\\.", "\\ssubspecies", "\\sform\\)", "\\sform\\s",
    "\\sf\\.", "\\srace\\s", "\\srace\\)", "\\sp\\.v\\.")
)

Arguments

df

Dataframe of biological records

taxa_col

Character. Name of column in df containing the taxonomic entities for which a match is desired.

taxonomy

Result of call to make_taxonomy()

target_rank

Character. Level within envClean::lurank$rank to target

hybrids

Logical. Create overrides for hybrids (e.g. original names with 'x')?

include_unmatched

Logical. For taxa not matched via gbif, create overrides that use their original names?

results_file

File path to write results of searches. Previous results files are used to avoid redoing time-consuming searches for taxa that are not matched via gbif and are not written to the taxonomy file in make_taxonomy.

remove_taxa

Character. Taxa for which tolower(taxa_col) matches any of the regular expressions in remove_taxa will not be searched or have overrides constructed.

tri_strings

Character. Taxa names with these strings that indicate a trinomial will not be included as a binomial override (i.e. avoids the use_species column in the overrides being populated with trinomial names).

Value

Tibble in appropriate form to pass to the overrides argument of make_taxonomy()

Examples


  # setup
  # library("envClean")

  temp_file <- tempfile()

  taxa_df <- tibble::tibble(taxa = c("Charadrius rubricollis"
                                     , "Thinornis cucullatus"
                                     , "Melithreptus gularis laetior"
                                     , "Melithreptus gularis gularis"
                                     , "Eucalyptus viminalis"
                                     , "Eucalyptus viminalis cygnetensis"
                                     , "Eucalyptus"
                                     , "Charadrius mongolus all subspecies"
                                     , "Bettongia lesueur Barrow and Boodie Islands subspecies"
                                     , "Lagorchestes hirsutus Central Australian subspecies"
                                     , "Perameles gunnii Victorian subspecies"
                                     , "Pterostylis sp. Rock ledges (pl. 185, Bates & Weber 1990)"
                                     , "Spyridium glabrisepalum"
                                     , "Spyridium eriocephalum var. glabrisepalum"
                                     , "Petrogale lateralis (MacDonnell Ranges race)"
                                     , "Gehyra montium (revised)"
                                     , "Korthalsella japonica f. japonica"
                                     , "Galaxias sp. nov. 'Hunter'"
                                     , "Some rubbish"
                                     , "Senna artemisioides subsp x artemisioides"
                                     , "Halosarcia sp.  (NC)"
                                     , "TERMITOIDAE sp." # 'epifamily'
                                     )
                            )

  # make taxonomy (returns list and writes taxonomy_file)
  taxonomy <- make_taxonomy(df = taxa_df
                            , taxa_col = "taxa"
                            , taxonomy_file = temp_file
                            , needed_ranks = c("kingdom", "genus", "species", "subspecies")
                            )
#> Joining with `by = join_by(original_name)`
#> Matched 20 of 22 taxonomic search terms in selected atlas (Australia).
#> 2 unmatched search terms:
#>  "Galaxias sp. nov. Hunter", "Some rubbish"
#> 
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d81c9d7718.parquet
#> The following were completely unmatched: Galaxias sp. nov. Hunter and Some rubbish. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Warning: There was 1 warning in `dplyr::filter()`.
#>  In argument: `returned_rank == min(returned_rank)`.
#> Caused by warning in `min()`:
#> ! no non-missing arguments to min; returning Inf
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
  taxonomy$raw
#> # A tibble: 22 × 20
#>    original_name              search_term scientific_name scientific_name_auth…¹
#>    <chr>                      <chr>       <chr>           <chr>                 
#>  1 Bettongia lesueur Barrow … Bettongia … Bettongia lesu… NA                    
#>  2 Charadrius mongolus all s… Charadrius… Charadrius (Ch… Pallas, 1776          
#>  3 Charadrius rubricollis     Charadrius… Phalaropus lob… (Linnaeus, 1758)      
#>  4 Eucalyptus                 Eucalyptus  Eucalyptus      L'Hér.                
#>  5 Eucalyptus viminalis       Eucalyptus… Eucalyptus vim… Labill.               
#>  6 Eucalyptus viminalis cygn… Eucalyptus… Eucalyptus vim… Boomsma               
#>  7 Galaxias sp. nov. Hunter   Galaxias s… NA              NA                    
#>  8 Gehyra montium (revised)   Gehyra mon… Gehyra montium  Storr, 1982           
#>  9 Halosarcia sp.  (NC)       Halosarcia… Tecticornia     Hook.f.               
#> 10 Korthalsella japonica f. … Korthalsel… Korthalsella j… (Thunb.) Engl.        
#> # ℹ 12 more rows
#> # ℹ abbreviated name: ¹​scientific_name_authorship
#> # ℹ 16 more variables: taxon_concept_id <chr>, rank <ord>, match_type <chr>,
#> #   kingdom <chr>, phylum <chr>, class <chr>, order <chr>, family <chr>,
#> #   genus <chr>, species <chr>, vernacular_name <chr>, stamp <dttm>,
#> #   subspecies <chr>, rank_adj <ord>, original_is_tri <lgl>,
#> #   original_is_bi <lgl>
  taxonomy$kingdom
#> $lutaxa
#> # A tibble: 0 × 7
#> # ℹ 7 variables: original_name <chr>, match_type <chr>, matched_rank <ord>,
#> #   returned_rank <ord>, taxa <chr>, original_is_tri <lgl>,
#> #   original_is_bi <lgl>
#> 
#> $taxonomy
#> # A tibble: 0 × 2
#> # ℹ 2 variables: taxa <chr>, kingdom <chr>
#> 
  taxonomy$genus
#> $lutaxa
#> # A tibble: 20 × 7
#>    original_name     match_type matched_rank returned_rank taxa  original_is_tri
#>    <chr>             <chr>      <ord>        <ord>         <chr> <lgl>          
#>  1 Bettongia lesueu… exactMatch subspecies   genus         Bett… TRUE           
#>  2 Charadrius mongo… higherMat… species      genus         Char… FALSE          
#>  3 Charadrius rubri… exactMatch species      genus         Phal… FALSE          
#>  4 Eucalyptus        exactMatch genus        genus         Euca… FALSE          
#>  5 Eucalyptus vimin… exactMatch species      genus         Euca… FALSE          
#>  6 Eucalyptus vimin… exactMatch subspecies   genus         Euca… TRUE           
#>  7 Gehyra montium (… canonical… species      genus         Gehy… FALSE          
#>  8 Halosarcia sp.  … exactMatch genus        genus         Tect… FALSE          
#>  9 Korthalsella jap… higherMat… species      genus         Kort… TRUE           
#> 10 Lagorchestes hir… canonical… species      genus         Lago… TRUE           
#> 11 Melithreptus gul… exactMatch subspecies   genus         Meli… TRUE           
#> 12 Melithreptus gul… exactMatch subspecies   genus         Meli… TRUE           
#> 13 Perameles gunnii… exactMatch subspecies   genus         Pera… TRUE           
#> 14 Petrogale latera… canonical… species      genus         Petr… TRUE           
#> 15 Pterostylis sp. … exactMatch genus        genus         Pter… FALSE          
#> 16 Senna artemisioi… higherMat… species      genus         Senna FALSE          
#> 17 Spyridium erioce… exactMatch variety      genus         Spyr… TRUE           
#> 18 Spyridium glabri… exactMatch variety      genus         Spyr… TRUE           
#> 19 TERMITOIDAE sp.   exactMatch NA           order         Blat… FALSE          
#> 20 Thinornis cucull… exactMatch species      genus         Thin… FALSE          
#> # ℹ 1 more variable: original_is_bi <lgl>
#> 
#> $taxonomy
#> # A tibble: 16 × 7
#>    taxa         kingdom  phylum     class         order           family   genus
#>    <chr>        <chr>    <chr>      <chr>         <chr>           <chr>    <chr>
#>  1 Bettongia    Animalia Chordata   Mammalia      Diprotodontia   Potoroi… Bett…
#>  2 Charadrius   Animalia Chordata   Aves          Charadriiformes Charadr… Char…
#>  3 Phalaropus   Animalia Chordata   Aves          Charadriiformes Scolopa… Phal…
#>  4 Eucalyptus   Plantae  Charophyta Equisetopsida Myrtales        Myrtace… Euca…
#>  5 Gehyra       Animalia Chordata   Reptilia      Squamata        Gekkoni… Gehy…
#>  6 Tecticornia  Plantae  Charophyta Equisetopsida Caryophyllales  Chenopo… Tect…
#>  7 Korthalsella Plantae  Charophyta Equisetopsida Santalales      Santala… Kort…
#>  8 Lagorchestes Animalia Chordata   Mammalia      Diprotodontia   Macropo… Lago…
#>  9 Melithreptus Animalia Chordata   Aves          Passeriformes   Melipha… Meli…
#> 10 Perameles    Animalia Chordata   Mammalia      Peramelemorphia Peramel… Pera…
#> 11 Petrogale    Animalia Chordata   Mammalia      Diprotodontia   Macropo… Petr…
#> 12 Pterostylis  Plantae  Charophyta Equisetopsida Asparagales     Orchida… Pter…
#> 13 Senna        Plantae  Charophyta Equisetopsida Fabales         Fabaceae Senna
#> 14 Spyridium    Plantae  Charophyta Equisetopsida Rosales         Rhamnac… Spyr…
#> 15 Blattodea    Animalia Arthropoda Insecta       Blattodea       NA       NA   
#> 16 Thinornis    Animalia Chordata   Aves          Charadriiformes Charadr… Thin…
#> 
  taxonomy$species
#> $lutaxa
#> # A tibble: 20 × 7
#>    original_name     match_type matched_rank returned_rank taxa  original_is_tri
#>    <chr>             <chr>      <ord>        <ord>         <chr> <lgl>          
#>  1 Bettongia lesueu… exactMatch subspecies   species       Bett… TRUE           
#>  2 Charadrius mongo… higherMat… species      species       Char… FALSE          
#>  3 Charadrius rubri… exactMatch species      species       Phal… FALSE          
#>  4 Eucalyptus        exactMatch genus        genus         Euca… FALSE          
#>  5 Eucalyptus vimin… exactMatch species      species       Euca… FALSE          
#>  6 Eucalyptus vimin… exactMatch subspecies   species       Euca… TRUE           
#>  7 Gehyra montium (… canonical… species      species       Gehy… FALSE          
#>  8 Halosarcia sp.  … exactMatch genus        genus         Tect… FALSE          
#>  9 Korthalsella jap… higherMat… species      species       Kort… TRUE           
#> 10 Lagorchestes hir… canonical… species      species       Lago… TRUE           
#> 11 Melithreptus gul… exactMatch subspecies   species       Meli… TRUE           
#> 12 Melithreptus gul… exactMatch subspecies   species       Meli… TRUE           
#> 13 Perameles gunnii… exactMatch subspecies   species       Pera… TRUE           
#> 14 Petrogale latera… canonical… species      species       Petr… TRUE           
#> 15 Pterostylis sp. … exactMatch genus        genus         Pter… FALSE          
#> 16 Senna artemisioi… higherMat… species      species       Senn… FALSE          
#> 17 Spyridium erioce… exactMatch variety      species       Spyr… TRUE           
#> 18 Spyridium glabri… exactMatch variety      species       Spyr… TRUE           
#> 19 TERMITOIDAE sp.   exactMatch NA           order         Blat… FALSE          
#> 20 Thinornis cucull… exactMatch species      species       Thin… FALSE          
#> # ℹ 1 more variable: original_is_bi <lgl>
#> 
#> $taxonomy
#> # A tibble: 17 × 8
#>    taxa                   kingdom  phylum     class   order family genus species
#>    <chr>                  <chr>    <chr>      <chr>   <chr> <chr>  <chr> <chr>  
#>  1 Bettongia lesueur      Animalia Chordata   Mammal… Dipr… Potor… Bett… Betton…
#>  2 Charadrius mongolus    Animalia Chordata   Aves    Char… Chara… Char… Charad…
#>  3 Phalaropus lobatus     Animalia Chordata   Aves    Char… Scolo… Phal… Phalar…
#>  4 Eucalyptus             Plantae  Charophyta Equise… Myrt… Myrta… Euca… NA     
#>  5 Eucalyptus viminalis   Plantae  Charophyta Equise… Myrt… Myrta… Euca… Eucaly…
#>  6 Gehyra montium         Animalia Chordata   Reptil… Squa… Gekko… Gehy… Gehyra…
#>  7 Tecticornia            Plantae  Charophyta Equise… Cary… Cheno… Tect… NA     
#>  8 Korthalsella japonica  Plantae  Charophyta Equise… Sant… Santa… Kort… Kortha…
#>  9 Lagorchestes hirsutus  Animalia Chordata   Mammal… Dipr… Macro… Lago… Lagorc…
#> 10 Melithreptus gularis   Animalia Chordata   Aves    Pass… Melip… Meli… Melith…
#> 11 Perameles gunnii       Animalia Chordata   Mammal… Pera… Peram… Pera… Perame…
#> 12 Petrogale lateralis    Animalia Chordata   Mammal… Dipr… Macro… Petr… Petrog…
#> 13 Pterostylis            Plantae  Charophyta Equise… Aspa… Orchi… Pter… NA     
#> 14 Senna artemisioides    Plantae  Charophyta Equise… Faba… Fabac… Senna Senna …
#> 15 Spyridium eriocephalum Plantae  Charophyta Equise… Rosa… Rhamn… Spyr… Spyrid…
#> 16 Blattodea              Animalia Arthropoda Insecta Blat… NA     NA    NA     
#> 17 Thinornis cucullatus   Animalia Chordata   Aves    Char… Chara… Thin… Thinor…
#> 
  taxonomy$subspecies
#> $lutaxa
#> # A tibble: 20 × 7
#>    original_name     match_type matched_rank returned_rank taxa  original_is_tri
#>    <chr>             <chr>      <ord>        <ord>         <chr> <lgl>          
#>  1 Bettongia lesueu… exactMatch subspecies   subspecies    Bett… TRUE           
#>  2 Charadrius mongo… higherMat… species      species       Char… FALSE          
#>  3 Charadrius rubri… exactMatch species      species       Phal… FALSE          
#>  4 Eucalyptus        exactMatch genus        genus         Euca… FALSE          
#>  5 Eucalyptus vimin… exactMatch species      species       Euca… FALSE          
#>  6 Eucalyptus vimin… exactMatch subspecies   subspecies    Euca… TRUE           
#>  7 Gehyra montium (… canonical… species      species       Gehy… FALSE          
#>  8 Halosarcia sp.  … exactMatch genus        genus         Tect… FALSE          
#>  9 Korthalsella jap… higherMat… species      species       Kort… TRUE           
#> 10 Lagorchestes hir… canonical… species      species       Lago… TRUE           
#> 11 Melithreptus gul… exactMatch subspecies   subspecies    Meli… TRUE           
#> 12 Melithreptus gul… exactMatch subspecies   subspecies    Meli… TRUE           
#> 13 Perameles gunnii… exactMatch subspecies   subspecies    Pera… TRUE           
#> 14 Petrogale latera… canonical… species      species       Petr… TRUE           
#> 15 Pterostylis sp. … exactMatch genus        genus         Pter… FALSE          
#> 16 Senna artemisioi… higherMat… species      species       Senn… FALSE          
#> 17 Spyridium erioce… exactMatch variety      subspecies    Spyr… TRUE           
#> 18 Spyridium glabri… exactMatch variety      subspecies    Spyr… TRUE           
#> 19 TERMITOIDAE sp.   exactMatch NA           order         Blat… FALSE          
#> 20 Thinornis cucull… exactMatch species      species       Thin… FALSE          
#> # ℹ 1 more variable: original_is_bi <lgl>
#> 
#> $taxonomy
#> # A tibble: 19 × 9
#>    taxa               kingdom phylum class order family genus species subspecies
#>    <chr>              <chr>   <chr>  <chr> <chr> <chr>  <chr> <chr>   <chr>     
#>  1 Bettongia lesueur… Animal… Chord… Mamm… Dipr… Potor… Bett… Betton… Bettongia…
#>  2 Charadrius mongol… Animal… Chord… Aves  Char… Chara… Char… Charad… NA        
#>  3 Phalaropus lobatus Animal… Chord… Aves  Char… Scolo… Phal… Phalar… NA        
#>  4 Eucalyptus         Plantae Charo… Equi… Myrt… Myrta… Euca… NA      NA        
#>  5 Eucalyptus vimina… Plantae Charo… Equi… Myrt… Myrta… Euca… Eucaly… NA        
#>  6 Eucalyptus vimina… Plantae Charo… Equi… Myrt… Myrta… Euca… Eucaly… Eucalyptu…
#>  7 Gehyra montium     Animal… Chord… Rept… Squa… Gekko… Gehy… Gehyra… NA        
#>  8 Tecticornia        Plantae Charo… Equi… Cary… Cheno… Tect… NA      NA        
#>  9 Korthalsella japo… Plantae Charo… Equi… Sant… Santa… Kort… Kortha… NA        
#> 10 Lagorchestes hirs… Animal… Chord… Mamm… Dipr… Macro… Lago… Lagorc… NA        
#> 11 Melithreptus gula… Animal… Chord… Aves  Pass… Melip… Meli… Melith… Melithrep…
#> 12 Melithreptus gula… Animal… Chord… Aves  Pass… Melip… Meli… Melith… Melithrep…
#> 13 Perameles gunnii … Animal… Chord… Mamm… Pera… Peram… Pera… Perame… Perameles…
#> 14 Petrogale lateral… Animal… Chord… Mamm… Dipr… Macro… Petr… Petrog… NA        
#> 15 Pterostylis        Plantae Charo… Equi… Aspa… Orchi… Pter… NA      NA        
#> 16 Senna artemisioid… Plantae Charo… Equi… Faba… Fabac… Senna Senna … NA        
#> 17 Spyridium eriocep… Plantae Charo… Equi… Rosa… Rhamn… Spyr… Spyrid… Spyridium…
#> 18 Blattodea          Animal… Arthr… Inse… Blat… NA     NA    NA      NA        
#> 19 Thinornis cuculla… Animal… Chord… Aves  Char… Chara… Thin… Thinor… NA        
#> 

  # query more taxa (results are added to taxonomy_file but only the new taxa are returned; default `limit = TRUE`)
  more_taxa <- tibble::tibble(original_name = c("Amytornis whitei"
                                                , "Amytornis striatus"
                                                , "Amytornis modestus (North, 1902)"
                                                , "Amytornis modestus modestus"
                                                , "Amytornis modestus cowarie"
                                                )
                              )

  make_taxonomy(df = more_taxa
                , taxonomy_file = temp_file
                , needed_ranks = c("species")
                )
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d81c9d7718.parquet
#> The following were completely unmatched: Galaxias sp. nov. Hunter and Some rubbish. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> $raw
#> # A tibble: 5 × 20
#>   original_name               search_term scientific_name scientific_name_auth…¹
#>   <chr>                       <chr>       <chr>           <chr>                 
#> 1 Amytornis modestus (North,… Amytornis … Amytornis (Amy… (North, 1902)         
#> 2 Amytornis modestus cowarie  Amytornis … Amytornis (Amy… Black, 2016           
#> 3 Amytornis modestus modestus Amytornis … Amytornis (Amy… (North, 1902)         
#> 4 Amytornis striatus          Amytornis … Amytornis (Mag… (Gould, 1840)         
#> 5 Amytornis whitei            Amytornis … Amytornis (Mag… Mathews, 1910         
#> # ℹ abbreviated name: ¹​scientific_name_authorship
#> # ℹ 16 more variables: taxon_concept_id <chr>, rank <ord>, match_type <chr>,
#> #   kingdom <chr>, phylum <chr>, class <chr>, order <chr>, family <chr>,
#> #   genus <chr>, species <chr>, vernacular_name <chr>, stamp <dttm>,
#> #   subspecies <chr>, rank_adj <ord>, original_is_tri <lgl>,
#> #   original_is_bi <lgl>
#> 
#> $species
#> $species$lutaxa
#> # A tibble: 5 × 7
#>   original_name      match_type matched_rank returned_rank taxa  original_is_tri
#>   <chr>              <chr>      <ord>        <ord>         <chr> <lgl>          
#> 1 Amytornis modestu… canonical… species      species       Amyt… FALSE          
#> 2 Amytornis modestu… exactMatch subspecies   species       Amyt… TRUE           
#> 3 Amytornis modestu… exactMatch subspecies   species       Amyt… TRUE           
#> 4 Amytornis striatus exactMatch species      species       Amyt… FALSE          
#> 5 Amytornis whitei   exactMatch species      species       Amyt… FALSE          
#> # ℹ 1 more variable: original_is_bi <lgl>
#> 
#> $species$taxonomy
#> # A tibble: 3 × 8
#>   taxa               kingdom  phylum   class order         family  genus species
#>   <chr>              <chr>    <chr>    <chr> <chr>         <chr>   <chr> <chr>  
#> 1 Amytornis modestus Animalia Chordata Aves  Passeriformes Maluri… Amyt… Amytor…
#> 2 Amytornis striatus Animalia Chordata Aves  Passeriformes Maluri… Amyt… Amytor…
#> 3 Amytornis whitei   Animalia Chordata Aves  Passeriformes Maluri… Amyt… Amytor…
#> 
#> 

  # no dataframe supplied - all results in taxonomy_file returned
  make_taxonomy(taxonomy_file = temp_file
                , needed_ranks = c("subspecies")
                )
#> Joining with `by = join_by(original_name)`
#> $raw
#>                                                original_name
#> 1                           Amytornis modestus (North, 1902)
#> 2                                 Amytornis modestus cowarie
#> 3                                Amytornis modestus modestus
#> 4                                         Amytornis striatus
#> 5                                           Amytornis whitei
#> 6     Bettongia lesueur Barrow and Boodie Islands subspecies
#> 7                         Charadrius mongolus all subspecies
#> 8                                     Charadrius rubricollis
#> 9                                                 Eucalyptus
#> 10                                      Eucalyptus viminalis
#> 11                          Eucalyptus viminalis cygnetensis
#> 12                                  Galaxias sp. nov. Hunter
#> 13                                  Gehyra montium (revised)
#> 14                                      Halosarcia sp.  (NC)
#> 15                         Korthalsella japonica f. japonica
#> 16       Lagorchestes hirsutus Central Australian subspecies
#> 17                              Melithreptus gularis gularis
#> 18                              Melithreptus gularis laetior
#> 19                     Perameles gunnii Victorian subspecies
#> 20              Petrogale lateralis (MacDonnell Ranges race)
#> 21 Pterostylis sp. Rock ledges (pl. 185, Bates & Weber 1990)
#> 22                 Senna artemisioides subsp x artemisioides
#> 23                                              Some rubbish
#> 24                 Spyridium eriocephalum var. glabrisepalum
#> 25                                   Spyridium glabrisepalum
#> 26                                           TERMITOIDAE sp.
#> 27                                      Thinornis cucullatus
#>                                                  search_term
#> 1                           Amytornis modestus (North, 1902)
#> 2                                 Amytornis modestus cowarie
#> 3                                Amytornis modestus modestus
#> 4                                         Amytornis striatus
#> 5                                           Amytornis whitei
#> 6     Bettongia lesueur Barrow and Boodie Islands subspecies
#> 7                         Charadrius mongolus all subspecies
#> 8                                     Charadrius rubricollis
#> 9                                                 Eucalyptus
#> 10                                      Eucalyptus viminalis
#> 11                          Eucalyptus viminalis cygnetensis
#> 12                                  Galaxias sp. nov. Hunter
#> 13                                  Gehyra montium (revised)
#> 14                                            Halosarcia sp.
#> 15                         Korthalsella japonica f. japonica
#> 16       Lagorchestes hirsutus Central Australian subspecies
#> 17                              Melithreptus gularis gularis
#> 18                              Melithreptus gularis laetior
#> 19                     Perameles gunnii Victorian subspecies
#> 20              Petrogale lateralis (MacDonnell Ranges race)
#> 21 Pterostylis sp. Rock ledges (pl. 185, Bates & Weber 1990)
#> 22                                 Senna artemisioides subsp
#> 23                                              Some rubbish
#> 24                 Spyridium eriocephalum var. glabrisepalum
#> 25                                   Spyridium glabrisepalum
#> 26                                               TERMITOIDAE
#> 27                                      Thinornis cucullatus
#>                                           scientific_name
#> 1                          Amytornis (Amytornis) modestus
#> 2                  Amytornis (Amytornis) modestus cowarie
#> 3                 Amytornis (Amytornis) modestus modestus
#> 4                         Amytornis (Magnamytis) striatus
#> 5                           Amytornis (Magnamytis) whitei
#> 6  Bettongia lesueur Barrow and Boodie Islands subspecies
#> 7                        Charadrius (Charadrius) mongolus
#> 8                                      Phalaropus lobatus
#> 9                                              Eucalyptus
#> 10                                   Eucalyptus viminalis
#> 11                Eucalyptus viminalis subsp. cygnetensis
#> 12                                                   <NA>
#> 13                                         Gehyra montium
#> 14                                            Tecticornia
#> 15                                  Korthalsella japonica
#> 16                                  Lagorchestes hirsutus
#> 17              Melithreptus (Eidopsarus) gularis gularis
#> 18              Melithreptus (Eidopsarus) gularis laetior
#> 19                  Perameles gunnii Victorian subspecies
#> 20                                    Petrogale lateralis
#> 21                                            Pterostylis
#> 22                                    Senna artemisioides
#> 23                                                   <NA>
#> 24              Spyridium eriocephalum var. glabrisepalum
#> 25              Spyridium eriocephalum var. glabrisepalum
#> 26                                            Termitoidae
#> 27                                   Thinornis cucullatus
#>    scientific_name_authorship
#> 1               (North, 1902)
#> 2                 Black, 2016
#> 3               (North, 1902)
#> 4               (Gould, 1840)
#> 5               Mathews, 1910
#> 6                        <NA>
#> 7                Pallas, 1776
#> 8            (Linnaeus, 1758)
#> 9                      L'Hér.
#> 10                    Labill.
#> 11                    Boomsma
#> 12                       <NA>
#> 13                Storr, 1982
#> 14                    Hook.f.
#> 15             (Thunb.) Engl.
#> 16                Gould, 1844
#> 17              (Gould, 1837)
#> 18                Gould, 1875
#> 19                       <NA>
#> 20                Gould, 1842
#> 21                      R.Br.
#> 22  (Gaudich. ex DC.) Randell
#> 23                       <NA>
#> 24                  J.M.Black
#> 25                  J.M.Black
#> 26                       <NA>
#> 27           (Vieillot, 1818)
#>                                                             taxon_concept_id
#> 1  https://biodiversity.org.au/afd/taxa/cfb67754-ed3f-4612-92bf-4753d23d49f0
#> 2  https://biodiversity.org.au/afd/taxa/a2736af9-dba4-4469-8439-9c367b1abb9f
#> 3  https://biodiversity.org.au/afd/taxa/6c201f56-6aba-47a7-95ce-4d0ba55cd55e
#> 4  https://biodiversity.org.au/afd/taxa/5b31a114-e88b-4b78-a2fe-c2fdc306dbdc
#> 5  https://biodiversity.org.au/afd/taxa/85b1ccbd-eefd-41b6-870b-cdbe452639cb
#> 6                                                           ALA_DR22912_1262
#> 7  https://biodiversity.org.au/afd/taxa/fe772d84-8028-44b8-8530-c30a9112157e
#> 8  https://biodiversity.org.au/afd/taxa/9f562f99-aac8-4b16-b3c9-04a7eea35269
#> 9                         https://id.biodiversity.org.au/taxon/apni/51738743
#> 10                        https://id.biodiversity.org.au/taxon/apni/51290421
#> 11                          https://id.biodiversity.org.au/node/apni/2914880
#> 12                                                                      <NA>
#> 13 https://biodiversity.org.au/afd/taxa/b84bd290-03f0-46d6-9cf7-e83600c60699
#> 14                        https://id.biodiversity.org.au/taxon/apni/51433264
#> 15                          https://id.biodiversity.org.au/node/apni/2916672
#> 16 https://biodiversity.org.au/afd/taxa/6e3213e1-c33b-45ae-9e05-6019211dd454
#> 17 https://biodiversity.org.au/afd/taxa/5ae49193-9e02-4a8d-a573-112cbf35c350
#> 18 https://biodiversity.org.au/afd/taxa/8742561c-c7e4-445c-97b6-ac63db433f6c
#> 19                                                           ALA_DR22912_428
#> 20 https://biodiversity.org.au/afd/taxa/a610443a-5830-4d3c-a34a-092bf3adcb96
#> 21                        https://id.biodiversity.org.au/taxon/apni/51726566
#> 22                        https://id.biodiversity.org.au/taxon/apni/51285233
#> 23                                                                      <NA>
#> 24                          https://id.biodiversity.org.au/node/apni/2887920
#> 25                          https://id.biodiversity.org.au/node/apni/2887920
#> 26 https://biodiversity.org.au/afd/taxa/bf0614d9-26df-43f0-bfe8-b6d16d5d640a
#> 27 https://biodiversity.org.au/afd/taxa/1ebf8ec6-9aee-4b58-8789-0be73eb97b2f
#>          rank     match_type  kingdom     phylum         class           order
#> 1     species canonicalMatch Animalia   Chordata          Aves   Passeriformes
#> 2  subspecies     exactMatch Animalia   Chordata          Aves   Passeriformes
#> 3  subspecies     exactMatch Animalia   Chordata          Aves   Passeriformes
#> 4     species     exactMatch Animalia   Chordata          Aves   Passeriformes
#> 5     species     exactMatch Animalia   Chordata          Aves   Passeriformes
#> 6  subspecies     exactMatch Animalia   Chordata      Mammalia   Diprotodontia
#> 7     species    higherMatch Animalia   Chordata          Aves Charadriiformes
#> 8     species     exactMatch Animalia   Chordata          Aves Charadriiformes
#> 9       genus     exactMatch  Plantae Charophyta Equisetopsida        Myrtales
#> 10    species     exactMatch  Plantae Charophyta Equisetopsida        Myrtales
#> 11 subspecies     exactMatch  Plantae Charophyta Equisetopsida        Myrtales
#> 12       <NA>           <NA>     <NA>       <NA>          <NA>            <NA>
#> 13    species canonicalMatch Animalia   Chordata      Reptilia        Squamata
#> 14      genus     exactMatch  Plantae Charophyta Equisetopsida  Caryophyllales
#> 15    species    higherMatch  Plantae Charophyta Equisetopsida      Santalales
#> 16    species canonicalMatch Animalia   Chordata      Mammalia   Diprotodontia
#> 17 subspecies     exactMatch Animalia   Chordata          Aves   Passeriformes
#> 18 subspecies     exactMatch Animalia   Chordata          Aves   Passeriformes
#> 19 subspecies     exactMatch Animalia   Chordata      Mammalia Peramelemorphia
#> 20    species canonicalMatch Animalia   Chordata      Mammalia   Diprotodontia
#> 21      genus     exactMatch  Plantae Charophyta Equisetopsida     Asparagales
#> 22    species    higherMatch  Plantae Charophyta Equisetopsida         Fabales
#> 23       <NA>           <NA>     <NA>       <NA>          <NA>            <NA>
#> 24    variety     exactMatch  Plantae Charophyta Equisetopsida         Rosales
#> 25    variety     exactMatch  Plantae Charophyta Equisetopsida         Rosales
#> 26       <NA>     exactMatch Animalia Arthropoda       Insecta       Blattodea
#> 27    species     exactMatch Animalia   Chordata          Aves Charadriiformes
#>            family        genus                species
#> 1       Maluridae    Amytornis     Amytornis modestus
#> 2       Maluridae    Amytornis     Amytornis modestus
#> 3       Maluridae    Amytornis     Amytornis modestus
#> 4       Maluridae    Amytornis     Amytornis striatus
#> 5       Maluridae    Amytornis       Amytornis whitei
#> 6      Potoroidae    Bettongia      Bettongia lesueur
#> 7    Charadriidae   Charadrius    Charadrius mongolus
#> 8    Scolopacidae   Phalaropus     Phalaropus lobatus
#> 9       Myrtaceae   Eucalyptus                   <NA>
#> 10      Myrtaceae   Eucalyptus   Eucalyptus viminalis
#> 11      Myrtaceae   Eucalyptus   Eucalyptus viminalis
#> 12           <NA>         <NA>                   <NA>
#> 13     Gekkonidae       Gehyra         Gehyra montium
#> 14 Chenopodiaceae  Tecticornia                   <NA>
#> 15    Santalaceae Korthalsella  Korthalsella japonica
#> 16   Macropodidae Lagorchestes  Lagorchestes hirsutus
#> 17   Meliphagidae Melithreptus   Melithreptus gularis
#> 18   Meliphagidae Melithreptus   Melithreptus gularis
#> 19    Peramelidae    Perameles       Perameles gunnii
#> 20   Macropodidae    Petrogale    Petrogale lateralis
#> 21    Orchidaceae  Pterostylis                   <NA>
#> 22       Fabaceae        Senna    Senna artemisioides
#> 23           <NA>         <NA>                   <NA>
#> 24     Rhamnaceae    Spyridium Spyridium eriocephalum
#> 25     Rhamnaceae    Spyridium Spyridium eriocephalum
#> 26           <NA>         <NA>                   <NA>
#> 27   Charadriidae    Thinornis   Thinornis cucullatus
#>                                  vernacular_name               stamp
#> 1                         Thick-billed Grasswren 2025-06-17 07:25:46
#> 2                 Cowarie Thick-billed Grasswren 2025-06-17 07:25:46
#> 3       Macdonnell Ranges Thick-billed Grasswren 2025-06-17 07:25:46
#> 4                             Striated Grasswren 2025-06-17 07:25:46
#> 5                                           <NA> 2025-06-17 07:25:46
#> 6  Burrowing Bettong (barrow And Boodie Islands) 2025-06-17 07:25:44
#> 7                             Lesser Sand Plover 2025-06-17 07:25:44
#> 8                           Red-necked Phalarope 2025-06-17 07:25:44
#> 9                               Studley Park Gum 2025-06-17 07:25:44
#> 10                                    Ribbon Gum 2025-06-17 07:25:44
#> 11                        Rough-barked Manna-gum 2025-06-17 07:25:44
#> 12                                          <NA> 2025-06-17 07:25:44
#> 13                             Centralian Dtella 2025-06-17 07:25:44
#> 14                                     Samphires 2025-06-17 07:25:44
#> 15                             Jointed Mistletoe 2025-06-17 07:25:44
#> 16                           Rufous Hare-wallaby 2025-06-17 07:25:44
#> 17            Black-chinned Honeyeater (eastern) 2025-06-17 07:25:44
#> 18                      Golden-backed Honeyeater 2025-06-17 07:25:44
#> 19           Eastern Barred Bandicoot (mainland) 2025-06-17 07:25:44
#> 20                     Black-footed Rock-wallaby 2025-06-17 07:25:44
#> 21                                    Greenhoods 2025-06-17 07:25:44
#> 22                                 Desert Cassia 2025-06-17 07:25:44
#> 23                                          <NA> 2025-06-17 07:25:44
#> 24                        Macgillivray Spyridium 2025-06-17 07:25:44
#> 25                        Macgillivray Spyridium 2025-06-17 07:25:44
#> 26                                    White Ants 2025-06-17 07:25:44
#> 27                                 Hooded Plover 2025-06-17 07:25:44
#>                                                subspecies   rank_adj
#> 1                                                    <NA>    species
#> 2                              Amytornis modestus cowarie subspecies
#> 3                             Amytornis modestus modestus subspecies
#> 4                                                    <NA>    species
#> 5                                                    <NA>    species
#> 6  Bettongia lesueur Barrow and Boodie Islands subspecies subspecies
#> 7                                                    <NA>    species
#> 8                                                    <NA>    species
#> 9                                                    <NA>      genus
#> 10                                                   <NA>    species
#> 11                Eucalyptus viminalis subsp. cygnetensis subspecies
#> 12                                                   <NA>       <NA>
#> 13                                                   <NA>    species
#> 14                                                   <NA>      genus
#> 15                                                   <NA>    species
#> 16                                                   <NA>    species
#> 17                           Melithreptus gularis gularis subspecies
#> 18                           Melithreptus gularis laetior subspecies
#> 19                  Perameles gunnii Victorian subspecies subspecies
#> 20                                                   <NA>    species
#> 21                                                   <NA>      genus
#> 22                                                   <NA>    species
#> 23                                                   <NA>       <NA>
#> 24              Spyridium eriocephalum var. glabrisepalum subspecies
#> 25              Spyridium eriocephalum var. glabrisepalum subspecies
#> 26                                                   <NA>       <NA>
#> 27                                                   <NA>    species
#>    original_is_tri original_is_bi
#> 1            FALSE           TRUE
#> 2             TRUE          FALSE
#> 3             TRUE          FALSE
#> 4            FALSE           TRUE
#> 5            FALSE           TRUE
#> 6             TRUE          FALSE
#> 7            FALSE           TRUE
#> 8            FALSE           TRUE
#> 9            FALSE          FALSE
#> 10           FALSE           TRUE
#> 11            TRUE          FALSE
#> 12           FALSE           TRUE
#> 13           FALSE           TRUE
#> 14           FALSE          FALSE
#> 15            TRUE          FALSE
#> 16            TRUE          FALSE
#> 17            TRUE          FALSE
#> 18            TRUE          FALSE
#> 19            TRUE          FALSE
#> 20            TRUE          FALSE
#> 21           FALSE           TRUE
#> 22           FALSE           TRUE
#> 23           FALSE           TRUE
#> 24            TRUE          FALSE
#> 25            TRUE          FALSE
#> 26           FALSE          FALSE
#> 27           FALSE           TRUE
#> 
#> $subspecies
#> $subspecies$lutaxa
#> # A tibble: 25 × 7
#>    original_name     match_type matched_rank returned_rank taxa  original_is_tri
#>    <chr>             <chr>      <fct>        <ord>         <chr> <lgl>          
#>  1 Amytornis modest… canonical… species      species       Amyt… FALSE          
#>  2 Amytornis modest… exactMatch subspecies   subspecies    Amyt… TRUE           
#>  3 Amytornis modest… exactMatch subspecies   subspecies    Amyt… TRUE           
#>  4 Amytornis striat… exactMatch species      species       Amyt… FALSE          
#>  5 Amytornis whitei  exactMatch species      species       Amyt… FALSE          
#>  6 Bettongia lesueu… exactMatch subspecies   subspecies    Bett… TRUE           
#>  7 Charadrius mongo… higherMat… species      species       Char… FALSE          
#>  8 Charadrius rubri… exactMatch species      species       Phal… FALSE          
#>  9 Eucalyptus        exactMatch genus        genus         Euca… FALSE          
#> 10 Eucalyptus vimin… exactMatch species      species       Euca… FALSE          
#> # ℹ 15 more rows
#> # ℹ 1 more variable: original_is_bi <lgl>
#> 
#> $subspecies$taxonomy
#> # A tibble: 24 × 9
#>    taxa               kingdom phylum class order family genus species subspecies
#>    <chr>              <chr>   <chr>  <chr> <chr> <chr>  <chr> <chr>   <chr>     
#>  1 Amytornis modestus Animal… Chord… Aves  Pass… Malur… Amyt… Amytor… NA        
#>  2 Amytornis modestu… Animal… Chord… Aves  Pass… Malur… Amyt… Amytor… Amytornis…
#>  3 Amytornis modestu… Animal… Chord… Aves  Pass… Malur… Amyt… Amytor… Amytornis…
#>  4 Amytornis striatus Animal… Chord… Aves  Pass… Malur… Amyt… Amytor… NA        
#>  5 Amytornis whitei   Animal… Chord… Aves  Pass… Malur… Amyt… Amytor… NA        
#>  6 Bettongia lesueur… Animal… Chord… Mamm… Dipr… Potor… Bett… Betton… Bettongia…
#>  7 Charadrius mongol… Animal… Chord… Aves  Char… Chara… Char… Charad… NA        
#>  8 Phalaropus lobatus Animal… Chord… Aves  Char… Scolo… Phal… Phalar… NA        
#>  9 Eucalyptus         Plantae Charo… Equi… Myrt… Myrta… Euca… NA      NA        
#> 10 Eucalyptus vimina… Plantae Charo… Equi… Myrt… Myrta… Euca… Eucaly… NA        
#> # ℹ 14 more rows
#> 
#> 

  # Try automatic overrides
  auto_overrides <- make_unmatched_overrides(df = taxa_df
                                             , taxa_col = "taxa"
                                             , taxonomy = taxonomy
                                             , target_rank = "species"
                                             )
#> Joining with `by = join_by(original_name)`
#> Warning: There was 1 warning in `dplyr::mutate()`.
#>  In argument: `res = purrr::map(...)`.
#> Caused by warning:
#> ! Unknown or uninitialised column: `scientificName`.
#> Joining with `by = join_by(original_name)`
#> Error in dplyr::select(dplyr::filter(dplyr::bind_rows(mget(ls(pattern = "^unmatched_")),     .id = "note"), !is.na(!!rlang::ensym(taxa_col))), !!rlang::ensym(taxa_col),     tidyr::any_of(tidyr::matches(unique(c(target_rank, "species")))),     rank, scientific_name, kingdom, note): Can't select columns that don't exist.
#>  Column `rank` doesn't exist.

  # overrides
  overrides <- envClean::taxonomy_overrides

  # C. rubricollis binned to Phalaropus lobatus at species level!
  taxonomy <- make_taxonomy(df = overrides
                            , taxonomy_file = temp_file
                            , needed_ranks = c("species", "subspecies")
                            )
#> Joining with `by = join_by(original_name)`
#> Matched 22 of 27 taxonomic search terms in selected atlas (Australia).
#> 5 unmatched search terms:
#>  "Aptenodes patagonicus", "Corybas", "Corybas" + 2 more
#> 
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d81c9d7718.parquet
#> The following were completely unmatched: Aptenodes patagonicus, Corybas X dentatus, Galaxias sp. nov. Hunter, Puffinis gravis, Puffinus griseus, Some rubbish and Corybas x dentatus. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`

  taxonomy$species$lutaxa %>%
    dplyr::filter(grepl("rubricollis", original_name))
#> # A tibble: 2 × 7
#>   original_name      match_type matched_rank returned_rank taxa  original_is_tri
#>   <chr>              <chr>      <ord>        <ord>         <chr> <lgl>          
#> 1 Charadrius rubric… exactMatch species      species       Phal… FALSE          
#> 2 Thinornis rubrico… higherMat… species      species       Thin… TRUE           
#> # ℹ 1 more variable: original_is_bi <lgl>

  # add in override - C. rubricollis is binned to T. cucullatus at species level
  taxonomy <- make_taxonomy(df = overrides
                            , taxonomy_file = temp_file
                            , needed_ranks = c("species", "subspecies")
                            , overrides = overrides
                            )
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name, returned_rank)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d81c9d7718.parquet
#> The following were completely unmatched: Galaxias sp. nov. Hunter and Some rubbish. Consider providing more taxonomic levels, or an override, for each unmatched taxa?
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`

  taxonomy$species$lutaxa %>%
    dplyr::filter(grepl("rubricollis", original_name))
#> # A tibble: 2 × 8
#>   original_name      match_type matched_rank returned_rank taxa  original_is_tri
#>   <chr>              <chr>      <ord>        <ord>         <chr> <lgl>          
#> 1 Charadrius rubric… exactMatch species      species       Thin… FALSE          
#> 2 Thinornis rubrico… exactMatch subspecies   species       Thin… TRUE           
#> # ℹ 2 more variables: original_is_bi <lgl>, override <lgl>


  # tweak_species example
  make_taxonomy(df = tibble::tibble(original_name = "Acacia sp. Small Red-leaved Wattle (J.B.Williams 95033)")
                , tweak_species = FALSE
                )$raw %>%
    dplyr::select(original_name, scientific_name, species)
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d813fcb92.parquet
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> # A tibble: 1 × 3
#>   original_name                                          scientific_name species
#>   <chr>                                                  <chr>           <chr>  
#> 1 Acacia sp. Small Red-leaved Wattle (J.B.Williams 9503… Acacia sp. Sma… NA     

  make_taxonomy(df = tibble::tibble(original_name = "Acacia sp. Small Red-leaved Wattle (J.B.Williams 95033)")
                , tweak_species = TRUE
                )$raw %>%
    dplyr::select(original_name, scientific_name, species)
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> saving results to C:/temp/nige\RtmpEBQxW6\file34d86c6e272.parquet
#> Joining with `by = join_by(original_name)`
#> Joining with `by = join_by(original_name)`
#> # A tibble: 1 × 3
#>   original_name                                          scientific_name species
#>   <chr>                                                  <chr>           <chr>  
#> 1 Acacia sp. Small Red-leaved Wattle (J.B.Williams 9503… Acacia sp. Sma… NA     

  # clean up
  rm(taxonomy)
  # Delete the temporary results file. The "saving results to ..." messages
  # above show the file on disk carries a ".parquet" extension, so it is
  # appended to temp_file here. unlink() (not unlist()) is what removes files.
  unlink(paste0(temp_file, ".parquet"))