% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/flag_florabr.R
\name{flag_florabr}
\alias{flag_florabr}
\title{Identify records outside natural ranges according to Flora e Funga do Brasil}
\usage{
flag_florabr(
  data_dir,
  occ,
  species = "species",
  long = "decimalLongitude",
  lat = "decimalLatitude",
  origin = NULL,
  by_state = TRUE,
  buffer_state = 20,
  by_biome = TRUE,
  buffer_biome = 20,
  by_endemism = TRUE,
  buffer_brazil = 20,
  state_vect = NULL,
  state_column = NULL,
  biome_vect = NULL,
  biome_column = NULL,
  br_vect = NULL,
  keep_columns = TRUE,
  progress_bar = FALSE,
  verbose = FALSE
)
}
\arguments{
\item{data_dir}{(character) directory path where the \code{florabr} data is
saved. \strong{Required.}}

\item{occ}{(data.frame) a data frame containing the occurrence records to be
flagged. Must contain columns for species, longitude, and latitude.}

\item{species}{(character) the name of the column in \code{occ} that contains the
species scientific names. Default is \code{"species"}.}

\item{long}{(character) the name of the column in \code{occ} that contains the
longitude values. Default is \code{"decimalLongitude"}.}

\item{lat}{(character) the name of the column in \code{occ} that contains the
latitude values. Default is \code{"decimalLatitude"}.}

\item{origin}{(character or NULL) filter the \code{florabr} data by origin type
before checking (\code{"native"}, \code{"cultivated"}, \code{"naturalized"}, \code{"unknown"},
or \code{"not_found_in_brazil"}). Default is \code{NULL} (no filtering).}

\item{by_state}{(logical) if \code{TRUE}, flags records based on their distance
to known Brazilian state distributions. Default is \code{TRUE}.}

\item{buffer_state}{(numeric) buffer distance (in kilometers) to be applied
around the known state distribution boundaries. Records within this distance
are considered valid. Default is 20 km.}

\item{by_biome}{(logical) if \code{TRUE}, flags records based on their
distance to known Brazilian biome distributions. Default is \code{TRUE}.}

\item{buffer_biome}{(numeric) buffer distance (in kilometers) to be
applied around the known biome distribution boundaries. Records within this
distance are considered valid. Default is 20 km.}

\item{by_endemism}{(logical) if \code{TRUE}, includes a check against the entire
Brazilian boundary. Default is \code{TRUE}.}

\item{buffer_brazil}{(numeric) buffer distance (in kilometers) to be applied
around the entire Brazilian boundary. Default is 20 km.}

\item{state_vect}{(SpatVector) an optional custom simple features
(\code{sf}) vector representing Brazilian states/regions. If \code{NULL}, uses the
default data loaded by \code{florabr}. Default is \code{NULL}.}

\item{state_column}{(character) the name of the column in \code{state_vect}
(or the default state vector) used to match distribution information.
Default is \code{NULL}.}

\item{biome_vect}{(SpatVector) an optional custom simple features (\code{sf})
vector representing Brazilian biomes. If \code{NULL}, uses the default data
loaded by \code{florabr}. Default is \code{NULL}.}

\item{biome_column}{(character) the name of the column in \code{biome_vect}
(or the default biome vector) used to match distribution information.
Default is \code{NULL}.}

\item{br_vect}{(SpatVector) an optional custom simple features (\code{sf}) vector
representing the entire Brazilian boundary. If \code{NULL}, uses the default data
loaded by \code{florabr}. Default is \code{NULL}.}

\item{keep_columns}{(logical) if \code{TRUE}, the returned data frame contains
all original columns from \code{occ}. If \code{FALSE}, it returns only the key columns
and the flag. Default is \code{TRUE}.}

\item{progress_bar}{(logical) whether to display a progress bar during
processing. If \code{TRUE}, the 'pbapply' package must be installed. Default is
\code{FALSE}.}

\item{verbose}{(logical) if \code{TRUE}, prints messages about the progress and
the number of species being checked. Default is \code{FALSE}.}
}
\value{
A \code{data.frame} with the records from \code{occ} (all original columns,
or only the key columns when \code{keep_columns = FALSE}), augmented with a
new column named \code{florabr_flag}. This column is
logical (\code{TRUE}/\code{FALSE}) indicating whether the record falls
within the expected distribution (plus buffer) based on the \code{florabr}
data. Records for species not found in the \code{florabr} data will have
\code{NA} in the \code{florabr_flag} column.
}
\description{
Flags (validates) occurrence records based on known distribution data
from the Flora e Funga do Brasil (florabr) data. This function checks if an
occurrence point for a given species falls within its documented distribution,
allowing for user-defined buffers around Brazilian states, biomes, or the
entire country. Records are flagged as valid (\code{TRUE}) if they fall within
the specified range for the distribution information available in the
\code{florabr} data.
}
\examples{
# Load example data
data("occurrences", package = "RuHere")
# Get only occurrences from Araucaria
occ <- occurrences[occurrences$species == "Araucaria angustifolia", ]
# Set folder where distributional datasets were saved
# Here, just a sample provided in the package
# You must run 'florabr_here()' beforehand to download the necessary data files for your species
dataset_dir <- system.file("extdata/datasets", package = "RuHere")

# Flag records using specialist information from Flora e Funga do Brasil
occ_flora <- flag_florabr(data_dir = dataset_dir, occ = occ)

}
