% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AMBI.R
\name{AMBI}
\alias{AMBI}
\title{Calculates AMBI, the AZTI Marine Biotic Index}
\usage{
AMBI(
  df,
  by = NULL,
  var_rep = NA_character_,
  var_species = "species",
  var_count = "count",
  df_species = NULL,
  var_group_AMBI = "group",
  groups_strict = TRUE,
  quiet = FALSE,
  interactive = FALSE,
  format_pct = NA,
  show_class = TRUE,
  exact_species_match = FALSE
)
}
\arguments{
\item{df}{a dataframe of species observations}

\item{by}{a vector of column names found in \code{df} by which calculations
should be grouped \emph{e.g. c("station","date")}}

\item{var_rep}{\emph{optional} name of the column in \code{df} which
identifies replicates. If replicates are used,
the AMBI index will be calculated for each replicate before
an average is calculated for each combination of \code{by}
variables. If the Shannon diversity index \code{H} is calculated
this will be done for species counts collected within \code{by}
groups without any consideration of replicates.}

\item{var_species}{name of the column in \code{df} containing species names}

\item{var_count}{name of the column in \code{df} containing count/density/abundance}

\item{df_species}{\emph{optional} dataframe of user-specified species groups. By default,
the function matches species in \code{df} with the official species
list from AZTI. If a dataframe with a user-defined list of
species is provided, then a search for species groups will
also be made in this list. \emph{see \href{#species-matching-and-interactive-mode}{Details}}.}

\item{var_group_AMBI}{\emph{optional} name of the column in \code{df_species}
containing the groups for the AMBI index calculations. These
should be specified as integer values from 1 to 7. Any other
values will be ignored. If \code{df_species} is not specified
then \code{var_group_AMBI} will be ignored.}

\item{groups_strict}{By default, any user-assigned species group which
conflicts with an original AMBI group assignment will be
ignored and the original group remains unchanged. If the argument
\code{groups_strict = FALSE} is used then user-assigned groups
will always override AMBI groups in case of conflict.
\emph{DO NOT use this option unless you are sure you know what
you are doing! It could invalidate your results.}}

\item{quiet}{warnings about low numbers of species and/or individuals
are contained in the \code{warnings} dataframe. By default
(\code{quiet = FALSE}) these warnings are also shown in the console.
If the function is called with the parameter \code{quiet = TRUE}
then warnings will not be displayed in the console.}

\item{interactive}{(default \code{FALSE}) if a species name in the input data is not
found in the AMBI species list, then this will be seen in
the output dataframe \code{matched}. If \emph{interactive} mode is
selected, the user will be given the opportunity to assign
\emph{manually} a species group (\emph{I, II, III, IV, V}) or to
mark the species as \emph{not assigned} to a species group (see
details).}

\item{format_pct}{(\emph{optional}) By default, frequency results including the
fraction of total numbers within each species group are
expressed as real numbers. If this argument is
given a positive integer value (e.g. \code{format_pct = 2})
then the fractions are expressed as percentages
with the number of digits shown after the decimal point
equal to the number specified. \emph{NOTE} by formatting as
percentages, values are converted to text and may lose
precision.}

\item{show_class}{(default \code{TRUE}). If \code{TRUE} then the \code{AMBI} results will
include a column showing the AMBI disturbance classification
\emph{Undisturbed}, \emph{Slightly disturbed}, \emph{Moderately disturbed},
or \emph{Heavily disturbed}.}

\item{exact_species_match}{by default, a family name without \emph{sp.} will
be matched with a family name on the AMBI (or
user-specified) species list which includes \emph{sp.}. If
the option \code{exact_species_match = TRUE} is used, species
names will be matched only with identical names.}
}
\value{
a list of dataframes:
\itemize{
\item \code{AMBI} : results of the AMBI index calculations. For each unique
combination of \code{by} variables, the following values are calculated:
\itemize{
\item \code{AMBI} : the AMBI index value
\item \code{AMBI_SD} : sample standard deviation of AMBI \emph{included only
when replicates are used}, i.e. when the user
has specified \code{var_rep}.
\item \code{N} : number of individuals
\item \code{S} : number of species
\item \code{H} : Shannon diversity index \emph{H'}
\item \code{fNA} : fraction of individuals \emph{not assigned}, that is, matched to
a species in the AMBI species list with \emph{Group 0}. Note that this is
different from the number of rows where no match was found. Species not
matched are excluded from the totals.
}
\item \code{AMBI_rep} : results of the AMBI index calculations \emph{per replicate}. This
dataframe is present only if the observation data includes replicates and
the user has specified \code{var_rep}. Similar to the main \code{AMBI} result but does
not include results for \code{H} (Shannon diversity index) or for \code{AMBI_SD}
(sample standard deviation of AMBI) which are not estimated at replicate level.
\item \code{matched} : the original dataframe with columns added from the species list.
Contains the following columns:
\itemize{
\item \code{group} : showing the species group. Any species/taxa in \code{df} which were not
matched will have an \code{NA} value in this column.
\item \code{RA} : a value of \code{1} indicates that the species is \emph{reallocatable} according to the
AMBI list. That is, it could be re-assigned to a different species group.
\item \code{source} : this column is included only if a user-specified list was
provided \code{df_species}, or if species groups were assigned interactively.
An \code{"I"} in this column indicates that the group was assigned interactively.
A \code{"U"} shows that the group information came from a user-provided species
list. An \code{NA} value indicates that no interactive or user-provided changes
were applied.
}
\item \code{warnings} : a dataframe showing warnings for any combination of \code{by}
variables where
\itemize{
\item The percentage of individuals not assigned to a group is higher than 20\%
\item The (not null) number of species is less than 3
\item The (not null) number of individuals is less than 6
}
}
}
\description{
\code{\link[=AMBI]{AMBI()}} matches a list of species counts with the official AMBI species list
and calculates the AMBI index.
}
\details{
The theory behind the AMBI index calculations and details of the method, as
developed by \href{#references}{Borja et al. (2000)}, are summarised below.
\subsection{AMBI method}{

Species can be matched to one of five groups, the distribution of individuals
between the groups reflecting different levels of stress on the ecosystem.
\itemize{
\item \emph{Group I}. Species very sensitive to organic enrichment
and present under unpolluted conditions (initial state). They include the
specialist carnivores and some deposit-feeding \emph{tubicolous polychaetes}.
\item \emph{Group II}. Species indifferent to enrichment, always present in low densities with
non-significant variations with time (from initial state, to slight unbalance).
These include suspension feeders, less selective carnivores and scavengers.
\item \emph{Group III}. Species tolerant to excess organic matter enrichment. These species
may occur under normal conditions, but their populations are stimulated by
organic enrichment (slight unbalance situations). They are surface
deposit-feeding species, such as \emph{tubicolous spionids}.
\item \emph{Group IV}. Second-order opportunistic species (slight to pronounced unbalanced
situations). Mainly small sized \emph{polychaetes}: subsurface deposit-feeders,
such as \emph{cirratulids}.
\item \emph{Group V}. First-order opportunistic species (pronounced unbalanced
situations). These are deposit-feeders, which proliferate in reduced
sediments.
}

The distribution of individuals between these ecological groups, according
to their sensitivity to pollution stress, gives a biotic index ranging
from 0.0 to 6.0.

\eqn{Biotic\ Index = 0.0 * f_{I} + 1.5 * f_{II} + 3.0 * f_{III} + 4.5 * f_{IV} + 6.0 * f_V}

where:

\eqn{f_i} = fraction of individuals in Group \eqn{i \in\{I, II, III, IV, V\}}

Under certain circumstances, the AMBI index should not be used:
\itemize{
\item The percentage of individuals not assigned to a group is higher than 20\%
\item The (not null) number of species is less than 3
\item The (not null) number of individuals is less than 6
}

In these cases the function will still perform the calculations but will
also return a warning (see below).
}

\subsection{Results}{

The output of the function consists of a list of at least three dataframes:
\itemize{
\item \code{AMBI} containing the calculated \code{AMBI} index, as well as other information.
\item (\code{AMBI_rep}) generated only if replicates are used, showing the \code{AMBI} index
for each replicate.
\item \code{matched} showing the species matches used.
\item \code{warnings} containing any warnings generated regarding numbers of species
or numbers of individuals.
}
}

\subsection{Species matching and interactive mode}{

The function will check for a species list supplied in the function call
using the argument \code{df_species}, if this is specified. The function will
also search for names in the AMBI standard list. After this, if no match
is found in either, then the species will be recorded with an \code{NA} value
for species group and will be ignored in calculations.

By calling the function once and then checking the output from this first
function call, the user can identify species names which were not matched.
Then, if necessary, they can provide or update a dataframe with a list of
user-defined species group assignments, before running the function a
second time.
\subsection{Conflicts}{

If there is a conflict between a user-provided group assignment for a species
and the group specified in the AMBI species group information, only one of
them will be selected. The outcome depends on a number of things:
\itemize{
\item some species in the AMBI list are considered \emph{reallocatable} (RA) - that is,
there can be disagreement about which species group they should belong to.
For these species, any user-specified groups will replace the default group.
\item if a species is not \emph{reallocatable}, then any user-specified groups will
\emph{by default} be ignored. However, if the function is called with the argument
\code{groups_strict = FALSE} then the user-specified groups will override AMBI
species groups.
}

Any conflicts and their outcomes will be recorded in
the \code{matched} output.
}

\subsection{\emph{interactive} mode}{

If the function is called using the argument \code{interactive = TRUE} then the
user has an opportunity to \emph{manually} assign species groups
(\emph{I, II, III, IV, V}) for any species names which were not identified.
The user does this by typing \code{1}, \code{2}, \code{3},  \code{4} or \code{5} and pressing \emph{Enter}.
Alternatively, the user can type \code{0} to mark
the species as recognised but not assigned to a group. By typing \emph{Enter} without
any number the species will be recorded as unidentified (\code{NA}). This is the
same result which would have been returned when calling the function in
non-interactive mode. There are two other options: typing \code{s} will display a
list of 10 species names which occur close to the unrecognised name when names
are sorted in alphabetical order. Entering \code{s} a second time will display the
next 10 names, and so on. Finally, entering \code{x} will abort the interactive
species assignment process. Any species groups assigned manually at this point
will be discarded and the calculations will proceed as in the non-interactive mode.

Any user-provided group information will be recorded in the \code{matched} results.

See \code{vignette("interactive")} for an example.
}

}
}
\examples{

# example (1) - using test data included with package

  AMBI(test_data, by = c("station"), var_rep = "replicate")


# example (2)

  df <- data.frame(station = c("1", "1", "2", "2", "2"),
  species = c("Acidostoma neglectum",
            "Acrocirrus validus",
            "Acteocina bullata",
            "Austrohelice crassa",
            "Capitella nonatoi"),
            count = c(2, 4, 5, 3, 7))

  \donttest{ AMBI(df, by = c("station"))}


# example (3) - conflict with AZTI species group

  df_user <- data.frame(
              species = c("Cumopsis fagei"),
              group = c(1))

  \donttest{AMBI(test_data, by = c("station"), var_rep = "replicate", df_species = df_user)}


}
\references{
Borja, Á., Franco, J., Pérez, V. (2000). “A Marine Biotic Index to Establish the Ecological Quality of Soft-Bottom Benthos Within European Estuarine and Coastal Environments.” \emph{Marine Pollution Bulletin} 40 (12) 1100–1114. \doi{10.1016/S0025-326X(00)00061-8}.
}
\seealso{
\code{\link[=MAMBI]{MAMBI()}} which calculates \emph{M-AMBI} the multivariate AMBI
index using results of \code{AMBI()}.
}
