---
title: "Using neonPlantEcology"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{neonPlantEcology}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE)
```

```{r setup}
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
library(neonPlantEcology)
library(vegan)
library(sf)
library(ggpubr)
```

# Doing a community ecology analysis using `npe_community_matrix` and {vegan}

## Setup

Load the `tidyverse` library along with `neonPlantEcology`. There is a 
pre-installed dataset which contains the neonUtilities-generated list object 
for the Jornada Experimental Range ("JORN") and the Santa Rita Experimental 
Range ("SRER") from domain 14. This can be downloaded directly by uncommenting
the following code, or just call `data("D14")`.


```{r getdata}
# D14 <- npe_site_ids(domain = "D14") |>
#   npe_download_plant_div(sites = .)

data("D14")

```

## Wrangle the data

Use `npe_community_matrix` to get the data into the needed format. By default, 
it aggregates annually to the (20m x 20m) plot scale. Use npe_cm_metadata to get 
all of the information from the rownames into a more interpretable format. Plot
centroids, along with additional plot-level metadata, can be loaded with 
`data("plot_centroids")`.

```{r wrangle}
comm <- npe_community_matrix(D14)
comm[1:4, 1:4]

data("plot_centroids")
plot_centroids <- sf::st_set_geometry(plot_centroids, NULL)
metadata <- npe_cm_metadata(comm) |>
  dplyr::left_join(plot_centroids)


```

## `vegan` analysis

first we'll do a species accumulation curve for each site.

```{r sac}

# checking the metadata file to see where JORN stops and SRER begins
which(metadata$site == "JORN") |> range()
which(metadata$site == "SRER") |> range()

# performing and plotting the species accumulation curve analysis
sp_jorn <- vegan::specaccum(comm[1:152,])
sp_srer <- vegan::specaccum(comm[152:347,])
plot(sp_srer, col = "blue")
plot(sp_jorn, col = "gold", add=T)
```

## NMDS

The following code reproduces figure 3 in Mahood et al 2024 (reference ).

```{r nmds, fig.width=7.5, fig.height=6}
nmds <- metaMDS(comm,trace = F)

nmds_sites <- nmds$points |>
  as_tibble(rownames = "rowname") |>
  left_join(metadata)

ggplot(nmds_sites, aes(x=MDS1, y=MDS2, color = eventID, size = elevation, shape = site)) +
  geom_point() +
  theme_classic() +
  scale_color_brewer(palette = "Set1") +
  theme(panel.background = element_rect(fill=NA, color= "black"))

```

# getting summary info by site using `npe_summary`

## Species richness by biogeographical origin

```{r nspp}
di <- npe_summary(D14, scale = "site", timescale = "all")

dj<- di |>
  dplyr::select(site, eventID, starts_with("nspp")) |>
  tidyr::pivot_longer(cols = starts_with("nspp")) |>
  dplyr::filter(!name %in% c("nspp_notexotic", "nspp_total")) |>
  dplyr::transmute(origin = str_remove_all(name, "nspp_"),
            nspp = value,
            site=site)

p1<-ggplot(dj, aes(x=nspp, y=site, fill = origin)) +
  geom_bar(stat = "identity", color = "black", lwd = .2) +
  xlab("Species Richness") +
  ylab("Site")

```

## Relative cover by biogeographical origin

```{r rc}
 dk<- di |>
  dplyr::select(site, eventID, starts_with("rel_cover")) |>
  pivot_longer(cols = starts_with("rel_cover")) |>
  filter(!name %in% c("rel_cover_notexotic", "rel_cover_total")) |>
  transmute(origin = str_remove_all(name, "rel_cover_"),
            relative_cover = value,
            site=site)

p2<- ggplot(dk, aes(x=relative_cover, y=site, fill = origin)) +
  geom_bar(stat = "identity", color = "black", lwd = .2) +
  xlab("Relative Cover")
```

## Alpha biogeographical diversity

```{r alphabeta}
dl <- di|>
  dplyr::select(site, eventID, starts_with("shannon")) |>
  pivot_longer(cols = starts_with("shannon")) |>
  filter(!name %in% c("shannon_notexotic", "shannon_total", "shannon_family")) |>
  transmute(origin = str_remove_all(name, "shannon_"),
            shannon = value,
            site=site)

p3<- ggplot(dl, aes(x = shannon, y = site, fill = origin)) +
  geom_bar(stat = 'identity', color = "black", lwd = .2) +
  xlab("Shannon-Weiner Diveristy")

```

## Make a multipanel figure

```{r multipanel}
ggpubr::ggarrange(p1, 
                  p2 + theme(axis.title.y = element_blank(), 
                                 axis.text.y = element_blank(), 
                                 axis.ticks.y = element_blank()), 
                  p3 + theme(axis.title.y = element_blank(), 
                                 axis.text.y = element_blank(), 
                                 axis.ticks.y = element_blank()), 
                  nrow=1, ncol=3, 
                  common.legend = TRUE, 
                  widths = c(1.35,1,1))


```

# Family cover through time using `npe_longform`

```{r family}
data("D14")
lf <- npe_longform(D14, scale = "plot", timescale = "annual")

lf_f <- lf |>
  mutate(family = ifelse(!family %in% c("Poaceae", "Fabaceae"), "Other", family)) |>
  group_by(site, plotID, eventID, family) |>
  summarise(cover = sum(cover, na.rm=T)) |>
  ungroup()
  
ggplot(lf_f, aes(x=eventID, y=cover, fill = family)) +
  geom_boxplot(position="dodge") +
  facet_wrap(~site, scales = "free_x") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1", name = "Family") +
  ylab("Cover") +
  xlab("eventID (Annual Timestep)")

```