This report is automatically generated with the R package knitr (version 1.40) .

---
title: |
       | Delta Hg TMDL Review
       | Linkage Analysis - Aq MeHg vs. Black Bass THg
author: "Mercury Program and Basin Planning Unit"
date: "8/4/2021"
output:
  html_document:
    code_folding: show
    toc: TRUE
    toc_float: TRUE
    toc_depth: 3
runtime: shiny
assets:
  css:
    - "http://fonts.googleapis.com/css?family=Raleway:300"
    - "http://fonts.googleapis.com/css?family=Oxygen"
---

<style>
body{
  font-family: 'Oxygen', sans-serif;
  font-size: 16px;
  line-height: 24px;
}

h1,h2,h3,h4 {
  font-family: 'Raleway', sans-serif;
}

.container { width: 1250px; }
h3 {
  background-color: #D4DAEC;
  text-indent: 50px; 
}
h4 {
  text-indent: 75px;
  margin-top: 35px;
  margin-bottom: 5px;
}
</style>

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5)
```

```{r Libraries, include=FALSE, message=FALSE}
library(knitr)      # generate html tables for this report
library(kableExtra) # better formatting of tables
library(DT)         # creates sortable tables

# Having issue with runtime:shiny resetting the WD of rproj so need to use setwd()
wd <- rstudioapi::getActiveProject()
setwd(wd)

source("R Functions/functions_estimate NDDNQ values.R") # Load first so select() isn't masked from dplyr
source("R Functions/functions_QA data.R")
source("R Functions/function_predictionModels.R")
source("R Functions/function_predictionModelPlot.R")
source("R Functions/function_AqFishSamplePairing.R")
source("R Functions/function_AqSampleFishModelPairing.R")


# Function to round values in a dataframe
signif_df <- function(x, digits) {
  # round numeric columns
  # x = data frame 
  # digits = number of digits to round
  numeric_columns <- sapply(x, mode) == 'numeric'
  x[numeric_columns] <-  signif(x[numeric_columns], digits)
  x
}
```

## Load Data
```{r}
setwd(wd)

# Load Data from "eval1_TLG Eval & BB Imp Goal"
load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/blackBass_2000.RData') # loads "blackBass_2000" ~ Final Black Bass raw data
blackBass_2000 <- blackBass_2000 %>%
  filter(!grepl('yb', Subarea, ignore.case=T)) # removes South Yolo Bypass (north removed during the TLG Eval). It was originally included but removed as it appears to be an outlier attributed to different hydrologic conditions and resulting fish Hg exposure compared to the other subareas (which contain fish year round).

load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/BlkBassStd350_FinalModels_2000.RData') # loads "BlkBassStd350_FinalModels_2000" ~ Final Black Bass Std 350 [Hg] Models for each Year
BlkBassStd350_FinalModels_2000 <- BlkBassStd350_FinalModels_2000 %>% 
  filter(!grepl('yb', Subarea, ignore.case=T)) # removes South Yolo Bypass (north removed during the TLG Eval). It was originally included but removed as it appears to be an outlier attributed to different hydrologic conditions and resulting fish Hg exposure compared to the other subareas (which contain fish year round).

load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/BBImpGoal.RData') # loads "BBImpGoal" loads final Black Bass Implementation Goal (calculated using Black Bass data weighted average by year for each subarea)

# Load Aqueous Data
aqdf <- loadData("Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/6a_AqMaster_noRepeats.xlsx") 

# Convert columns that are character but should be numeric or date
aqdf <- chara_to_NumDate(aqdf)
```



# Aqueous Data QC

**In this markdown, "Original Linkage" refers to the linkage analysis in the 2010 TMDL Staff Report, which used data from the year 2000.**

## Select Years
```{r}
aqData1 <- aqdf %>%
  filter(Year >= 2000) # selects aqueous data starting from year 2000 year, consistent with the Original Linkage. 
```

## View Unique Factors
```{r}
aqdf %>%
  mutate(AnalyteUnitMatrix = paste(Analyte, '-', Unit, '-', MatrixName)) %>% 
  unique_factors(SourceID, Subarea, AnalyteUnitMatrix, WBT)
```

## Change X2 Subarea to West Delta
```{r}
aqData2 <- aqData1 %>% 
  mutate(Subarea = recode(Subarea, "X2" = "West Delta",  # Change X2 to West Delta, consistent with the 2010 TMDL Staff Report
                                    "Cache Creek Settling Basin" = "Yolo Bypass North")) # Change CCSB to Yolo Bypass North (this doesn't meaningfully matter since there are no corresponding BB data in Yolo Bypass North)
```

## Select "Methylmercury, Total", "ng/L", "Aqueous"
```{r}
# Select only aqueous MeHg as in 2010 TMDL Staff Report
aqData3 <- aqData2 %>% 
  filter(Analyte     == 'Methylmercury, Total',
         Unit        == 'ng/L',
         MatrixName  == 'Aqueous')
```


## View Factors for Final Data Selection
```{r}
aqData3 %>%
  mutate(AnalyteUnitMatrix = paste(Analyte, '-', Unit, '-', MatrixName)) %>% 
  unique_factors(SourceID, Subarea, AnalyteUnitMatrix, WBT)
```


## Estimate ND & DNQ Values
```{r}
aqNDDNQs <- robinNDDNQ(aqData3, ResultQualCode)
```

### View Fitted Distribution Graphs
```{r, echo=FALSE}
ui <- fluidPage(
  selectInput('distName', label='Select Distribution', choices=names(aqNDDNQs), selected=1),
  plotOutput('distPlot')
)

server <- function(input, output, session) {
  output$distPlot <- renderPlot({
  plot(aqNDDNQs[[input$distName]]$fitmodel) # view plot fits
    title(input$distName, outer=T)
})
}

shinyApp(ui, server, options=list(height=500))
```

### Select Best Distribution Model
```{r}
aqData4 <- aqNDDNQs$burr$fitted  #selected distribution based on plot showing best fit to data
```

## Check for Outliers
e.g., orders of magnitude maybe issues with unit conversion
```{r}
# View Data for Entire Delta
result_time_Plotly(aqData4 %>% filter(Censored == F),
                   groupByCol=TMDL, showMean=F, interactive=T, logscale=T, showLegend=F)
```

```{r, fig.height=8.5}
# View Data for Each Subarea
result_time_Plotly(aqData4 %>% filter(Censored == F),
                   groupByCol=Subarea, showMean=T, interactive=T, logscale=T, showLegend=F)
```

```{r}
# View NDs & DNQs
result_time_Plotly(aqData4 %>% filter(Censored == T),
                   groupByCol=TMDL, showMean=F, interactive=T, logscale=T, showLegend=F)
```

## Finalize QC & Data Selection
```{r}
aqData <- aqData4
```

## Visualize Aq Data

### Box Plot by Subarea 
```{r}
aqData %>% 
  filter(Censored == F) %>% 
  ggplot(aes(y=Result, x=Subarea)) +
  geom_jitter(aes(colour=as.factor(Year)), size=3) +
  geom_boxplot(alpha = 0.05) +
  scale_y_log10() +
  theme_light() +
  theme(panel.grid.minor.y = element_blank())
```

### Table of Annual Averages
```{r}
aqAvgMeHg <- aqData %>% 
  group_by(Subarea, Year) %>% 
  summarise(AvgMeHg = mean(Result),
            n = n(),
            .groups = 'drop')

aqAvgMeHg %>% 
  arrange(Subarea) %>% 
  datatable()
```

### Boxplot of Annual Averages by Subarea
```{r}
aqData %>% 
  group_by(Subarea, Year) %>% 
  summarise(AvgMeHg = mean(Result),
            n = n(),
            .groups = 'drop') %>%
  ggplot(aes(y=AvgMeHg, x=Subarea)) +
  geom_jitter(aes(colour=as.factor(Year)), size=3) +
  geom_boxplot(alpha = 0.05) +
  theme_light() +
  theme(panel.grid.minor.y = element_blank())
```


# LINKAGE GRAPHS
## 1a: 5yr Aq DRMP Data Overlap (2016-2019) vs. DRMP Black Bass (2016-2019) by Subarea & Year 
```{r}
Linkage.DRMPAq.DRMPBByr <- AqSampleFishModelPairing(aqData=aqData %>% filter(grepl('DRMP', SourceID, ignore.case=T)),
                                                       fishModels=BlkBassStd350_FinalModels_2000 %>% filter(Year >= 2016),
                                                       bylocationColumn=Subarea,
                                                       aqOverlapYrs=5)
```

<em>Model with lowest SER is displayed first. Board staff manually checked that the selected models are appropriate. Board staff also checked if keeping extrapolated value is appropriate.</em><br><br>
```{r, echo=FALSE}
ui <- fluidPage(
  fluidRow(
    column(width=3,
           selectInput('Stat', label='Select Stat', choices=colnames(Linkage.DRMPAq.DRMPBByr)[grepl('mean|median', colnames(Linkage.DRMPAq.DRMPBByr), ignore.case=T)], selected=1)),
    column(width=3,
           selectInput('model', label='Select Model', choices=NULL)
         )
  ),
  plotlyOutput('linkPlot')
)

server <- function(input, output, session) {
  data1 <- reactive({
    Linkage.DRMPAq.DRMPBByr %>% 
  group_modify(~ predictionModels(.x, 
                                  .knownYcol=!! as.name(input$Stat),
                                  .knownXcol=Stdz350, knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
  rename(AqGoal = predictedY) %>% 
  ungroup
  })
  observeEvent(data1(), {
    updateSelectInput(session, 'model',
                      choices=data1()$ModelName, selected=data1()$ModelName[1])
    })
  
  data2 <- reactive({
    req(input$model)
    data1() %>%
      filter(ModelName == input$model)
    })
  
  output$linkPlot <- renderPlotly({
    req(input$model)
    
    title  <- paste('1a: DRMP', input$Stat, '(5yr Overlap 2016-2019) vs. DRMP BB (2016-2019)')
    xTitle <- '2016-2019 DRMP Std.350 BB [Hg] (mg/kg)'
    yTitle <- '5yrs OVerlap 2016-2019 DRMP Aq [MeHg] (ng/L)'
    knownXlabel <- "Black Bass Imp. Goal"
    
    predictionModelPlot(data2(), knownXlabel=knownXlabel,
                    .predictedYcol=AqGoal, .groupByCol=Subarea, allGroups=sort(unique(BlkBassStd350_FinalModels_2000$Subarea)),
                    title=title, xTitle=xTitle, yTitle=yTitle, showLegend=T)
    })
}

shinyApp(ui, server, options=list(height=500))
```

```{r echo=F}
# Table of Models with Lowest SER
    Linkage1a <- NULL
    statChoices <- colnames(Linkage.DRMPAq.DRMPBByr)[grepl('mean|median', colnames(Linkage.DRMPAq.DRMPBByr), ignore.case=T)]
    for(stat in statChoices){
      temp <- Linkage.DRMPAq.DRMPBByr %>%
        group_modify(~ predictionModels(.x,
                                        .knownYcol=!! as.name(stat),
                                        .knownXcol=Stdz350,
                                        knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
        rename(AqGoal = predictedY) %>%
        ungroup %>%
        mutate(LinkageGraph='Linkage1a')

      Linkage1a <- Linkage1a %>%
        bind_rows(temp)
    }
    
# Remove stat evaluations that result in a decreasing Aq [MeHg] with increasing fish [Hg]
    Linkage1a_final <- Linkage1a %>% 
      filter(knownYcol %are not% c('aq_Geomean_byYear'))
# Create Table to show Models with Lowest SER     
    Linkage1a_final %>% 
      select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
      arrange(SER) %>%
      head(10) %>%
      kbl(align='lcc', caption='Linkage 1a Top 10 Lowest SER Models') %>%
      kable_paper('hover', full_width=T) %>%
      kable_styling(fixed_thead = T)
```

## 1b: 5yr Aq DRMP Data Overlap (2016-2019) vs. DRMP Black Bass (2016-2019) by Subarea
```{r, results="hide"}
Linkage.DRMPAq.DRMPBByr.Subarea <- Linkage.DRMPAq.DRMPBByr %>% 
  group_by(Subarea) %>% 
  summarize(Std350_Wt.Mean = weighted.mean(Stdz350, n),
            Std350_Mean    = mean(Stdz350),
            Std350_Geomean = geomean(Stdz350),
            Std350_Median  = median(Stdz350),
            Aq_Mean_ofPooledMeans          = mean(aq_Mean_pool),
            Aq_Mean_ofYearMeans       = mean(aq_Mean_byYear),
            Aq_Wt.Mean_ofWt.WaterYearMeans = weighted.mean(aq_Wt.Mean, aq_n),
            Aq_Geomean_ofPooledGeomeans    = geomean(aq_Geomean_pool),
            Aq_Geomean_ofYearGeomeans = geomean(aq_Geomean_byYear),
            Aq_Median_ofPooledMedians      = median(aq_Median_pool),
            Aq_Median_ofYearMedians   = median(aq_Median_byYear),
            .groups = 'drop')
```

<em>Model with lowest SER is displayed first. Board staff manually checked that the selected models are appropriate. Board staff also checked if keeping extrapolated value is appropriate.</em><br><br>
```{r, echo=FALSE}
ui <- fluidPage(
  fluidRow(
    column(width=3,
           selectInput('Stat', label='Select Stat', choices=c("Mean_ofPooledMeans", "Mean_ofYearMeans", "Wt.Mean_ofWt.WaterYearMeans",
                                                              "Geomean_ofPooledGeomeans", "Geomean_ofYearGeomeans", "Median_ofPooledMedians", "Median_ofYearMedians"), selected=1)),
    column(width=3,selectInput('model', label='Select Model', choices=NULL)
         )
  ),
  plotlyOutput('linkPlot')
)

server <- function(input, output, session) {
  data1 <- reactive({
    Linkage.DRMPAq.DRMPBByr.Subarea %>% 
  group_modify(~ predictionModels(.x,
                                  .knownYcol=!! as.name(paste0('Aq_',input$Stat)),
                                  .knownXcol=!! as.name(paste0('Std350_', str_extract(input$Stat, "[^_]*"))), # paste text up to 1st '_' from Stat to get corresponding Std350 stat
                                  knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
  rename(AqGoal = predictedY) %>% 
  ungroup
  })
  observeEvent(data1(), {
    updateSelectInput(session, 'model',
                      choices=data1()$ModelName, selected=data1()$ModelName[1])
    })
  
  data2 <- reactive({
    req(input$model)
    data1() %>%
      filter(ModelName == input$model)
    })
  
  output$linkPlot <- renderPlotly({
    req(input$model)
    
    title  <- paste('1b: DRMP', input$Stat, '(5yr Overlap 2016-2019) vs. DRMP BB (2016-2019')
    xTitle <- '2016-2019 DRMP Std.350 BB [Hg] (mg/kg)'
    yTitle <- '5yrs OVerlap 2016-2019 DRMP Aq [MeHg] (ng/L)'
    knownXlabel <- "Black Bass Imp. Goal"
    
    predictionModelPlot(data2(), knownXlabel=knownXlabel,
                    .predictedYcol=AqGoal, .groupByCol=Subarea, allGroups=sort(unique(BlkBassStd350_FinalModels_2000$Subarea)),
                    title=title, xTitle=xTitle, yTitle=yTitle, showLegend=T)
    })
}

shinyApp(ui, server, options=list(height=500))
```

```{r echo=F}
# Table of Models with Lowest SER
    Linkage1b <- NULL
    statChoices <- c("Mean_ofPooledMeans", "Mean_ofYearMeans", "Wt.Mean_ofWt.WaterYearMeans",
                                                              "Geomean_ofPooledGeomeans", "Geomean_ofYearGeomeans", "Median_ofPooledMedians", "Median_ofYearMedians")
    for(stat in statChoices){
      temp <- Linkage.DRMPAq.DRMPBByr.Subarea %>%
        group_modify(~ predictionModels(.x,
                                        .knownYcol=!! as.name(paste0('Aq_', stat)),
                                        .knownXcol=!! as.name(paste0('Std350_', str_extract(stat, "[^_]*"))), # paste text up to 1st '_' from Stat to get corresponding
                                        knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
        rename(AqGoal = predictedY) %>%
        ungroup %>%
        mutate(LinkageGraph='Linkage1b')

      Linkage1b <- Linkage1b %>%
        bind_rows(temp)
    }

    Linkage1b_final <- Linkage1b
# Create Table to show Models with Lowest SER 
    Linkage1b_final %>%
      select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
      arrange(SER) %>%
      head(10) %>%
      kbl(align='lcc', caption='Linkage 1b Top 10 Lowest SER Models') %>%
      kable_paper('hover', full_width=T) %>%
      kable_styling(fixed_thead = T)
```


## 2a: 5yr Aq Data Overlap (2012-2019) vs. DRMP Black Bass (2016-2019) by Subarea & Year
```{r}
Linkage.5yrAq.DRMPBByr <- AqSampleFishModelPairing(aqData=aqData,
                                               fishModels=BlkBassStd350_FinalModels_2000 %>% filter(Year >= 2016),
                                               bylocationColumn=Subarea,
                                               aqOverlapYrs=5)

```

<em>Model with lowest SER is displayed first. Board staff manually checked that the selected models are appropriate. Board staff also checked if keeping extrapolated value is appropriate.</em><br><br>
```{r, echo=FALSE}
ui <- fluidPage(
  fluidRow(
    column(width=3,
           selectInput('Stat', label='Select Stat', choices=colnames(Linkage.5yrAq.DRMPBByr)[grepl('mean|median', colnames(Linkage.5yrAq.DRMPBByr), ignore.case=T)], selected=1)),
    column(width=3,
           selectInput('model', label='Select Model', choices=NULL)
         )
  ),
  plotlyOutput('linkPlot')
)

server <- function(input, output, session) {
    data1 <- reactive({
    Linkage.5yrAq.DRMPBByr %>% 
  group_modify(~ predictionModels(.x, 
                                  .knownYcol=!! as.name(input$Stat),
                                  .knownXcol=Stdz350, knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
  rename(AqGoal = predictedY) %>% 
  ungroup
  })
  observeEvent(data1(), {
    updateSelectInput(session, 'model',
                      choices=data1()$ModelName, selected=data1()$ModelName[1])
    })
  
  data2 <- reactive({
    req(input$model)
    data1() %>%
      filter(ModelName == input$model)
    })
  
  output$linkPlot <- renderPlotly({
    req(input$model)
    
    title  <- paste('2a:', input$Stat, '(5yr Overlap 2012-2019) vs. DRMP BB (2016-2019)')
    xTitle <- '2016-2019 DRMP Std.350 BB [Hg] (mg/kg)'
    yTitle <- '5yr Overlap 2012-2019 Aq [MeHg] (ng/L)'
    knownXlabel <- "Black Bass Imp. Goal"
    
    predictionModelPlot(data2(), knownXlabel=knownXlabel,
                    .predictedYcol=AqGoal, .groupByCol=Subarea, allGroups=sort(unique(BlkBassStd350_FinalModels_2000$Subarea)),
                    title=title, xTitle=xTitle, yTitle=yTitle, showLegend=T)
    })
}

shinyApp(ui, server, options=list(height=500))
```

```{r echo=F}
# Table of Models with Lowest SER
    Linkage2a <- NULL
    statChoices <- colnames(Linkage.5yrAq.DRMPBByr)[grepl('mean|median', colnames(Linkage.5yrAq.DRMPBByr), ignore.case=T)]
    for(stat in statChoices){
      temp <- Linkage.5yrAq.DRMPBByr %>%
        group_modify(~ predictionModels(.x,
                                        .knownYcol=!! as.name(stat),
                                        .knownXcol=Stdz350,
                                        knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
        rename(AqGoal = predictedY) %>%
        ungroup %>%
        mutate(LinkageGraph='Linkage2a')

      Linkage2a <- Linkage2a %>%
        bind_rows(temp)
    }

# Remove stat evaluations that result in a decreasing Aq [MeHg] with increasing fish [Hg]
    Linkage2a_final <- Linkage2a %>% 
      filter(knownYcol != 'aq_Geomean_byYear') %>% 
      filter(knownYcol != 'aq_Mean_byYear' | ModelName %are not% c('gam_k=1', 'gam_k=2', 'gam_k=3'))
# Create Table to show Models with Lowest SER  
    Linkage2a_final %>%
      select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
      arrange(SER) %>%
      head(10) %>%
      kbl(align='lcc', caption='Linkage 1a Top 10 Lowest SER Models') %>%
      kable_paper('hover', full_width=T) %>%
      kable_styling(fixed_thead = T)
```

## 2b: 5yr Aq Data Overlap (2012-2019) vs. DRMP Black Bass (2016-2019) by Subarea
```{r, results="hide"}
Linkage.5yrAq.DRMPBByr.Subarea <- Linkage.5yrAq.DRMPBByr %>% 
  group_by(Subarea) %>% 
  summarize(Std350_Wt.Mean = weighted.mean(Stdz350, n),
            Std350_Mean    = mean(Stdz350),
            Std350_Geomean = geomean(Stdz350),
            Std350_Median  = median(Stdz350),
            Aq_Mean_ofPooledMeans          = mean(aq_Mean_pool),
            Aq_Mean_ofYearMeans       = mean(aq_Mean_byYear),
            Aq_Wt.Mean_ofWt.WaterYearMeans = weighted.mean(aq_Wt.Mean, aq_n),
            Aq_Geomean_ofPooledGeomeans    = geomean(aq_Geomean_pool),
            Aq_Geomean_ofYearGeomeans = geomean(aq_Geomean_byYear),
            Aq_Median_ofPooledMedians      = median(aq_Median_pool),
            Aq_Median_ofYearMedians   = median(aq_Median_byYear),
            .groups = 'drop')
```

<em>Model with lowest SER is displayed first. Board staff manually checked that the selected models are appropriate. Board staff also checked if keeping extrapolated value is appropriate.</em><br><br>
```{r, echo=FALSE}
ui <- fluidPage(
  fluidRow(
    column(width=3,
           selectInput('Stat', label='Select Stat', choices=c("Mean_ofPooledMeans", "Mean_ofYearMeans", "Wt.Mean_ofWt.WaterYearMeans",
                                                              "Geomean_ofPooledGeomeans", "Geomean_ofYearGeomeans", "Median_ofPooledMedians", "Median_ofYearMedians"), selected=1)),
    column(width=3,selectInput('model', label='Select Model', choices=NULL)
         )
  ),
  plotlyOutput('linkPlot')
)

server <- function(input, output, session) {
  data1 <- reactive({
    Linkage.5yrAq.DRMPBByr.Subarea %>% 
  group_modify(~ predictionModels(.x,
                                  .knownYcol=!! as.name(paste0('Aq_',input$Stat)),
                                  .knownXcol=!! as.name(paste0('Std350_', str_extract(input$Stat, "[^_]*"))), # paste text up to 1st '_' from Stat to get corresponding Std350 stat
                                  knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
  rename(AqGoal = predictedY) %>% 
  ungroup
  })
  observeEvent(data1(), {
    updateSelectInput(session, 'model',
                      choices=data1()$ModelName, selected=data1()$ModelName[1])
    })
  
  data2 <- reactive({
    req(input$model)
    data1() %>%
      filter(ModelName == input$model)
    })
  
  output$linkPlot <- renderPlotly({
    req(input$model)
    
    title  <- paste('2b:', input$Stat, '(5yr Overlap 2012-2019) vs. DRMP BB (2016-2019)')
    xTitle <- '2016-2019 DRMP Std.350 BB [Hg] (mg/kg)'
    yTitle <- '5yr Overlap 2012-2019 Aq [MeHg] (ng/L)'
    knownXlabel <- "Black Bass Imp. Goal"
    
    predictionModelPlot(data2(), knownXlabel=knownXlabel,
                    .predictedYcol=AqGoal, .groupByCol=Subarea, allGroups=sort(unique(BlkBassStd350_FinalModels_2000$Subarea)),
                    title=title, xTitle=xTitle, yTitle=yTitle, showLegend=T)
    })
}

shinyApp(ui, server, options=list(height=500))
```


```{r echo=F}
# Table of Models with Lowest SER
    Linkage2b <- NULL
    statChoices <- c("Mean_ofPooledMeans", "Mean_ofYearMeans", "Wt.Mean_ofWt.WaterYearMeans",
                                                              "Geomean_ofPooledGeomeans", "Geomean_ofYearGeomeans", "Median_ofPooledMedians", "Median_ofYearMedians")
    for(stat in statChoices){
      temp <- Linkage.5yrAq.DRMPBByr.Subarea %>%
        group_modify(~ predictionModels(.x,
                                        .knownYcol=!! as.name(paste0('Aq_', stat)),
                                        .knownXcol=!! as.name(paste0('Std350_', str_extract(stat, "[^_]*"))), # paste text up to 1st '_' from Stat to get corresponding
                                        knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
        rename(AqGoal = predictedY) %>%
        ungroup %>%
        mutate(LinkageGraph='Linkage2b')

      Linkage2b <- Linkage2b %>%
        bind_rows(temp)
    }
    
    Linkage2b_final <- Linkage2b
# Create Table to show Models with Lowest SER  
    Linkage2b_final %>%
      select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
      arrange(SER) %>%
      head(10) %>%
      kbl(align='lcc', caption='Linkage 1b Top 10 Lowest SER Models') %>%
      kable_paper('hover', full_width=T) %>%
      kable_styling(fixed_thead = T)
```


## 3a: 5yr Aq Data Overlap (2000-2019) vs. Black Bass (2000-2019) by Subarea & Year
```{r}
Linkage.5yrAq.BByr <- AqSampleFishModelPairing(aqData=aqData %>% filter(Year >= 2000),
                                               fishModels=BlkBassStd350_FinalModels_2000 %>% filter(Year >= 2000),
                                               bylocationColumn=Subarea,
                                               aqOverlapYrs=5)

```

<em>Model with lowest SER is displayed first. Board staff manually checked that the selected models are appropriate. Board staff also checked if keeping extrapolated value is appropriate.</em><br><br>
```{r, echo=FALSE}
ui <- fluidPage(
  fluidRow(
    column(width=3,
           selectInput('Stat', label='Select Stat', choices=colnames(Linkage.5yrAq.BByr)[grepl('mean|median', colnames(Linkage.5yrAq.BByr), ignore.case=T)], selected=1)),
    column(width=3,
           selectInput('model', label='Select Model', choices=NULL)
         )
  ),
  plotlyOutput('linkPlot')
)

server <- function(input, output, session) {
  data1 <- reactive({
    Linkage.5yrAq.BByr %>% 
  group_modify(~ predictionModels(.x, 
                                  .knownYcol=!! as.name(input$Stat),
                                  .knownXcol=Stdz350, knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
  rename(AqGoal = predictedY) %>% 
  ungroup
  })
  observeEvent(data1(), {
    updateSelectInput(session, 'model',
                      choices=data1()$ModelName, selected=data1()$ModelName[1])
    })
  
  data2 <- reactive({
    req(input$model)
    data1() %>%
      filter(ModelName == input$model)
    })
  
  output$linkPlot <- renderPlotly({
    req(input$model)
    
    title  <- paste('3a:', input$Stat, '(5yr Overlap 2000-2019) vs. BB (2000-2019)')
    xTitle <- '2000-2019 Std.350 BB [Hg] (mg/kg)'
    yTitle <- '5yr Overlap 2000-2019 Aq [MeHg] (ng/L)'
    knownXlabel <- "Black Bass Imp. Goal"
    
    predictionModelPlot(data2(), knownXlabel=knownXlabel,
                    .predictedYcol=AqGoal, .groupByCol=Subarea, allGroups=sort(unique(BlkBassStd350_FinalModels_2000$Subarea)),
                    title=title, xTitle=xTitle, yTitle=yTitle, showLegend=T)
    })
}

shinyApp(ui, server, options=list(height=500))
```

```{r echo=F}
# Table of Models with Lowest SER
    Linkage3a <- NULL
    statChoices <- colnames(Linkage.5yrAq.BByr)[grepl('mean|median', colnames(Linkage.5yrAq.BByr), ignore.case=T)]
    for(stat in statChoices){
      temp <- Linkage.5yrAq.BByr %>%
        group_modify(~ predictionModels(.x,
                                        .knownYcol=!! as.name(stat),
                                        .knownXcol=Stdz350,
                                        knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
        rename(AqGoal = predictedY) %>%
        ungroup %>%
        mutate(LinkageGraph='Linkage3a') 

      Linkage3a <- Linkage3a %>%
        bind_rows(temp)
    }


# Remove stat evaluations that result in a decreasing Aq [MeHg] with increasing fish [Hg]
    Linkage3a_final <- Linkage3a %>%
      filter(knownYcol %are not% c('aq_Median_pool', 'aq_Median_byYear', 'aq_Geomean_byYear')) # all Statistics appear to result in decreasing slope. Stopped removing more since SER was getting high and would affect final result.
# Create Table to show Models with Lowest SER
    Linkage3a_final %>% 
      select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
      arrange(SER) %>%
      head(10) %>%
      kbl(align='lcc', caption='Linkage 3a Top 10 Lowest SER Models') %>%
      kable_paper('hover', full_width=T) %>%
      kable_styling(fixed_thead = T)
```

## 3b: 5yr Aq Data Overlap (2000-2019) vs. Black Bass (2000-2019) by Subarea
```{r}
Linkage.5yrAq.BByr.Subarea <- Linkage.5yrAq.BByr %>% 
  group_by(Subarea) %>% 
  summarize(Std350_Wt.Mean = weighted.mean(Stdz350, n),
            Std350_Mean    = mean(Stdz350),
            Std350_Geomean = geomean(Stdz350),
            Std350_Median  = median(Stdz350),
            Aq_Mean_ofPooledMeans          = mean(aq_Mean_pool),
            Aq_Mean_ofYearMeans       = mean(aq_Mean_byYear),
            Aq_Wt.Mean_ofWt.WaterYearMeans = weighted.mean(aq_Wt.Mean, aq_n),
            Aq_Geomean_ofPooledGeomeans    = geomean(aq_Geomean_pool),
            Aq_Geomean_ofYearGeomeans = geomean(aq_Geomean_byYear),
            Aq_Median_ofPooledMedians      = median(aq_Median_pool),
            Aq_Median_ofYearMedians   = median(aq_Median_byYear),
            .groups = 'drop')
```

<em>Model with lowest SER is displayed first. Board staff manually checked that the selected models are appropriate. Board staff also checked if keeping extrapolated value is appropriate.</em><br><br>
```{r, echo=FALSE}
ui <- fluidPage(
  fluidRow(
    column(width=3,
           selectInput('Stat', label='Select Stat', choices=c("Mean_ofPooledMeans", "Mean_ofYearMeans", "Wt.Mean_ofWt.WaterYearMeans",
                                                              "Geomean_ofPooledGeomeans", "Geomean_ofYearGeomeans", "Median_ofPooledMedians", "Median_ofYearMedians"), selected=1)),
    column(width=3,selectInput('model', label='Select Model', choices=NULL)
         )
  ),
  plotlyOutput('linkPlot')
)

server <- function(input, output, session) {
  data1 <- reactive({
    Linkage.5yrAq.BByr.Subarea %>% 
  group_modify(~ predictionModels(.x,
                                  .knownYcol=!! as.name(paste0('Aq_',input$Stat)),
                                  .knownXcol=!! as.name(paste0('Std350_', str_extract(input$Stat, "[^_]*"))), # paste text up to 1st '_' from Stat to get corresponding Std350 stat
                                  knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
  rename(AqGoal = predictedY) %>% 
  ungroup
  })
  observeEvent(data1(), {
    updateSelectInput(session, 'model',
                      choices=data1()$ModelName, selected=data1()$ModelName[1])
    })
  
  data2 <- reactive({
    req(input$model)
    data1() %>%
      filter(ModelName == input$model)
    })
  
  output$linkPlot <- renderPlotly({
    req(input$model)
    
    title  <- paste('5yrs Aq', input$Stat, '[MeHg] vs. Std.350 BB', input$Stat, '[Hg]')
    xTitle <- 'Std.350 BB [Hg] (mg/kg)'
    yTitle <- '5yrs Aq [MeHg] (ng/L)'
    knownXlabel <- "Black Bass Imp. Goal"
    
    predictionModelPlot(data2(), knownXlabel=knownXlabel,
                    .predictedYcol=AqGoal, .groupByCol=Subarea, allGroups=sort(unique(BlkBassStd350_FinalModels_2000$Subarea)),
                    title=title, xTitle=xTitle, yTitle=yTitle, showLegend=T)
    })
}

shinyApp(ui, server, options=list(height=500))
```

```{r echo=F}
# Table of Models with Lowest SER
    Linkage3b <- NULL
    statChoices <- c("Mean_ofPooledMeans", "Mean_ofYearMeans", "Wt.Mean_ofWt.WaterYearMeans",
                                                              "Geomean_ofPooledGeomeans", "Geomean_ofYearGeomeans", "Median_ofPooledMedians", "Median_ofYearMedians")
    for(stat in statChoices){
      temp <- Linkage.5yrAq.BByr.Subarea %>%
        group_modify(~ predictionModels(.x,
                                        .knownYcol=!! as.name(paste0('Aq_', stat)),
                                        .knownXcol=!! as.name(paste0('Std350_', str_extract(stat, "[^_]*"))), # paste text up to 1st '_' from Stat to get corresponding
                                        knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
        rename(AqGoal = predictedY) %>%
        ungroup %>%
        mutate(LinkageGraph='Linkage3b') 

      Linkage3b <- Linkage3b %>%
        bind_rows(temp)
    }

# Remove stat evaluations that result in a decreasing Aq [MeHg] with increasing fish [Hg]
    Linkage3b_final <- Linkage3b 
# Create Table to show Models with Lowest SER
    Linkage3b_final %>%
      select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
      arrange(SER) %>%
      head(10) %>%
      kbl(align='lcc', caption='Linkage 3b Top 10 Lowest SER Models') %>%
      kable_paper('hover', full_width=T) %>%
      kable_styling(fixed_thead = T)
```


## 4: Pooled Aq Data (2000-2019) vs. Pooled Black Bass (2000-2019) by Subarea

### Calculate Aq & Black Bass Pooled Statistics
```{r}
blackBass_pooled <- blackBass_2000 %>% 
  group_by(Subarea) %>% 
  summarize(BB_PooledMean    = mean(Result),
            BB_PooledGeomean = geomean(Result),
            BB_PooledMedian  = median(Result),
            .groups = 'drop')

aqData_pooled <- aqData %>% 
  group_by(Subarea) %>% 
  summarize(Aq_PooledMean    = mean(Result),
            Aq_PooledGeomean = geomean(Result),
            Aq_PooledMedian  = median(Result),
            .groups = 'drop')
  
```

### Combine Aq & Black Bass Pooled Data Results
```{r}
Linkage.PooledAq.PooledBB.Subarea <- blackBass_pooled %>% 
  left_join(., aqData_pooled, by='Subarea')
```

<em>Model with lowest SER is displayed first. Board staff manually checked that the selected models are appropriate. Board staff also checked if keeping extrapolated value is appropriate.</em><br><br>
```{r, echo=FALSE}
ui <- fluidPage(
  fluidRow(
    column(width=3,
           selectInput('Stat', label='Select Stat', choices=c("PooledMean", "PooledGeomean", "PooledMedian"), selected=1)),
    column(width=3,selectInput('model', label='Select Model', choices=NULL)
         )
  ),
  plotlyOutput('linkPlot')
)

server <- function(input, output, session) {
  data1 <- reactive({
    Linkage.PooledAq.PooledBB.Subarea %>% 
  group_modify(~ predictionModels(.x,
                                  .knownYcol=!! as.name(paste0('Aq_',input$Stat)),
                                  .knownXcol=!! as.name(paste0('BB_',input$Stat)),
                                  knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
  rename(AqGoal = predictedY) %>% 
  ungroup
  })
  observeEvent(data1(), {
    updateSelectInput(session, 'model',
                      choices=data1()$ModelName, selected=data1()$ModelName[1])
    })
  
  data2 <- reactive({
    req(input$model)
    data1() %>%
      filter(ModelName == input$model)
    })
  
  output$linkPlot <- renderPlotly({
    req(input$model)
    
    title  <- paste('5yrs Aq', input$Stat, '[MeHg] vs. Std.350 BB', input$Stat, '[Hg]')
    xTitle <- 'Std.350 BB [Hg] (mg/kg)'
    yTitle <- '5yrs Aq [MeHg] (ng/L)'
    knownXlabel <- "Black Bass Imp. Goal"
    
    predictionModelPlot(data2(), knownXlabel=knownXlabel,
                    .predictedYcol=AqGoal, .groupByCol=Subarea, allGroups=sort(unique(BlkBassStd350_FinalModels_2000$Subarea)),
                    title=title, xTitle=xTitle, yTitle=yTitle, showLegend=T)
    })
}

shinyApp(ui, server, options=list(height=500))
```

```{r echo=F}
# Table of Models with Lowest SER
    Pooled <- NULL
    statChoices <- c("PooledMean", "PooledGeomean", "PooledMedian")
    for(stat in statChoices){
      temp <- Linkage.PooledAq.PooledBB.Subarea %>%
        group_modify(~ predictionModels(.x,
                                        .knownYcol=!! as.name(paste0('Aq_', stat)),
                                        .knownXcol=!! as.name(paste0('BB_', stat)),
                                        knownX=BBImpGoal, preferExtrapModel=NULL)) %>%
        rename(AqGoal = predictedY) %>%
        ungroup %>%
        mutate(LinkageGraph='Pooled')

      Pooled <- Pooled %>%
        bind_rows(temp)
    }

# Remove stat evaluations that result in a decreasing Aq [MeHg] with increasing fish [Hg]
    Pooled_final <- Pooled 
# Create Table to show Models with Lowest SER
    Pooled_final %>%
      select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
      arrange(SER) %>%
      head(10) %>%
      kbl(align='lcc', caption='Linkage 3b Top 10 Lowest SER Models') %>%
      kable_paper('hover', full_width=T) %>%
      kable_styling(fixed_thead = T)
```



# Final Linkage Graph (Linkage with Lowest SER)
```{r}
Linkages_all <- bind_rows(Linkage1a, Linkage1b,
                     Linkage2a, Linkage2b,
                     Linkage3a, Linkage3b,
                     Pooled)


Linkages_final <- bind_rows(Linkage1a_final, Linkage1b_final,
                     Linkage2a_final, Linkage2b_final,
                     Linkage3a_final, Linkage3b_final,
                     Pooled) %>% 
  arrange(SER)



Linkages_final_graph <- Linkages_final %>% 
  head(1)

title  <- paste(Linkages_final_graph$LinkageGraph, '-', Linkages_final_graph$ModelName, 'Model: DRMP', Linkages_final_graph$knownYcol, 'vs. DRMP BB (2016-2019)')
xTitle <- '2016-2019 DRMP Std.350 BB [Hg] (mg/kg)'
yTitle <- '5yrs OVerlap 2016-2019 DRMP Aq [MeHg] (ng/L)'
knownXlabel <- "Black Bass Imp. Goal"

predictionModelPlot(Linkages_final_graph, knownXlabel=knownXlabel,
                    .predictedYcol=AqGoal, .groupByCol=Subarea, allGroups=sort(unique(BlkBassStd350_FinalModels_2000$Subarea)),
                    title=title, xTitle=xTitle, yTitle=yTitle, showLegend=T)


Linkages_final  %>% 
  head(10) %>%
  select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
  kbl(align='lcc', caption='Top 10 Overall Lowest SER Linkage Models') %>%
  kable_paper('hover', full_width=T) %>%
  kable_styling(fixed_thead = T)

Linkages_final  %>% 
  filter(grepl('mean', knownYcol, ignore.case=T)) %>%
  head(10) %>%
  select(LinkageGraph, ModelName, knownXcol, knownX, knownYcol, AqGoal, SER) %>% 
  kbl(align='lcc', caption='Top 10 Linkage Models using Mean') %>%
  kable_paper('hover', full_width=T) %>%
  kable_styling(fixed_thead = T)
```

# Summary
In total, `r nrow(Linkages_all)` linkage models were considered. The final model chosen was from `r Linkages_final$LinkageGraph[1]` using `r Linkages_final$knownYcol[1]` with a `r Linkages_final$ModelName[1]` regression. The resulting Aqueous MeHg Safe Level is `r signif(as.numeric(Linkages_final$AqGoal[1]), 3)` with a `r signif(as.numeric(Linkages_final$SER[1]), 3)` Standard Error of Regression.


## Save Data used for Linkage Analysis
```{r}
aqData_save <- aqData %>% 
  select(Subarea, SourceID, Project, StationName, SampleDate, Analyte, Unit, Result, MDL, RL, ResultQualCode, Censored) %>%
  rename(ReferenceCode=Project) %>% 
  mutate(SourceID = case_when(SourceID == "CEDENAqSed" ~ "CEDEN",
                              T ~ SourceID),
         ReferenceCode = case_when(SourceID == "R5AQ" ~ ReferenceCode,
                                   T ~ NA_character_)) %>% 
  arrange(Subarea, SampleDate, StationName)


blackBass_2000_save <- blackBass_2000 %>% 
  select(Subarea, SourceID, ProjectCode, StationName, SampleDate, Analyte, Unit, Result, MDL, RL, ResultQualCode, Censored, CommonName, TLAvgLength, TissueName, NumberFishPerComp) %>%
  rename(ReferenceCode=ProjectCode,
         TLAvgLength_mm = TLAvgLength) %>% 
  mutate(SourceID = case_when(SourceID == "CEDENFish" ~ "CEDEN",
                              T ~ SourceID),
         ReferenceCode = case_when(SourceID == "R5MF" ~ ReferenceCode,
                                   T ~ NA_character_)) %>% 
  arrange(Subarea, SampleDate, StationName)


writexl::write_xlsx(list("Linkage Analysis Aq Data"=aqData_save,
                         "Linkage Analysis BB Data"=blackBass_2000_save),
                    paste0(wd, '/Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Appdx A_Data Compilation_Linkage Analysis.xlsx'))
```
## Error: <text>:2:8: unexpected '|'
## 1: ---
## 2: title: |
##           ^

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0      
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8      
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2         
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2       
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1       
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 rstudioapi_0.14    
## [17] data.table_1.14.2   Matrix_1.5-1        splines_4.2.2       googledrive_2.0.0  
## [21] htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1         compiler_4.2.2     
## [25] modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3     htmltools_0.5.3    
## [29] tidyselect_1.1.2    fansi_1.0.3         viridisLite_0.4.1   crayon_1.5.1       
## [33] tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0         grid_4.2.2         
## [37] jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1     DBI_1.1.3          
## [41] magrittr_2.0.3      scales_1.2.1        writexl_1.4.0       cli_3.3.0          
## [45] stringi_1.7.8       fs_1.5.2            xml2_1.3.3          ellipsis_0.3.2     
## [49] generics_0.1.3      vctrs_0.4.1         expint_0.1-7        tools_4.2.2        
## [53] glue_1.6.2          hms_1.1.2           fastmap_1.1.0       colorspace_2.0-3   
## [57] gargle_1.2.0        rvest_1.0.3         knitr_1.40          haven_2.5.1
    Sys.time()
## [1] "2024-01-05 13:10:08 PST"