This report is automatically generated with the R package knitr (version 1.39) .

---
title: "01_SF Bay"
author: "Mercury Program and Basin Planning Unit"
date: '2022-05-26'
output:
  html_document:
    code_folding: show
    toc: TRUE
    
    toc_depth: 3
runtime: shiny
assets:
  css:
    - "http://fonts.googleapis.com/css?family=Raleway:300"
    - "http://fonts.googleapis.com/css?family=Oxygen"
---
<style>
body{
  font-family: 'Oxygen', sans-serif;
  font-size: 16px;
  line-height: 24px;
}

h1,h2,h3,h4 {
  font-family: 'Raleway', sans-serif;
}

.container { width: 1250px; }
h3 {
  background-color: #D4DAEC;
  text-indent: 50px; 
}
h4 {
  text-indent: 75px;
  margin-top: 35px;
  margin-bottom: 5px;
}
</style>

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5)
```


```{r Libraries, echo=FALSE}
library(shiny)

# Had issue trying to set WD with Shiny in R project, reset working directory of rproj
wd <- rstudioapi::getActiveProject()
source(paste0(wd,"/R Functions/functions_estimate NDDNQ values.R"))
source(paste0(wd,"/R Functions/functions_QA data.R"))
```

Define some variables for repeat use later
```{r}
# Columns to select
columns_mehg_select <- c("SourceID", "SourceRow", "ReferenceCode", "SampleDate", "StationName", "SimpleStationName", "MDL", "RL", "Unit", "Result", "ResultQualCode", "Comments", "Month", "Year", "DaysInMonth", "WaterYear")
```

# Load and Tidy Data
## MeHg: Updated Linkage Data
Load data
```{r}
mehg_data <- readxl::read_xlsx(paste0(wd,"/Reeval_Source_Analysis/Loss Data/01_SF Bay/X2_Mallard Island.xlsx"),
                           sheet = 1)
```

Tidy Data
```{r}
mehg_clean <- mehg_data %>% 
  mutate(SimpleStationName = case_when(grepl("Mallard", StationName, ignore.case = TRUE)
                              ~ "Mallard Island",
                              grepl("x", StationName, ignore.case = TRUE)
                              ~ "X2"),
         SampleDate = as.Date(SampleDate),
         # SourceRow = 1:nrow(.),
         Year = year(SampleDate),
         Month = month(SampleDate),
         DaysInMonth = days_in_month(month(SampleDate)),
         
         Project = case_when(SourceID == "R5AQ" ~ Project,
                             T ~ NA_character_)) %>% 
  # mutate(DaysInMonth = days_in_month(Month)) %>% 
  rename(ReferenceCode = Project,
         Comments = ResultsComments) %>% 
  mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1",   # Wet season Oct-Dec - Start of WaterYear
                            Month >=1  & Month <=4  ~ "Wet2",   # Wet season Jan-Apr of Year
                            Month >=5  & Month <=9 ~ "Dry"),   # Dry season May-Sept of Year
         WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>%   # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year
  filter(WaterYear >= 2000 & WaterYear < 2020) %>% 
  dplyr::select(all_of(columns_mehg_select))
```

Estimate ND/DNQ
```{r}
mehgNDDNQs <- robinNDDNQ(mehg_clean, ResultQualCode)
```

View Fitted Distribution Graphs
Remove comment marks (#'s) and run chunk to see fitted distribution graph options
```{r, echo=FALSE}
ui <- fluidPage(
  selectInput('distName', label='Select Distribution', choices=names(mehgNDDNQs), selected=1),
  plotOutput('distPlot')
)

server <- function(input, output, session) {
  output$distPlot <- renderPlot({
  plot(mehgNDDNQs[[input$distName]]$fitmodel) # view plot fits
    title(input$distName, outer=T)
})
}

shinyApp(ui, server, options=list(height=500))
```

Select Best Distribution Model
```{r}
mehg_final <- mehgNDDNQs$burr$fitted # Selected distribution based on plot showing best fit to data
```



## Flow: Dayflow
Load data
```{r}
dayflow <- read_csv(paste0(wd,"/Reeval_Source_Analysis/Loss Data/01_SF Bay/dayflow-results-1997-2020.csv"))
```

Tidy data
```{r}
# Select only the outflow into SF Bay
dayflow_clean <- dayflow %>%
  dplyr::select(c("Year", "Month", "Date", "OUT"))


dayflow_final <- dayflow_clean %>% 
  # Add water year
  mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1",   # Wet season Oct-Dec - Start of WaterYear
                            Month >=1  & Month <=4  ~ "Wet2",   # Wet season Jan-Apr of Year
                            Month >=5  & Month <=9 ~ "Dry"),   # Dry season May-Sept of Year
         WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>%   # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year
  mutate(DaysInMonth = days_in_month(Month)) %>%  # Add column for days in month
  pivot_longer(cols = "OUT",
               names_to = "StationName",
               values_to = "Flow") %>% 
  mutate(SimpleStationName = "Outflow to SF Bay",
         SampleDate = as.Date(Date, format = "%m/%d/%Y")) %>% 
  filter(WaterYear >= 2000 & WaterYear < 2020)
```


# Visualize Data
```{r}
colors_graphs <- RColorBrewer::brewer.pal(n = 11, name = 'RdYlBu')
# [1] "#A50026" "#D73027" "#F46D43" "#FDAE61" "#FEE090" "#FFFFBF" "#E0F3F8" "#ABD9E9" "#74ADD1" "#4575B4" "#313695"

color_mehg <- colors_graphs[3]

color_flow <- colors_graphs[9]
```

## MeHg
### By Month
```{r}
monthly_mehg_plot <- ggplot(data=mehg_final, 
                            aes(x=month(Month, label=T, abbr=T), 
                                y=Result,
                                color = "Mallard Island/X2")) +
  geom_point(size = 3) + 
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  scale_color_manual(name = "Site",
                     values = c("Mallard Island/X2" = color_mehg),
                     labels = function(x) str_wrap(x, width = 15)) +
  ylab("MeHg Result (ng/L)") +
  xlab("Month") +
  theme_light() + 
  theme(
    text = element_text(size=14, family="sans"),
    axis.title=element_text(size=14, face="bold"), 
    axis.text.x = element_text(angle = 0, hjust = 0.5),     
    panel.grid.major.y=element_line(size=1),
    panel.grid.minor=element_blank(),
    legend.text = element_text(size = 14), 
    legend.title = element_text(size = 14, face="bold"))
monthly_mehg_plot


ggsave(filename = paste0(wd,'/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Fig 6.17 Monthly MeHg.jpg'),
       plot = monthly_mehg_plot,
       width = 9,
       height = 6,
       units = "in",
       dpi = 125)
```

### By Water Year
```{r}
yearly_mehg_plot <- ggplot(data=mehg_final, 
                           aes(x=WaterYear, 
                               y=Result,
                               color = "Mallard Island/X2")) +
  geom_point(size = 3) + 
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  scale_x_continuous(breaks = seq(2000, 2020, by = 1)) +
  scale_color_manual(name = "Site",
                     values = c("Mallard Island/X2" = color_mehg),
                     labels = function(x) str_wrap(x, width = 15)) +
  ylab("MeHg Result (ng/L)") +
  theme_light() + 
  theme(
    text = element_text(size=14, family="sans"),
    axis.title=element_text(size=14, face="bold"), 
    axis.text.x = element_text(angle = 45, hjust = 1),     
    panel.grid.major.y=element_line(size=1),
    panel.grid.minor=element_blank(),
    legend.text = element_text(size = 14), 
    legend.title = element_text(size = 14, face="bold"),
   )
yearly_mehg_plot

ggsave(filename = paste0(wd,'/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Fig 6.16 Water Year MeHg.jpg'),
       plot = yearly_mehg_plot,
       width = 9,
       height = 6,
       units = "in",
       dpi = 125)
```



## Flow
### By Month
```{r}
monthly_flow_plot <- ggplot(data=dayflow_final, 
                            aes(x=month(Month, label=T, abbr=T), 
                                y=Flow,
                                color = "Mallard Island/X2")) +
  geom_point() + 
  scale_color_manual(name = "Site",
                     values = c("Mallard Island/X2" = color_flow),
                     labels = function(x) str_wrap(x, width = 15)) +
  ylab("Flow (cfs)") +
  xlab("Month") +
  theme_light() + 
  theme(
    text = element_text(size=14, family="sans"),
    axis.title=element_text(size=14, face="bold"), 
    axis.text.x = element_text(angle = 0, hjust = 0.5),     
    panel.grid.major.y=element_line(size=1),
    panel.grid.minor=element_blank(),
    legend.text = element_text(size = 14), 
    legend.title = element_text(size = 14, face="bold"))
monthly_flow_plot


ggsave(filename = paste0(wd,'/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Fig 6.19 Monthly Flow.jpg'),
       plot = monthly_flow_plot,
       width = 9,
       height = 6,
       units = "in",
       dpi = 125)
```

### By Water Year
```{r}
yearly_flow_plot <- ggplot(data=dayflow_final, 
                           aes(x=WaterYear, 
                               y=Flow,
                               color = "Mallard Island/X2")) +
  geom_point() + 
  scale_x_continuous(breaks = seq(2000, 2019, by = 1)) +
  scale_color_manual(name = "Site",
                     values = c("Mallard Island/X2" = color_flow),
                     labels = function(x) str_wrap(x, width = 15)) +
  ylab("Flow (cfs)") +
  theme_light() + 
  theme(
    text = element_text(size=14, family="sans"),
    axis.title=element_text(size=14, face="bold"), 
    axis.text.x = element_text(angle = 45, hjust = 1),     
    panel.grid.major.y=element_line(size=1),
    panel.grid.minor=element_blank(),
    legend.text = element_text(size = 14), 
    legend.title = element_text(size = 14, face="bold"))
yearly_flow_plot


ggsave(filename = paste0(wd,'/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Fig 6.18 Water Year Flow.jpg'),
       plot = yearly_flow_plot,
       width = 9,
       height = 6,
       units = "in",
       dpi = 125)
```



# Analyze data
## Save Data Used for Calculations
```{r}
mehg_save <- mehg_final %>% 
  select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored)

dayflow_save <- dayflow_final %>% 
  rename(Result=Flow) %>% 
  mutate(SourceID="Dayflow",
         ReferenceCode = NA_character_,
         Unit="cfs",
         MDL=NA_real_,
         RL=NA_real_,
         ResultQualCode=NA_character_,
         Censored=NA) %>% 
  select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored)

sf_load_data <- mehg_save %>% 
  bind_rows(., dayflow_save) %>% 
  arrange(desc(Unit), SourceID, SampleDate)


writexl::write_xlsx(list("6.3.1 Outflow to SF Bay" = sf_load_data),
                    path=paste0(wd, "/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Appdx A_Data Compilation_SF Bay.xlsx"))
```

## Regression tests
```{r}
# Consistent with the 2010 TMDL Staff Report, joining mercury with daily flow when available & matching for the regression analysis
mehg_and_flow <- mehg_final %>% 
  left_join(dayflow_final, by = c("SampleDate"))

daily_correlation <- cor(mehg_and_flow$Flow, mehg_and_flow$Result)

daily_linear_model <- lm(mehg_and_flow$Result ~ mehg_and_flow$Flow)
summary(daily_linear_model)
# No significant relationship between flow and methylmercury with this method


# Monthly method
# Group by month then calculate the median
mehg_summary <- mehg_final %>% 
  group_by(Month) %>% 
  summarise(MeHgMedian = median(Result))

flow_summary <- dayflow_final %>% 
  group_by(Month) %>% 
  summarise(FlowMedian = median(Flow))

# Regression test
mehg_and_flow_summary <- mehg_summary %>% 
  full_join(flow_summary)

monthly_correlation <- cor(mehg_and_flow_summary$FlowMedian, mehg_and_flow_summary$MeHgMedian)

monthly_linear_model <- lm(mehg_and_flow_summary$MeHgMedian ~ mehg_and_flow_summary$FlowMedian)
summary(monthly_linear_model)

ggplot(mehg_and_flow_summary, aes(x=FlowMedian, y=MeHgMedian)) +
  geom_point(size = 5) + 
  ylab("Median Monthly MeHg Result (ng/L)") +
  xlab("Median Monthly Flow (cfs)") +
  stat_smooth()
# Still no significant relationship between flow and methylmercury, not consistent with 2010 TMDL Staff Report
# However, there is more of a correlation than with the exports to southern California dataset
```


## Calculate Loads
Monthly medians
```{r}
sfbay_summary <- mehg_and_flow_summary %>% 
  mutate(DaysInMonth = days_in_month(Month)) %>% 
  mutate(VolumeMedian = FlowMedian * 86400 * DaysInMonth * 28.317) %>%
  mutate(`Loss (g/month)` = VolumeMedian * MeHgMedian / 1000000000)

sfbay_annual_loss <- sum(sfbay_summary$`Loss (g/month)`)
sfbay_annual_loss
```

## Water Balance
```{r}
# Convert from cfs to million acre-feet 
flow_only <- flow_summary %>% 
  mutate(DaysInMonth = days_in_month(Month)) %>% 
  mutate(MonthlyVolumeAF = FlowMedian * DaysInMonth * 1.98)
sfb_water_balance <- sum(flow_only$MonthlyVolumeAF) / 1000000
sfb_water_balance
```

## Data to recreate table 6.16
```{r}
# Total number of samples
n_samples <- nrow(mehg_final)
n_samples

# Minimum
min <- min(mehg_final$Result)
min

# Pooled average
ave <- mean(mehg_final$Result)
ave

# Maximum
max <- max(mehg_final$Result)
max

# Pooled median
median <- median(mehg_final$Result)
median

# Annual average
avg_mehg_summary <- mehg_final %>% 
  group_by(Month) %>% 
  summarise(MeHgMean = mean(Result))
annual_ave <- mean(avg_mehg_summary$MeHgMean)
annual_ave
```



# Alternatives Considered
## Monthly Averages
```{r}
# Group by month then calculate the average
avg_flow_summary <- dayflow_final %>% 
  group_by(Month) %>% 
  summarise(FlowMean = mean(Flow))

avg_volume_summary <- avg_flow_summary %>% 
  mutate(DaysInMonth = days_in_month(Month)) %>% 
  mutate(VolumeMean = FlowMean * 86400 * DaysInMonth * 28.317)

avg_volume_annual <- sum(avg_volume_summary$VolumeMean)

# calculate loads with mean volume and median flow
med_mehg_and_avg_flow_summary <- mehg_summary %>% 
  full_join(avg_flow_summary)

mixed_sfbay_summary <- med_mehg_and_avg_flow_summary %>% 
  mutate(DaysInMonth = days_in_month(Month)) %>% 
  mutate(VolumeMean = FlowMean * 86400 * DaysInMonth * 28.317) %>%
  mutate(`Loss (g/month)` = VolumeMean * MeHgMedian / 1000000000)

mixed_sfbay_annual_loss <- sum(mixed_sfbay_summary$`Loss (g/month)`)
mixed_sfbay_annual_loss

# Calculate loads
avg_mehg_and_flow_summary <- avg_mehg_summary %>% 
  full_join(avg_flow_summary)

avg_sfbay_summary <- avg_mehg_and_flow_summary %>% 
  mutate(DaysInMonth = days_in_month(Month)) %>% 
  mutate(VolumeMean = FlowMean * 86400 * DaysInMonth * 28.317) %>%
  mutate(`Loss (g/month)` = VolumeMean * MeHgMean / 1000000000)

avg_sfbay_annual_loss <- sum(avg_sfbay_summary$`Loss (g/month)`)
avg_sfbay_annual_loss
```
## Error: <text>:17:1: unexpected '<'
## 16: ---
## 17: <
##     ^

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] rstudioapi_0.13    knitr_1.39         magrittr_2.0.3     tidyselect_1.1.2  
##  [5] R6_2.5.1           rlang_1.0.2        fastmap_1.1.0      fansi_1.0.3       
##  [9] stringr_1.4.0      highr_0.9          dplyr_1.0.9        tools_4.2.2       
## [13] xfun_0.31          utf8_1.2.2         DBI_1.1.2          cli_3.3.0         
## [17] htmltools_0.5.2    ellipsis_0.3.2     assertthat_0.2.1   yaml_2.3.5        
## [21] readxl_1.4.0       digest_0.6.29      tibble_3.1.7       lifecycle_1.0.1   
## [25] crayon_1.5.1       RColorBrewer_1.1-3 purrr_0.3.4        vctrs_0.4.1       
## [29] glue_1.6.2         evaluate_0.15      rmarkdown_2.14     stringi_1.7.6     
## [33] compiler_4.2.2     pillar_1.7.0       cellranger_1.1.0   generics_0.1.2    
## [37] writexl_1.4.0      pkgconfig_2.0.3
    Sys.time()
## [1] "2023-12-26 14:51:30 PST"