This report is automatically generated with the R
package knitr
(version 1.39
)
.
--- title: "01_SF Bay" author: "Mercury Program and Basin Planning Unit" date: '2022-05-26' output: html_document: code_folding: show toc: TRUE toc_depth: 3 runtime: shiny assets: css: - "http://fonts.googleapis.com/css?family=Raleway:300" - "http://fonts.googleapis.com/css?family=Oxygen" --- <style> body{ font-family: 'Oxygen', sans-serif; font-size: 16px; line-height: 24px; } h1,h2,h3,h4 { font-family: 'Raleway', sans-serif; } .container { width: 1250px; } h3 { background-color: #D4DAEC; text-indent: 50px; } h4 { text-indent: 75px; margin-top: 35px; margin-bottom: 5px; } </style> ```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5) ``` ```{r Libraries, echo=FALSE} library(shiny) # Had issue trying to set WD with Shiny in R project, reset working directory of rproj wd <- rstudioapi::getActiveProject() source(paste0(wd,"/R Functions/functions_estimate NDDNQ values.R")) source(paste0(wd,"/R Functions/functions_QA data.R")) ``` Define some variables for repeat use later ```{r} # Columns to select columns_mehg_select <- c("SourceID", "SourceRow", "ReferenceCode", "SampleDate", "StationName", "SimpleStationName", "MDL", "RL", "Unit", "Result", "ResultQualCode", "Comments", "Month", "Year", "DaysInMonth", "WaterYear") ``` # Load and Tidy Data ## MeHg: Updated Linkage Data Load data ```{r} mehg_data <- readxl::read_xlsx(paste0(wd,"/Reeval_Source_Analysis/Loss Data/01_SF Bay/X2_Mallard Island.xlsx"), sheet = 1) ``` Tidy Data ```{r} mehg_clean <- mehg_data %>% mutate(SimpleStationName = case_when(grepl("Mallard", StationName, ignore.case = TRUE) ~ "Mallard Island", grepl("x", StationName, ignore.case = TRUE) ~ "X2"), SampleDate = as.Date(SampleDate), # SourceRow = 1:nrow(.), Year = year(SampleDate), Month = month(SampleDate), DaysInMonth = days_in_month(month(SampleDate)), Project = case_when(SourceID == "R5AQ" ~ Project, T ~ NA_character_)) %>% # mutate(DaysInMonth = days_in_month(Month)) %>% rename(ReferenceCode = Project, Comments = ResultsComments) %>% mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1", # Wet season Oct-Dec - Start of WaterYear Month >=1 & Month <=4 ~ "Wet2", # Wet season Jan-Apr of Year Month >=5 & Month <=9 ~ "Dry"), # Dry season May-Sept of Year WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>% # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year filter(WaterYear >= 2000 & WaterYear < 2020) %>% dplyr::select(all_of(columns_mehg_select)) ``` Estimate ND/DNQ ```{r} mehgNDDNQs <- robinNDDNQ(mehg_clean, ResultQualCode) ``` View Fitted Distribution Graphs Remove comment marks (#'s) and run chunk to see fitted distribution graph options ```{r, echo=FALSE} ui <- fluidPage( selectInput('distName', label='Select Distribution', choices=names(mehgNDDNQs), selected=1), plotOutput('distPlot') ) server <- function(input, output, session) { output$distPlot <- renderPlot({ plot(mehgNDDNQs[[input$distName]]$fitmodel) # view plot fits title(input$distName, outer=T) }) } shinyApp(ui, server, options=list(height=500)) ``` Select Best Distribution Model ```{r} mehg_final <- mehgNDDNQs$burr$fitted # Selected distribution based on plot showing best fit to data ``` ## Flow: Dayflow Load data ```{r} dayflow <- read_csv(paste0(wd,"/Reeval_Source_Analysis/Loss Data/01_SF Bay/dayflow-results-1997-2020.csv")) ``` Tidy data ```{r} # Select only the outflow into SF Bay dayflow_clean <- dayflow %>% dplyr::select(c("Year", "Month", "Date", "OUT")) dayflow_final <- dayflow_clean %>% # Add water year mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1", # Wet season Oct-Dec - Start of WaterYear Month >=1 & Month <=4 ~ "Wet2", # Wet season Jan-Apr of Year Month >=5 & Month <=9 ~ "Dry"), # Dry season May-Sept of Year WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>% # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year mutate(DaysInMonth = days_in_month(Month)) %>% # Add column for days in month pivot_longer(cols = "OUT", names_to = "StationName", values_to = "Flow") %>% mutate(SimpleStationName = "Outflow to SF Bay", SampleDate = as.Date(Date, format = "%m/%d/%Y")) %>% filter(WaterYear >= 2000 & WaterYear < 2020) ``` # Visualize Data ```{r} colors_graphs <- RColorBrewer::brewer.pal(n = 11, name = 'RdYlBu') # [1] "#A50026" "#D73027" "#F46D43" "#FDAE61" "#FEE090" "#FFFFBF" "#E0F3F8" "#ABD9E9" "#74ADD1" "#4575B4" "#313695" color_mehg <- colors_graphs[3] color_flow <- colors_graphs[9] ``` ## MeHg ### By Month ```{r} monthly_mehg_plot <- ggplot(data=mehg_final, aes(x=month(Month, label=T, abbr=T), y=Result, color = "Mallard Island/X2")) + geom_point(size = 3) + scale_y_continuous(expand = expansion(mult = c(0, .1))) + scale_color_manual(name = "Site", values = c("Mallard Island/X2" = color_mehg), labels = function(x) str_wrap(x, width = 15)) + ylab("MeHg Result (ng/L)") + xlab("Month") + theme_light() + theme( text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text.x = element_text(angle = 0, hjust = 0.5), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold")) monthly_mehg_plot ggsave(filename = paste0(wd,'/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Fig 6.17 Monthly MeHg.jpg'), plot = monthly_mehg_plot, width = 9, height = 6, units = "in", dpi = 125) ``` ### By Water Year ```{r} yearly_mehg_plot <- ggplot(data=mehg_final, aes(x=WaterYear, y=Result, color = "Mallard Island/X2")) + geom_point(size = 3) + scale_y_continuous(expand = expansion(mult = c(0, .1))) + scale_x_continuous(breaks = seq(2000, 2020, by = 1)) + scale_color_manual(name = "Site", values = c("Mallard Island/X2" = color_mehg), labels = function(x) str_wrap(x, width = 15)) + ylab("MeHg Result (ng/L)") + theme_light() + theme( text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold"), ) yearly_mehg_plot ggsave(filename = paste0(wd,'/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Fig 6.16 Water Year MeHg.jpg'), plot = yearly_mehg_plot, width = 9, height = 6, units = "in", dpi = 125) ``` ## Flow ### By Month ```{r} monthly_flow_plot <- ggplot(data=dayflow_final, aes(x=month(Month, label=T, abbr=T), y=Flow, color = "Mallard Island/X2")) + geom_point() + scale_color_manual(name = "Site", values = c("Mallard Island/X2" = color_flow), labels = function(x) str_wrap(x, width = 15)) + ylab("Flow (cfs)") + xlab("Month") + theme_light() + theme( text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text.x = element_text(angle = 0, hjust = 0.5), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold")) monthly_flow_plot ggsave(filename = paste0(wd,'/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Fig 6.19 Monthly Flow.jpg'), plot = monthly_flow_plot, width = 9, height = 6, units = "in", dpi = 125) ``` ### By Water Year ```{r} yearly_flow_plot <- ggplot(data=dayflow_final, aes(x=WaterYear, y=Flow, color = "Mallard Island/X2")) + geom_point() + scale_x_continuous(breaks = seq(2000, 2019, by = 1)) + scale_color_manual(name = "Site", values = c("Mallard Island/X2" = color_flow), labels = function(x) str_wrap(x, width = 15)) + ylab("Flow (cfs)") + theme_light() + theme( text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold")) yearly_flow_plot ggsave(filename = paste0(wd,'/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Fig 6.18 Water Year Flow.jpg'), plot = yearly_flow_plot, width = 9, height = 6, units = "in", dpi = 125) ``` # Analyze data ## Save Data Used for Calculations ```{r} mehg_save <- mehg_final %>% select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored) dayflow_save <- dayflow_final %>% rename(Result=Flow) %>% mutate(SourceID="Dayflow", ReferenceCode = NA_character_, Unit="cfs", MDL=NA_real_, RL=NA_real_, ResultQualCode=NA_character_, Censored=NA) %>% select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored) sf_load_data <- mehg_save %>% bind_rows(., dayflow_save) %>% arrange(desc(Unit), SourceID, SampleDate) writexl::write_xlsx(list("6.3.1 Outflow to SF Bay" = sf_load_data), path=paste0(wd, "/Reeval_Source_Analysis/Loss Data/01_SF Bay/Output/Appdx A_Data Compilation_SF Bay.xlsx")) ``` ## Regression tests ```{r} # Consistent with the 2010 TMDL Staff Report, joining mercury with daily flow when available & matching for the regression analysis mehg_and_flow <- mehg_final %>% left_join(dayflow_final, by = c("SampleDate")) daily_correlation <- cor(mehg_and_flow$Flow, mehg_and_flow$Result) daily_linear_model <- lm(mehg_and_flow$Result ~ mehg_and_flow$Flow) summary(daily_linear_model) # No significant relationship between flow and methylmercury with this method # Monthly method # Group by month then calculate the median mehg_summary <- mehg_final %>% group_by(Month) %>% summarise(MeHgMedian = median(Result)) flow_summary <- dayflow_final %>% group_by(Month) %>% summarise(FlowMedian = median(Flow)) # Regression test mehg_and_flow_summary <- mehg_summary %>% full_join(flow_summary) monthly_correlation <- cor(mehg_and_flow_summary$FlowMedian, mehg_and_flow_summary$MeHgMedian) monthly_linear_model <- lm(mehg_and_flow_summary$MeHgMedian ~ mehg_and_flow_summary$FlowMedian) summary(monthly_linear_model) ggplot(mehg_and_flow_summary, aes(x=FlowMedian, y=MeHgMedian)) + geom_point(size = 5) + ylab("Median Monthly MeHg Result (ng/L)") + xlab("Median Monthly Flow (cfs)") + stat_smooth() # Still no significant relationship between flow and methylmercury, not consistent with 2010 TMDL Staff Report # However, there is more of a correlation than with the exports to southern California dataset ``` ## Calculate Loads Monthly medians ```{r} sfbay_summary <- mehg_and_flow_summary %>% mutate(DaysInMonth = days_in_month(Month)) %>% mutate(VolumeMedian = FlowMedian * 86400 * DaysInMonth * 28.317) %>% mutate(`Loss (g/month)` = VolumeMedian * MeHgMedian / 1000000000) sfbay_annual_loss <- sum(sfbay_summary$`Loss (g/month)`) sfbay_annual_loss ``` ## Water Balance ```{r} # Convert from cfs to million acre-feet flow_only <- flow_summary %>% mutate(DaysInMonth = days_in_month(Month)) %>% mutate(MonthlyVolumeAF = FlowMedian * DaysInMonth * 1.98) sfb_water_balance <- sum(flow_only$MonthlyVolumeAF) / 1000000 sfb_water_balance ``` ## Data to recreate table 6.16 ```{r} # Total number of samples n_samples <- nrow(mehg_final) n_samples # Minimum min <- min(mehg_final$Result) min # Pooled average ave <- mean(mehg_final$Result) ave # Maximum max <- max(mehg_final$Result) max # Pooled median median <- median(mehg_final$Result) median # Annual average avg_mehg_summary <- mehg_final %>% group_by(Month) %>% summarise(MeHgMean = mean(Result)) annual_ave <- mean(avg_mehg_summary$MeHgMean) annual_ave ``` # Alternatives Considered ## Monthly Averages ```{r} # Group by month then calculate the average avg_flow_summary <- dayflow_final %>% group_by(Month) %>% summarise(FlowMean = mean(Flow)) avg_volume_summary <- avg_flow_summary %>% mutate(DaysInMonth = days_in_month(Month)) %>% mutate(VolumeMean = FlowMean * 86400 * DaysInMonth * 28.317) avg_volume_annual <- sum(avg_volume_summary$VolumeMean) # calculate loads with mean volume and median flow med_mehg_and_avg_flow_summary <- mehg_summary %>% full_join(avg_flow_summary) mixed_sfbay_summary <- med_mehg_and_avg_flow_summary %>% mutate(DaysInMonth = days_in_month(Month)) %>% mutate(VolumeMean = FlowMean * 86400 * DaysInMonth * 28.317) %>% mutate(`Loss (g/month)` = VolumeMean * MeHgMedian / 1000000000) mixed_sfbay_annual_loss <- sum(mixed_sfbay_summary$`Loss (g/month)`) mixed_sfbay_annual_loss # Calculate loads avg_mehg_and_flow_summary <- avg_mehg_summary %>% full_join(avg_flow_summary) avg_sfbay_summary <- avg_mehg_and_flow_summary %>% mutate(DaysInMonth = days_in_month(Month)) %>% mutate(VolumeMean = FlowMean * 86400 * DaysInMonth * 28.317) %>% mutate(`Loss (g/month)` = VolumeMean * MeHgMean / 1000000000) avg_sfbay_annual_loss <- sum(avg_sfbay_summary$`Loss (g/month)`) avg_sfbay_annual_loss ```
## Error: <text>:17:1: unexpected '<' ## 16: --- ## 17: < ## ^
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8 ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## loaded via a namespace (and not attached): ## [1] rstudioapi_0.13 knitr_1.39 magrittr_2.0.3 tidyselect_1.1.2 ## [5] R6_2.5.1 rlang_1.0.2 fastmap_1.1.0 fansi_1.0.3 ## [9] stringr_1.4.0 highr_0.9 dplyr_1.0.9 tools_4.2.2 ## [13] xfun_0.31 utf8_1.2.2 DBI_1.1.2 cli_3.3.0 ## [17] htmltools_0.5.2 ellipsis_0.3.2 assertthat_0.2.1 yaml_2.3.5 ## [21] readxl_1.4.0 digest_0.6.29 tibble_3.1.7 lifecycle_1.0.1 ## [25] crayon_1.5.1 RColorBrewer_1.1-3 purrr_0.3.4 vctrs_0.4.1 ## [29] glue_1.6.2 evaluate_0.15 rmarkdown_2.14 stringi_1.7.6 ## [33] compiler_4.2.2 pillar_1.7.0 cellranger_1.1.0 generics_0.1.2 ## [37] writexl_1.4.0 pkgconfig_2.0.3
Sys.time()
## [1] "2023-12-26 14:51:30 PST"