This report is automatically generated with the R
package knitr
(version 1.40
)
.
--- title: 'Delta Hg TMDL Review' subtitle: 'Linkage Analysis - Aq MeHg vs. Black Bass THg' author: "Mercury Program and Basin Planning Unit" date: "8/4/2021" output: html_document: code_folding: show toc: TRUE toc_float: TRUE toc_depth: 3 runtime: shiny assets: css: - "http://fonts.googleapis.com/css?family=Raleway:300" - "http://fonts.googleapis.com/css?family=Oxygen" --- <style> body{ font-family: 'Oxygen', sans-serif; font-size: 16px; line-height: 24px; } h1,h2,h3,h4 { font-family: 'Raleway', sans-serif; } .container { width: 1250px; } h3 { background-color: #D4DAEC; text-indent: 50px; } h4 { text-indent: 75px; margin-top: 35px; margin-bottom: 5px; } </style> ```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5) ``` ```{r Libraries, include=FALSE, message=FALSE} library(knitr) # generate html tables for this report library(kableExtra) # better formatting of tables library(DT) # creates sortable tables library(shiny) library(matrixStats) # weightedMedian # Having issue with runtime:shiny resetting the WD of rproj so need to use setwd() wd <- rstudioapi::getActiveProject() setwd(wd) source("R Functions/functions_estimate NDDNQ values.R") # Load first so select() isn't masked from dplyr source("R Functions/functions_QA data.R") source("R Functions/function_predictionModels.R") source("R Functions/function_predictionModelPlot.R") source("R Functions/function_AqSampleFishModelPairing.R") # Function to round values in a dataframe signif_df <- function(x, digits) { # round numeric columns # x = data frame # digits = number of digits to round numeric_columns <- sapply(x, mode) == 'numeric' x[numeric_columns] <- signif(x[numeric_columns], digits) x } ``` ## Load Data ```{r} setwd(wd) # Load Data from "eval1_TLG Eval & BB Impl Goal" load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/blackBass_2000.RData') # loads "blackBass_2000" ~ Final Black Bass raw data load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/BlkBassStd350_FinalModels_2000.RData') # loads "BlkBassStd350_FinalModels_2000" ~ Final Black Bass Std 350 [Hg] Models for each Year load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/BBImpGoal.RData') # loads "BBImpGoal" loads final Black Bass Implementation Goal (calculated using Black Bass data weighted average by year for each subarea) # Load Aqueous Data aqdf <- loadData("Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/6a_AqMaster_noRepeats.xlsx") # Convert columns that are character but should be numeric or date aqdf <- chara_to_NumDate(aqdf) ``` # Aqueous Data QC **This markdown, shows the finalized results from "Test Different Linkage Analysis Aq & BB Year Pairings" markdown, which evaluated data year range selection and which statistic to use for the linkage model.** ## View Unique Factors ```{r} aqdf %>% mutate(AnalyteUnitMatrix = paste(Analyte, '-', Unit, '-', MatrixName)) %>% unique_factors(SourceID, Subarea, AnalyteUnitMatrix, WBT) ``` ## Change X2 Subarea to West Delta ```{r} aqData1 <- aqdf %>% mutate(Subarea = recode(Subarea, "X2" = "West Delta"), # Change X2 to West Delta, consistent with the 2010 TMDL Staff Report Type = 'Aqueous', Linkage = case_when(SampleDate >= "2000-03-01" & SampleDate < "2000-11-01" & grepl("Delta Mendota Canal @ Tracy|Freeport|Mokelumne-Co|Mokelumne River d/s I-5|Greene's Landing|Vernalis|State Water Project|X2", StationName) ~ '2010 Staff Report', T ~ 'Updated')) # Add column to distinguish Updated Linkage data from 2010 Staff Report Linkage data ``` ## Select "Methylmercury, Total", "ng/L", "Aqueous" ```{r} # Select only aqueous MeHg as in 2010 TMDL Staff Report aqData2 <- aqData1 %>% filter(Analyte == 'Methylmercury, Total', Unit == 'ng/L', MatrixName == 'Aqueous') ``` ## View Factors for Final Data Selection ```{r} aqData2 %>% mutate(AnalyteUnitMatrix = paste(Analyte, '-', Unit, '-', MatrixName)) %>% unique_factors(SourceID, Subarea, AnalyteUnitMatrix, WBT) ``` ## Estimate ND & DNQ Values ```{r} aqNDDNQs <- robinNDDNQ(aqData2, ResultQualCode) ``` ### View Fitted Distribution Graphs ```{r, echo=FALSE} ui <- fluidPage( selectInput('distName', label='Select Distribution', choices=names(aqNDDNQs), selected=1), plotOutput('distPlot') ) server <- function(input, output, session) { output$distPlot <- renderPlot({ plot(aqNDDNQs[[input$distName]]$fitmodel) # view plot fits title(input$distName, outer=T) }) } shinyApp(ui, server, options=list(height=500)) ``` ### Select Best Distribution Model ```{r} aqData3 <- aqNDDNQs$llogis$fitted # Selected distribution - primarily based on CDF & P-P plot showing best fit to data ``` ## Check for Outliers e.g., orders of magnitude maybe issues with unit conversion ```{r} # View Data for Entire Delta result_time_Plotly(aqData3 %>% filter(Censored == F), groupByCol=TMDL, showMean=F, interactive=T, logscale=T, showLegend=F) ``` ```{r, fig.height=8.5} # View Data for Each Subarea result_time_Plotly(aqData3 %>% filter(Censored == F), groupByCol=Subarea, showMean=T, interactive=T, logscale=T, showLegend=F) ``` ```{r} # View NDs & DNQs result_time_Plotly(aqData3 %>% filter(Censored == T), groupByCol=TMDL, showMean=F, interactive=T, logscale=T, showLegend=F) ``` ## Finalize QC & Data Selection ```{r} # Select DRMP Data aqDRMP_data <- aqData3 %>% filter(grepl('DRMP', SourceID, ignore.case=T), !grepl('YB', Subarea, ignore.case=T)) %>% # remove Yolo Bypass (north removed from Black Bass during TLG Eval). South Yolo Bypass was originally included in year range and statistic evaluations but removed as it appears to be an outlier attributed to different hydrologic conditions and resulting fish Hg exposure compared to the other subareas (which contain fish year round). arrange(Subarea, SampleDate) ``` ### Black Bass Data Selection ```{r} # Add Linkage Designation to raw data bbData_2000_2019 <- blackBass_2000 %>% mutate(Linkage = case_when(SampleDate >= as.Date("2000-08-01") & SampleDate < as.Date("2000-10-01") & grepl("544ST0909|Mokelumne River/d/s Cosumnes River|544ST0934|Isleton|510ST1312|Vernalis|541ST1481|510ST1666|510ST1304", StationName) ~ '2010 Staff Report', T ~ 'Updated'), # Add column to distinguish Updated Linkage data from 2010 Staff Report Linkage data Type = case_when(Linkage == "2010 Staff Report" ~ "Largemouth Bass", T ~ "Black Bass")) bbData_2016_2019 <- bbData_2000_2019 %>% filter(Year >= 2016) # model result data BlkBassStd350_FinalModels_2000 <- BlkBassStd350_FinalModels_2000 %>% filter(Year >= 2016) ``` ### Aq & BB Data Sig Fig Count ```{r} # Function to Find # of Sig Figs - does not count trailing 0's but those don't exist from excel anyway sigdigs <- function(x){ orig_scipen <- getOption("scipen") options(scipen = 999) on.exit(options(scipen = orig_scipen)) x <- as.character(x) x <- sub("\\.", "", x) x <- gsub("(^0+|0+$)", "", x) nchar(x) } aqData_decimals <- aqDRMP_data %>% mutate(SigFigs = map_dbl(aqDRMP_data$Result, sigdigs), Type = 'Aqueous') %>% select(SigFigs, Type) blackBass_decimals <- bbData_2016_2019 %>% mutate(SigFigs = map_dbl(bbData_2016_2019$Result, sigdigs), Type = 'Black Bass') %>% select(SigFigs, Type) aqData_decimals %>% bind_rows(blackBass_decimals) %>% count(Type, SigFigs) %>% rename(Occurences = n) %>% kbl(align='lcc', caption="Significant Figure Count - Does not count trailing 0's") %>% kable_paper('hover', full_width=T) %>% kable_styling(fixed_thead = T) # Aqueous data predominantly at least 2 sig figs, so round Aq Impl goal to 2 sig figs (15 sig figs are estimated ND/DNQ values) # Black data predominantly 3 sig figs, so round BB Impl goal to 3 sig figs sigdigs(BBImpGoal) ``` ### Save Linkage Model Aq & BB Data Sets & Sampling Locations ```{r} setwd(wd) aqDRMP_data_save <- aqDRMP_data %>% select(Subarea, SourceID, Project, StationName, SampleDate, Analyte, Unit, Result, MDL, RL, ResultQualCode, Censored) %>% rename(ReferenceCode=Project) %>% mutate(SourceID = case_when(SourceID == "CEDENAqSed" ~ "CEDEN", T ~ SourceID), ReferenceCode = case_when(SourceID == "R5AQ" ~ ReferenceCode, T ~ NA_character_)) %>% arrange(Subarea, SampleDate, StationName) bbData_2016_2019_save <- bbData_2016_2019 %>% select(Subarea, SourceID, ProjectCode, StationName, SampleDate, Analyte, Unit, Result, MDL, RL, ResultQualCode, Censored, CommonName, TLAvgLength, TissueName, NumberFishPerComp) %>% rename(ReferenceCode=ProjectCode, TLAvgLength_mm = TLAvgLength) %>% mutate(SourceID = case_when(SourceID == "CEDENFish" ~ "CEDEN", T ~ SourceID), ReferenceCode = case_when(SourceID == "R5MF" ~ ReferenceCode, T ~ NA_character_)) %>% arrange(Subarea, SampleDate, StationName) # Save Linkage Model Data Sets writexl::write_xlsx(list("Linkage Model Aq Data" = aqDRMP_data_save, "Linkage Model BB Data"=bbData_2016_2019_save), path='Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Appdx A_Data Compilation_Linkage Model.xlsx') # Save Linkage Model Sampling Locations aq_SampLoc <- aqDRMP_data %>% # Type & Linkage columns previously added when creating "aqData2" mutate(uniqName = paste(StationName, Linkage, round(TargetLatitude,4), round(TargetLongitude,4))) %>% # argetLat & TargetLong needs to be the same code used in Step 3 Script when creating variable "AqMaster" in mutate(uniqName = ...) distinct(uniqName, .keep_all=T) %>% select(Subarea, StationName, Type, Linkage, TargetLatitude, TargetLongitude, CoordSystem, uniqName) bb_SampLoc <- bbData_2016_2019 %>% mutate(uniqName = paste(StationName, Linkage, round(TargetLatitude,4), round(TargetLongitude,4))) %>% # TargetLat & TargetLong needs to be the same code used in Step 3 Script when creating variable "AqMaster" in mutate(uniqName = ...) distinct(uniqName, .keep_all=T) %>% select(Subarea, StationName, Type, Linkage, TargetLatitude, TargetLongitude, CoordSystem, uniqName) SampLoc <- aq_SampLoc %>% bind_rows(., bb_SampLoc) %>% # combine BB & Aqueous sampling locations arrange(Type, Subarea, Linkage) writexl::write_xlsx(SampLoc, path='Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Aq&BB_SampleLocations.xlsx') ``` # UPDATED LINKAGE GRAPH ## 5yr Aq DRMP Data Overlap (2016-2019) vs. DRMP Black Bass (2016-2019) by Subarea ```{r} Linkage.DRMPAq.DRMPBByr <- AqSampleFishModelPairing(aqData=aqDRMP_data, fishModels=BlkBassStd350_FinalModels_2000, bylocationColumn=Subarea, aqYearColumn=seasonYear, aqOverlapYrs=5) Linkage.DRMPAq.DRMPBByr.Subarea <- Linkage.DRMPAq.DRMPBByr %>% group_by(Subarea) %>% mutate(sampleYears = aq_sampleYears[which.max(aq_n)]) %>% ungroup() %>% group_by(Subarea, sampleYears) %>% summarize(n = n(), Std350_Median = median(Stdz350), Aq_Median_ofPooledMedians = median(aq_Median_pool), UsedInLinkage = 'Yes', .groups = 'drop') updated_linkage <- predictionModels(Linkage.DRMPAq.DRMPBByr.Subarea, .knownYcol=Aq_Median_ofPooledMedians, .knownXcol=Std350_Median, knownX=BBImpGoal) %>% rename(aqMeHgSafeConc = predictedY) %>% mutate(aqMeHgSafeConc = signif(aqMeHgSafeConc, 2)) %>% ungroup %>% head(1) # select best model. in this case it is log ``` ### Linkage Graph ```{r} title <- "" xTitle <- 'Median Hg Conc. of Standardized 350 mm Black Bass (mg/Kg)' yTitle <- 'Median of Pooled Medians of Unfiltered Aqueous MeHg (ng/L)' # extract data and model dataframe <- updated_linkage$data[[1]] dfKnownX <- updated_linkage$knownX # Regression Model model <- updated_linkage$Model[[1]] modelName <- updated_linkage$ModelName model_predict <- tibble(Std350_Median = seq(floor( min(dataframe %>% pull(Std350_Median), dfKnownX)*95)/100, ceiling(max(dataframe %>% pull(Std350_Median), dfKnownX)*105)/100, length.out=500)) model_predictUpdate <- model_predict %>% mutate(Aq_Median_ofPooledMedians = case_when(modelName == 'pwr' | modelName == 'exp' ~ exp(predict(model, model_predict %>% select(Std350_Median))), T ~ predict(model, model_predict %>% select(Std350_Median)))) #ggplot shapes & colors Subareas <- sort(unique(BlkBassStd350_FinalModels_2000$Subarea)) mySubareashapes <- c(15, 16, 17, 18, 8, 22, 21, 23, 24, 4) Subareashapes <- setNames(mySubareashapes[1:length(Subareas)], Subareas) # color safe palette - https://personal.sron.nl/~pault/#sec:qualitative mySubareacolors <- c('#0077BB','#33BBEE','#009988','#EE7733','#CC3311','#EE3377', '#919191') # Changed '#BBBBBB' light gray to '#919191' darker gray Subareacolors <- setNames(mySubareacolors[1:length(Subareas)], Subareas) used_colors_orig <- Subareacolors[unique(dataframe %>% pull(Subarea))] used_shapes_orig <- Subareashapes[unique(dataframe %>% pull(Subarea))] # Graph Limits xMax <- ceiling(max(model_predictUpdate[[1]])*10)/10 yMax <- ceiling(max(model_predictUpdate[[2]], max(dataframe %>% select(Aq_Median_ofPooledMedians)))*10)/10 #Plot Data Points & Regression Line regressionGraph <- ggplot() + ggtitle(title) + xlab(xTitle) + ylab(yTitle) + # Data Points geom_point(data=dataframe, aes(x=Std350_Median, y=Aq_Median_ofPooledMedians, color=Subarea, shape=Subarea), alpha=1, size=4, stroke=1.5, show.legend=T) + # Regression Line geom_point(data=model_predictUpdate, aes(x=Std350_Median, y=Aq_Median_ofPooledMedians), color='grey20', size=.3, show.legend=F) + # Text - Regression Equation annotate("text", x=1.05, hjust=0, y=.1+.004, label=paste0("italic(y)==", round(model$coefficients['(Intercept)'],3), "~'+'~", round(model$coefficients['log(Std350_Median)'],4), "~'*'~'ln('*italic(x)*')'"), parse=T) + # Text - Regression Model SER (Standard Error of Regression) annotate("text", x=1.05, hjust=0, y=.1-.003, # updated_linkage %>% pull(aqMeHgSafeConc)*1.9, label=paste0("italic(~SER) == ", sigfig(updated_linkage$SER,3)), parse=T) + #Scale & Theme scale_x_continuous(expand = c(0, 0), limits = c(0, xMax)) + scale_y_continuous(expand = c(0, 0), limits = c(0, yMax)) + scale_color_manual(values = used_colors_orig) + scale_shape_manual(values = used_shapes_orig) + theme_light() + theme(text = element_text(size=14, family="sans"), axis.title=element_text(size=12, face="bold"), axis.text.x = element_text(size=10.5, angle = 0, hjust = 0.5), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold")) regressionGraph #Add Prediction Line predictionGraph <- regressionGraph + #Vertical Line annotate(geom="segment", size=0.8, arrow=arrow(type="open", length=unit(0.02, "npc")), x=updated_linkage$knownX, xend=updated_linkage$knownX, y=0, yend=updated_linkage %>% pull(aqMeHgSafeConc), color='grey20') + #Vertical Line Text - Known X annotate("text", x=0.27, hjust=0, y=0.02, label=paste0("italic(x)== ", updated_linkage$knownX, "[~Black~Bass~Implementation~Goal]"), parse=T, color='grey20') + #Horizontal Line annotate(geom="segment", size=0.8, arrow=arrow(type="open", length=unit(0.02, "npc")), x=updated_linkage$knownX, xend=0, y=updated_linkage %>% pull(aqMeHgSafeConc), yend=updated_linkage %>% pull(aqMeHgSafeConc), color='grey20') + #Horizontal Line Text - Predicted Y annotate("text", x=.027, hjust=0, y=updated_linkage %>% pull(aqMeHgSafeConc) + .004, label=paste0('italic(y)== ', sigfig(updated_linkage %>% pull(aqMeHgSafeConc), 3)), parse=T, color='grey20') predictionGraph ggsave(filename = paste0(wd, '/Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Fig 5.2 Updated Linkage Analysis.png'), plot = predictionGraph, width = 9, height = 5.75, units = "in", dpi = 125) ``` ### Linkage Data Table ```{r} Linkage.DRMPAq.DRMPBByr.Subarea %>% kbl(align='lccc', caption='Linkage Data by Subarea') %>% kable_paper('hover', full_width=T) %>% kable_styling(fixed_thead = T) Linkage.DRMPAq.DRMPBByr %>% select(Subarea, Year, n, Stdz350, aq_sampleYears, aq_n, aq_Median_pool) %>% kbl(align='lcccccc', caption='Linkage Data used to find Medians by Subarea') %>% kable_paper('hover', full_width=T) %>% kable_styling(fixed_thead = T) ``` # UPDATED LINKAGE vs. 2010 Staff Report LINKAGE ## Prepare 2010 Staff Report Linkage Data ```{r} LMBgoal <- 0.24 # Summarize Aq data to Recreate Orig Linkage Linkage2010_aqData <- aqData3 %>% filter(Linkage == "2010 Staff Report") %>% substituteNDDNQ(ResultQualCode, MDL.RL.fraction=.5) aq_origLinkage1 <- Linkage2010_aqData %>% group_by(Subarea, Unit, month(SampleDate)) %>% summarise(n = n(), Avg = mean(Result), .groups='drop') aq_origLinkage <- aq_origLinkage1 %>% group_by(Subarea, Unit) %>% summarise(Aq_n = sum(n), Aq_Mean = mean(Avg), Aq_Median = median(Avg), .groups='drop') %>% rename(Aq_Unit = Unit) # Summarize Fish Data to Recreate Orig Linkage Linkage2010_lmbData <- bbData_2000_2019 %>% filter (Linkage == "2010 Staff Report") %>% substituteNDDNQ(ResultQualCode, MDL.RL.fraction=.5) Linkage2010_lmbData_350models <- Linkage2010_lmbData %>% add_count(Subarea) %>% group_by(Subarea, Analyte, Unit) %>% nest %>% mutate(PwrReg = purrr::map(.x = data, .f = ~lm(log(Result) ~ log(TLAvgLength), data=.))) %>% # helpful link showing power regression in R (equivalent to power regression used in Excel) https://stackoverflow.com/questions/18305852/power-regression-in-r-similar-to-excel mutate(Stand350_pwrReg = purrr::map_dbl(.x = PwrReg, .f = ~signif(exp(summary(.x)$coefficients[1]), 1) * 350^summary(.x)$coefficients[2])) %>% #### mutate(ExpoReg = purrr::map(.x = data, .f = ~lm(log(Result) ~ TLAvgLength, data=.))) %>% mutate(Stand350_expoReg = purrr::map_dbl(.x = ExpoReg, .f = ~signif(exp(summary(.x)$coefficients[1]), 3) * exp(signif(summary(.x)$coefficients[2], 2) * 350) )) %>% #### mutate(Rsq_pwrReg = purrr::map_dbl(.x = PwrReg, .f = ~summary(.x)$r.squared)) %>% mutate(Rsq_expoReg = purrr::map_dbl(.x = ExpoReg, .f = ~summary(.x)$r.squared)) %>% select(-PwrReg, -ExpoReg) %>% unnest(cols = Stand350_pwrReg) %>% ungroup() %>% arrange(Subarea) Linkage2010_lmbData_350 <- Linkage2010_lmbData_350models %>% select(-starts_with("Rsq")) %>% pivot_longer( cols = Stand350_pwrReg:Stand350_expoReg, names_to = "Model", names_prefix = "Stand350_", values_to = "Stand350" ) %>% arrange(Subarea, Model) Linkage2010_lmbData_rsq <- Linkage2010_lmbData_350models %>% select(-starts_with("Stand350")) %>% pivot_longer( cols = Rsq_pwrReg:Rsq_expoReg, names_to = "Model", names_prefix = "Rsq_", values_to = "Rsq" ) %>% arrange(Subarea, Model) Linkage2010_lmbData_Stand350 <- Linkage2010_lmbData_350 %>% mutate(Rsq = Linkage2010_lmbData_rsq$Rsq) %>% group_by(Subarea) %>% filter(Rsq == max(Rsq)) %>% select(Subarea, lmb_data=data, lmb_Analyte=Analyte, lmb_Unit=Unit, lmb_350=Stand350, lmb_model=Model, lmb_rsq=Rsq) # Merge 2010 Staff Report Aq & LMB Summary Data for Linkage Graph orig_linkageData <- aq_origLinkage %>% left_join(., Linkage2010_lmbData_Stand350, "Subarea") %>% drop_na() %>% mutate(Aq_Mean = round(Aq_Mean, 3), Aq_Median = round(Aq_Median, 3), lmb_350 = round(lmb_350, 2)) ``` ## View Updated & 2010 Staff Report Linkage Graph ```{r} # 2010 Staff Report Linkage Data Model Prediction Line origModel_predictRange <- tibble(lmb_350=seq(floor(min(LMBgoal, BBImpGoal, orig_linkageData$lmb_350, Linkage.DRMPAq.DRMPBByr.Subarea$Std350_Median) * 95) /100, ceiling(max(orig_linkageData$lmb_350, Linkage.DRMPAq.DRMPBByr.Subarea$Std350_Median) * 105) /100, length.out=500)) orig_linkageModels <- predictionModels(orig_linkageData, .knownYcol=Aq_Mean, .knownXcol=lmb_350, knownX=LMBgoal) %>% rename(aqMeHgSafeConc = predictedY) orig_linkage <- orig_linkageModels %>% filter(ModelName == 'pwr') %>% mutate(aqMeHgSafeConc = signif(aqMeHgSafeConc, 2)) orig_linkage_model <- orig_linkage$Model[[1]] model_predictOrig <- origModel_predictRange %>% mutate(Aq_Mean = case_when(orig_linkage$ModelName == 'pwr' | orig_linkage$ModelName == 'exp' ~ exp(predict(orig_linkage_model, origModel_predictRange %>% select(lmb_350))), T ~ predict(orig_linkage_model, origModel_predictRange %>% select(lmb_350))) ) # PLOT DATA POINTS & REGRESSION MODELS DMCP_vs_Orig_LinkageGraph <- predictionGraph + # ggtitle("2010 Staff Report Linkage Data [2000] & Updated Linkage [DRMP 2016-2019]") + xlab('Hg Conc. of Standardized 350mm Largemouth or Black Bass (mg/Kg)') + ylab('Unfiltered Aqueous MeHg (ng/L)') + #2010 Staff Report Linkage Data Points geom_point(data=orig_linkageData, aes(x=lmb_350, y=Aq_Mean, shape=Subarea), alpha=1, size=3.5, stroke=1.5, color='grey46', show.legend=F) + #2010 Staff Report Linkage Model Regression Line geom_point(data=model_predictOrig, aes(x=lmb_350, y=Aq_Mean), color='grey46', size=.3, show.legend=F) + # Text - Regression Equation annotate("text", x=1.05, hjust=0, y=.185+.005, label=paste0("italic(y)==", round(exp(orig_linkage_model$coefficients['(Intercept)']),3), "~'*'~ italic(x) ^", round(orig_linkage_model$coefficients['log(lmb_350)'],3)), parse=T, color='grey46') + # Text - Regression Model SER (Standard Error of Regression) annotate("text", x=1.05, hjust=0, y=.185-.004, # updated_linkage %>% pull(aqMeHgSafeConc)*1.9, label=paste0("italic(~SER) == ", sigfig(orig_linkage$SER,3)), parse=T, color='grey46') DMCP_vs_Orig_LinkageGraph # ADD MODEL PREDICTION VALUES DMCP_vs_Orig_LinkageGraph_prediction <- DMCP_vs_Orig_LinkageGraph + #2010 Staff Report Linkage Vertical Line geom_segment(aes(x=LMBgoal, xend=LMBgoal, y=0, yend=orig_linkage$aqMeHgSafeConc), size=0.8, arrow=arrow(type="open", length=unit(0.02, "npc")), color='grey46') + #2010 Staff Report Linkage Vertical Line Text - Linkage LMB Goal annotate("text", x=.27, hjust=0, y=0.03, label=paste0("italic(x)== ", LMBgoal, "[~Largemouth~Bass~Implementation~Goal]"), parse=T, color='grey46') + # label=paste0('LMB<sub>goal</sub>=',LMBgoal,' mg/Kg '), color='grey56') + #2010 Staff Report Linkage Horizontal Line geom_segment(aes(x=LMBgoal, xend=0, y=orig_linkage$aqMeHgSafeConc, yend=orig_linkage$aqMeHgSafeConc), size=0.8, arrow=arrow(type="open", length=unit(0.02, "npc")), color='grey46') + #2010 Staff Report Linkage Horizontal Line Text - Predicted Aqueous Concentration annotate("text", x=.027, hjust=0, y=orig_linkage$aqMeHgSafeConc + .004, label=paste0('italic(y)== ', sigfig(orig_linkage$aqMeHgSafeConc,2)), parse=T, color='grey46') DMCP_vs_Orig_LinkageGraph_prediction ggsave(filename = paste0(wd, '/Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Fig 5.3 Linkage Analysis Comparison.png'), plot = DMCP_vs_Orig_LinkageGraph_prediction, width = 9, height = 5.75, units = "in", dpi = 125) ``` ## Effect of Switching Axes for 2010 Staff Report Linkage Analysis 2010 Staff Report Linkage plotted Largemouth Bass data (Y-axis) vs. Aqueous data (X-axis). The resulting regression equation requires solving for X to determine the Aqueous [Hg] Implementation Goal. Because R's predict() solves for Y and not X, test to see if switching coordinates has an effect on calculating the Aq [Hg] Implementation Goal ```{r} origLinakge_predictAq <- function(lmb_goal){ signif((lmb_goal/exp(PwrReg$coefficients[1]))^(1/PwrReg$coefficients[2]), 3) } # Calculate Aq Implementation Goal using LMB 350mm vs. Aq (Method for Linkage used in 2010 TMDL Staff Report) PwrReg <- lm(log(lmb_350) ~ log(Aq_Mean), data=orig_linkageData) aqMeHgSafeConc_calc <- origLinakge_predictAq(LMBgoal) aqMeHgSafeConc_pred <- NA # axes are in wrong order & can't be done in R Rsq <- signif(summary(PwrReg)$r.square, 2) RMSE <- signif(sqrt(mean((origLinakge_predictAq(orig_linkageData$lmb_350) - orig_linkageData$Aq_Mean)^2)), 3) # Calculate Aq Implementation Goal using Aq vs. LMB 350 (New Method) PwrReg_flip <- lm(log(Aq_Mean) ~ log(lmb_350), data=orig_linkageData) aqMeHgSafeConc_flip_calc <- signif(exp(PwrReg_flip$coefficients[1])*LMBgoal^PwrReg_flip$coefficients[2], 2) aqMeHgSafeConc_flip_pred <- signif(exp(predict(PwrReg_flip, tibble(lmb_350=LMBgoal))), 2) aqMeHgSafeConc_flip_Rsq <- signif(summary(PwrReg_flip)$r.square, 2) aqMeHgSafeConc_flip_RMSE <- sqrt(mean((exp(predict(PwrReg_flip)) - orig_linkageData$Aq_Mean)^2)) coordFlipResultSummary <- data.frame('_' = c('Aq Hg Goal (Calc)', 'Aq Hg Goal (R Predict)', 'R^2', 'RMSE'), OrigLinkage = c(aqMeHgSafeConc_calc, aqMeHgSafeConc_pred, Rsq, RMSE), OrigLinkageCoordFlip = c(aqMeHgSafeConc_flip_calc, aqMeHgSafeConc_flip_pred, aqMeHgSafeConc_flip_Rsq, aqMeHgSafeConc_flip_RMSE)) coordFlipResultSummary %>% signif_df(., 3) %>% kbl(align='lcc', caption='Comparison of Aq Hg Goal After Flipping Coords') %>% kable_paper('hover', full_width=T) %>% kable_styling(fixed_thead = T) # Switching axes for 2010 Staff Report linkage results in different Aq [Hg] Implementation Goal. However, Board staff decided to update the linkage graphs as Aqueous data (Y-axis) vs. Largemouth Bass data (X-axis). This is because solving for Y given X is (a) a more standard mathematical approach, and (b) is easily implemented in R using the predict() function. In addition, the difference from switching coordinates (0.066 for the 2010 Staff Report graph vs 0.0689 with coordinates switched) is not considerable. ``` # UPDATED LINKAGE: Margin of Safety Analysis ## View Probability Histogram of Aqueous Safe Concentrations ```{r} setwd(wd) load('Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/randomSampled_FINAL_median_5yr DRMP Aq & BB 2016-2019.RData') # loads randomSampled_AqSafeConc ~ random sampling results ggplot() + geom_histogram(aes(x=randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, y=after_stat(ncount)), alpha = 0.5, position = 'identity') + geom_vline(xintercept=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .5), color='red', size=1) + annotate(geom='text', x=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .5) - .0005, y=.5, label='Median', color='red', angle=90)+ geom_vline(xintercept=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .05), color='red', size=1.2, linetype='dashed') + annotate(geom='text', x=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .05) - .0005, y=.5, label='5th Percentile', color='red', angle=90)+ geom_vline(xintercept=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .95), color='red', size=1.2, linetype='dashed') + annotate(geom='text', x=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .95) - .0005, y=.5, label='95th Percentile', color='red', angle=90)+ theme_light() ``` ## View Histogram of Aqueous Safe Conc Linkage Model Errors ```{r} wt.avg5yr_BestErrors <- randomSampled_AqSafeConc$aqMeHgSafeConcs %>% filter(SER <= quantile(SER, 1)) # quantile(SER, 1) uses all the models wg_hist <- ggplot() + xlab("Predicted Protective Aqueous MeHg Concentration (ng/L)") + ylab("Frequency") + geom_histogram(aes(x=wt.avg5yr_BestErrors$aqMeHgSafeConc, y=after_stat(count)), alpha = 0.5, position = 'identity') + geom_vline(xintercept=updated_linkage$aqMeHgSafeConc, color='black', size=1, linetype='dashed') + annotate(geom='text', x=updated_linkage$aqMeHgSafeConc - .0006, y=505, hjust=0, label=paste('DMCP Review Linkage Model =',updated_linkage$aqMeHgSafeConc), color='black', angle=90)+ geom_vline(xintercept=quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .5), color='red', size=1) + annotate(geom='text', x=quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .5) - .0006, y=505, hjust=0, label=paste('Median =',signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .5),3)), color='red', angle=90)+ geom_vline(xintercept=signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .05), 2), color='red', size=1.2, linetype='dashed') + annotate(geom='text', x=signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .05), 2) - .0006, y=505, hjust=0, label=paste('5th Percentile =',signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .05), 2)), color='red', angle=90) + scale_y_continuous(expand=c(0,0), limits = c(0, 1500)) + theme_light() + theme(text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text = element_text(size=14, angle = 0, hjust = 0.5), panel.grid.major.y=element_line(size=1), # panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold")) wg_hist ggsave(filename = paste0(wd, '/Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Fig 5.4 Probability Distribution of Aqueous Protective Concs.png'), plot = wg_hist, width = 9, height = 5.75, units = "in", dpi = 125) signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .5), 4) # Median value updated_linkage$aqMeHgSafeConc (aq_HgGoal <- signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .05), 2)) # Aq water goal w/ Margin of Safety # Calculate Margin of Safety MarginSafety <- signif((1 - aq_HgGoal/updated_linkage$aqMeHgSafeConc) * 100, 2) names(MarginSafety) <- 'Margin of Safety %' MarginSafety ``` # Aqueous Percent Reduction Needed ```{r} # Get median MeHg for Subareas not used in the Linkage Analysis using same methods as AqSampleFishModelPairing() - then calc median of pooled median CCSB <- aqData3 %>% filter(Subarea == 'Cache Creek Settling Basin') YoloBypassNorth_CCSB <- aqData3 %>% filter(Subarea == 'YB-North') %>% bind_rows(CCSB) %>% mutate(Subarea = "YB-North w/ CCSB") YoloBypass_CCSB <- aqData3 %>% filter(grepl("yolo", Subarea, ignore.case=T)) %>% # filter out subareas used in linkage bind_rows(CCSB) %>% mutate(Subarea = "YB N & S w/ CCSB") dataNotInLinkage <- aqData3 %>% filter(Subarea %are not% unique(Linkage.DRMPAq.DRMPBByr.Subarea$Subarea)) %>% bind_rows(CCSB, YoloBypassNorth_CCSB, YoloBypass_CCSB) AqResults_NotInLinkage <- NULL AqResultSummary_NotInLinkage <- NULL subareasNotInLinkage <- dataNotInLinkage %>% distinct(Subarea) %>% pull for(subarea in subareasNotInLinkage){ ## Gather Aq Samples ## aqSubarea <- dataNotInLinkage %>% filter(Subarea == subarea) sampleYears_temp <- aqSubarea %>% filter(seasonYear > 2010) %>% distinct(seasonYear) %>% arrange(seasonYear) %>% pull # only using data after 2010 sampleYears <- sampleYears_temp[ifelse(length(sampleYears_temp)>=5, length(sampleYears_temp)-4, 1):length(sampleYears_temp)] # Get 5 Years of most recent data for (k in 1:length(sampleYears)) { aqSampleYr <- aqSubarea %>% filter(seasonYear %in% sampleYears[1:k]) # Include earliest year and then consecutive subsequent years - as what was done in AqSampleFishModelPairing() # Calc Median Aq Concs of Pooled data aqSampleYrs <- paste(aqSampleYr %>% distinct(seasonYear) %>% pull %>% sort, collapse=", ") # Collapse Aq Smple Years for storage in dataframe tempAqSummary <- tibble(Subarea = subarea, aq_sampleYears = aqSampleYrs, aq_n = nrow(aqSampleYr), aq_Median_pool = median(aqSampleYr$Result)) # Store Results AqResults_NotInLinkage <- bind_rows(AqResults_NotInLinkage, aqSampleYr) AqResultSummary_NotInLinkage <- bind_rows(AqResultSummary_NotInLinkage, tempAqSummary) } } AqResultSummary_NotInLinkage %>% kbl(align='lccc', caption='Pooled Median Aq [MeHg] for Subareas Not Used in Linkage Analysis') %>% kable_paper('hover', full_width=T) %>% kable_styling(fixed_thead = T) AqResults_SubareasNotInLinkage <- AqResultSummary_NotInLinkage %>% group_by(Subarea) %>% mutate(sampleYears = aq_sampleYears[which.max(aq_n)]) %>% ungroup() %>% group_by(Subarea, sampleYears) %>% summarize(n = n(), Std350_Median = NA_real_, Aq_Median_ofPooledMedians = median(aq_Median_pool), modelAqConc = NA_real_, UsedInLinkage = 'No', .groups = 'drop') subarea_percArea <- tibble('Central Delta'=.31,'Moke/Cos Rivers'=.01,'Marsh Creek'=.02,'Sacramento River'=.25,'San Joaquin River'=.16,'West Delta'=.07,'YB-North w/ CCSB'=.05,'YB-South'=.13) %>% # only include values for suabreas to be used to calculate weightedMedian below pivot_longer(cols=everything(), names_to='Subarea', values_to='perc_Area') SubareaPercReduction <- Linkage.DRMPAq.DRMPBByr.Subarea %>% group_by(Subarea) %>% mutate(modelAqConc = purrr::map_dbl(.x=updated_linkage$Model[1], .f = ~predict(.x, tibble(Std350_Median = cur_data()$Std350_Median)))) %>% ungroup %>% select(Subarea, Std350_Median, Aq_Median_ofPooledMedians, modelAqConc, UsedInLinkage, n, sampleYears) %>% bind_rows(AqResults_SubareasNotInLinkage) %>% left_join(., subarea_percArea, by='Subarea') %>% # adds perc_Area column mutate( # Std350_Median = signif(Std350_Median, 3), # Aq_Median_ofPooledMedians = signif(Aq_Median_ofPooledMedians, 2), # `2010 Method % Reduction` = round((Aq_Median_ofPooledMedians - aq_HgGoal)/Aq_Median_ofPooledMedians * 100, 1), `2010 Method % Reduction` = (Aq_Median_ofPooledMedians - aq_HgGoal)/Aq_Median_ofPooledMedians * 100, # `Model Aq Conc Method % Reduction` = round((modelAqConc - aq_HgGoal)/modelAqConc * 100, 1) `Model Aq Conc Method % Reduction` = (modelAqConc - aq_HgGoal)/modelAqConc * 100 ) %>% add_row(Subarea = "Median Weighted by perc_Area", summarize(., across(contains(c('Pooled','Final')), ~ weightedMedian(.x, w=perc_Area, na.rm=T)))) %>% # doesn't include subareas with a perc_Area of NA mutate(`Final Aq Conc Method % Reduction` = ifelse(is.na(modelAqConc), (Aq_Median_ofPooledMedians - aq_HgGoal)/Aq_Median_ofPooledMedians * 100, (modelAqConc - aq_HgGoal)/modelAqConc * 100)) %>% select(Subarea, perc_Area, everything()) SubareaPercReduction %>% kbl(align='lccc', caption='Percent Aq [MeHg] Reductions Needed for each Subarea') %>% kable_paper('hover', full_width=T) %>% kable_styling(fixed_thead = T) ``` ## Save Data Tables ```{r} setwd(wd) writexl::write_xlsx(SubareaPercReduction, path='Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Table 5.1_Aq Percent Reduction Needed for Each Subarea.xlsx') writexl::write_xlsx(Linkage.DRMPAq.DRMPBByr, path='Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Table 5._LinkageDataUsedToFindFinalMediansBySubarea.xlsx') ```
## Error: <text>:19:1: unexpected '<' ## 18: ## 19: < ## ^
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=C ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## system code page: 65001 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] lubridate_1.8.0 plotly_4.10.0 readxl_1.4.1 actuar_3.3-0 ## [5] NADA_1.6-1.1 forcats_0.5.2 stringr_1.4.1 dplyr_1.0.9 ## [9] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.8 ## [13] ggplot2_3.3.6 tidyverse_1.3.2 fitdistrplus_1.1-8 survival_3.4-0 ## [17] MASS_7.3-58.1 ## ## loaded via a namespace (and not attached): ## [1] lattice_0.20-45 assertthat_0.2.1 digest_0.6.29 utf8_1.2.2 ## [5] R6_2.5.1 cellranger_1.1.0 backports_1.4.1 reprex_2.0.2 ## [9] evaluate_0.16 highr_0.9 httr_1.4.4 pillar_1.8.1 ## [13] rlang_1.0.5 lazyeval_0.2.2 googlesheets4_1.0.1 rstudioapi_0.14 ## [17] data.table_1.14.2 Matrix_1.5-1 rmarkdown_2.16 splines_4.2.2 ## [21] googledrive_2.0.0 htmlwidgets_1.5.4 munsell_0.5.0 broom_1.0.1 ## [25] compiler_4.2.2 modelr_0.1.9 xfun_0.32 pkgconfig_2.0.3 ## [29] htmltools_0.5.3 tidyselect_1.1.2 fansi_1.0.3 viridisLite_0.4.1 ## [33] crayon_1.5.1 tzdb_0.3.0 dbplyr_2.2.1 withr_2.5.0 ## [37] grid_4.2.2 jsonlite_1.8.0 gtable_0.3.1 lifecycle_1.0.1 ## [41] DBI_1.1.3 magrittr_2.0.3 scales_1.2.1 writexl_1.4.0 ## [45] cli_3.3.0 stringi_1.7.8 fs_1.5.2 xml2_1.3.3 ## [49] ellipsis_0.3.2 generics_0.1.3 vctrs_0.4.1 expint_0.1-7 ## [53] tools_4.2.2 glue_1.6.2 hms_1.1.2 yaml_2.3.5 ## [57] fastmap_1.1.0 colorspace_2.0-3 gargle_1.2.0 rvest_1.0.3 ## [61] knitr_1.40 haven_2.5.1
Sys.time()
## [1] "2024-01-05 15:39:39 PST"