This report is automatically generated with the R
package knitr
(version 1.39
)
.
--- title: '03_Wastewater Treatment Plants' subtitle: 'Aqueous MeHg Data Evaluation' author: "Mercury Program and Basin Planning Unit" date: "3/1/2022" output: html_document: code_folding: show toc: TRUE toc_float: TRUE toc_depth: 3 runtime: shiny assets: css: - "http://fonts.googleapis.com/css?family=Raleway:300" - "http://fonts.googleapis.com/css?family=Oxygen" --- --- <style> body{ font-family: 'Oxygen', sans-serif; font-size: 16px; line-height: 24px; } h1,h2,h3,h4 { font-family: 'Raleway', sans-serif; } .container { width: 1250px; } h3 { background-color: #D4DAEC; text-indent: 50px; } h4 { text-indent: 75px; margin-top: 35px; margin-bottom: 5px; } </style> ```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5) ``` ## Load Packages, Functions, & Data ```{r Libraries, echo=FALSE} library(janitor) # for clean_names() library(kableExtra) # better formatting of tables library(shiny) # Had issue trying to set WD with Shiny in R project, reset working directory of rproj wd <- rstudioapi::getActiveProject() setwd(wd) source("R Functions/functions_estimate NDDNQ values.R") source("R Functions/functions_QA data.R") # Read MeHg Data set esmr <- readxl::read_xlsx("Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_NPDES_eSMR MeHg Data.xlsx", guess_max = 300000, sheet = "OIMA eSMR FixErrors-AddLathrop") %>% clean_names() facilityInfo <- readxl::read_xlsx("Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_NPDES Facility Info.xlsx", guess_max = 300000, sheet = "Final Table") %>% select(Discharger, Facility, eSMR_Name, GraphName, FacilityType, Treatment, Subarea, Status, Permit, Receiving_water) %>% filter(Status %in% c("Active", "Closing", "New")) %>% distinct(eSMR_Name, .keep_all=T) ``` ## Clean Data ### Fix Column Names ```{r} ### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ### #Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns keep_cols <- c( 'source_row', 'source_id', 'station_name', 'location_place_type', 'location', 'parameter', 'sample_date', 'unit', 'result', 'mdl', 'rl', 'result_qual_code', 'sampling_time', 'analytical_method', 'lattitude', 'longitude', 'location_desc' ) temp_cols <- c('facility_place_id', 'location_place_id', 'calculated_method', 'ml', 'analysis_date', 'analysis_time', 'qa_codes', 'comments', 'analytical_method_code', 'report_name', 'smr_document_id', 'review_priority_indicator', 'receiving_water_body') #Include columns that do not match CEDEN standards but may be useful (e.g., Unit columns for MDL & RL) #temp_cols are removed before the data is merged with other datasets esmr_correct_columns <- esmr %>% dplyr::select(keep_cols,temp_cols) %>% #DO NOT CHANGE - selects columns specified above rename( #Rename worksheet columns to CEDEN format here: CEDEN 'COLUMNNAME' = WORKSHEET 'COLUMNNAME' #DELTE COLUMN NAMES THAT DO NOT HAVE AN EQUIVALENT COLUMN IN THE WORKSHEET SourceRow = source_row, SourceID = source_id, StationName = station_name, StationCode = location_place_type, SampleTypeCode = location, Analyte = parameter, SampleDate = sample_date, Unit = unit, Result = result, MDL = mdl, RL = rl, ResultQualCode = result_qual_code, SampleTime = sampling_time, MethodName = analytical_method, TargetLatitude = lattitude, TargetLongitude = longitude, CollectionComments = location_desc ) ``` ### Format Column Parameters ```{r} esmr_correct_columns <- chara_to_NumDate(esmr_correct_columns) # Check STATIONCODE for unexpected parameter variances esmr_correct_columns %>% unique_factors(StationCode) # Filter for Effluent Monitoring esmr_eff <- esmr_correct_columns %>% filter(StationCode == 'Effluent Monitoring') # Check RESULTQUALCODE for unexpected parameter variances esmr_eff %>% unique_factors(ResultQualCode) # Change "<" to "ND" esmr_qualcode <- esmr_eff %>% mutate(ResultQualCode = recode(ResultQualCode, "<" = "ND")) # Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information nrow(esmr_qualcode) #Number rows before #CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS # esmr_qualcode <- esmr_qualcode %>% #Set 0 & negative values as blank mutate(Result = ifelse(Result <= 0, NA_real_, Result), MDL = ifelse(MDL <= 0, NA_real_, MDL), RL = ifelse(RL <= 0, NA_real_, RL)) na_results <- esmr_qualcode %>% #Record rows where Result, MDL, & RL all equal <NA> filter( is.na(Result) & is.na(MDL) & is.na(RL) ) nrow(na_results) esmr_qualcode <- anti_join(esmr_qualcode, na_results) #returns rows from esmr_qualcode not matching values in no_result nrow(esmr_qualcode) #Number rows after # Format Units Column - "ng/L", "mg/Kg" unique(esmr_qualcode$Unit) #Identifies OLDNAMES # If more than 1 unit colmn exists (e.g., for RL and MDL columns) see WQP script for example on merging into 1 column esmr_units <- esmr_qualcode %>% filter(Unit %in% c("ng/L", "ug/L", "mg/L")) %>% standardizeUnits() unique(esmr_qualcode$Unit) #New naming structure for Unit groupings # Add Discharger, Facility, GraphName, FacilityType, Treatment, Subarea, Permit, & Receiving_water columns esmr_facility_info <- esmr_units %>% left_join(., facilityInfo, by=c("StationName"="eSMR_Name")) ### REMOVE TEMPORARY COLUMNS ### esmr_formatted <- esmr_facility_info %>% dplyr::select(-one_of(temp_cols)) %>% #Remove temp columns since they are no longer needed arrange(StationName, SampleDate) %>% chara_to_NumDate #custom function to format numbers & dates from chr ``` ### Graph Data to see if any Order of Magnitude errors ```{r} esmr_formatted_plot <- esmr_formatted %>% # add columns so plotting function works mutate(Shape = NA) result_time_Plotly(esmr_formatted_plot, SourceID, logscale=F) ``` ## Compare Data to CVCWA Control Study ### Data Summary Appendix B (pdf p265) ```{r} esmr_formatted_subND <- substituteNDDNQ(esmr_formatted, ResultQualCode, MDL.RL.fraction=0.5) esmr_cvcwa_summary <- esmr_formatted_subND %>% group_by(StationName) %>% filter(SampleDate >= "2014-10-01" & SampleDate <= "2017-09-30") %>% summarize(NDs = nrow(cur_data() %>% filter(ResultQualCode == 'ND')), # this operation needs to go 1st for some reason, maybe an issue with cur_data()?? Avg = mean(Result), Count = n(), Percent_NDs = NDs/Count * 100, minDate = min(SampleDate), maxDate = max(SampleDate)) %>% select(StationName, Avg, Count, NDs, Percent_NDs, minDate, maxDate) esmr_cvcwa_summary %>% kbl(align='lccc', caption='For Comparison to CVCWA Control Study Appendix B (pdf p265)') %>% kable_paper('hover', full_width=T) %>% kable_styling(fixed_thead = T) # writexl::write_xlsx(esmr_cvcwa_summary, paste0(wd, "Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_eSMR Data Summary_Reproduce 10-2014_09-2017 CVCWA.xlsx") ``` ## Final Data Eval & Save ### Estimate ND & DNQ Values ```{r} esmr_formatted_distND <- robinNDDNQ(esmr_formatted_plot, ResultQualCode) ``` ### View Fitted Distribution Graphs ```{r, echo=FALSE} ui <- fluidPage( selectInput('distName', label='Select Distribution', choices=names(esmr_formatted_distND), selected=1), plotOutput('distPlot') ) server <- function(input, output, session) { output$distPlot <- renderPlot({ plot(esmr_formatted_distND[[input$distName]]$fitmodel) # view plot fits title(input$distName, outer=T) }) } shinyApp(ui, server, options=list(height=500)) ``` ### Set Final ND/DNQ Estimation ```{r} esmr_final <- esmr_formatted_distND$weibull$fitted # Selected distribution - primarily based on CDF & P-P plot showing best fit to data ``` ### Graph Individual WWTPs - Compare w/ Appendix C (pdf p218-244) ```{r, echo=FALSE} ui <- fluidPage( selectInput('wwtp', label='Select WWTP', choices=unique(esmr_final$StationName), selected=1), plotlyOutput('wwtpPlot') ) server <- function(input, output, session) { data1 <- reactive({ esmr_final %>% # add columns so plotting function works mutate( Shape = NA) %>% # filter for selected wwtp filter(StationName == input$wwtp) }) output$wwtpPlot <- renderPlotly({ req(input$wwtp) result_time_Plotly(data1(), StationName, logscale=F) }) } shinyApp(ui, server, options=list(height=500)) ``` ### Final Data Summary ```{r} esmr_final %>% group_by(Discharger, Facility) %>% summarize(NDDNQs = nrow(cur_data() %>% filter(ResultQualCode != '=')), # this operation needs to go 1st for some reason, maybe an issue with cur_data()?? Min = min(Result), Max = max(Result), Median = median(Result), Avg = mean(Result), Count = n(), Percent_NDDNQs = NDDNQs/Count * 100, minDate = min(SampleDate), maxDate = max(SampleDate)) %>% # select(StationName, Min, Max, Median, Avg, Count, NDDNQs, Percent_NDDNQs, minDate, maxDate) %>% kbl(align='lccc', caption='NPDES MeHg Data Summary') %>% kable_paper('hover', full_width=T) %>% kable_styling(fixed_thead = T) ``` # Export to excel ```{r} writexl::write_xlsx(esmr_final, paste0(wd, "/Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03.1_WWTP MeHg Data Prep_Clean_", today(), ".xlsx")) ```
## Error: <text>:20:1: unexpected '<' ## 19: ## 20: < ## ^
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8 ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] lubridate_1.8.0 plotly_4.10.0 readxl_1.4.0 actuar_3.2-2 ## [5] NADA_1.6-1.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9 ## [9] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.7 ## [13] ggplot2_3.3.6 tidyverse_1.3.1 fitdistrplus_1.1-8 survival_3.4-0 ## [17] MASS_7.3-58.1 ## ## loaded via a namespace (and not attached): ## [1] lattice_0.20-45 assertthat_0.2.1 digest_0.6.29 utf8_1.2.2 ## [5] R6_2.5.1 cellranger_1.1.0 backports_1.4.1 reprex_2.0.1 ## [9] evaluate_0.15 httr_1.4.3 highr_0.9 pillar_1.7.0 ## [13] rlang_1.0.2 lazyeval_0.2.2 rstudioapi_0.13 data.table_1.14.2 ## [17] Matrix_1.5-1 rmarkdown_2.14 labeling_0.4.2 splines_4.2.2 ## [21] htmlwidgets_1.5.4 munsell_0.5.0 broom_0.8.0 compiler_4.2.2 ## [25] modelr_0.1.8 xfun_0.31 pkgconfig_2.0.3 htmltools_0.5.2 ## [29] tidyselect_1.1.2 viridisLite_0.4.0 fansi_1.0.3 crayon_1.5.1 ## [33] tzdb_0.3.0 dbplyr_2.2.0 withr_2.5.0 grid_4.2.2 ## [37] jsonlite_1.8.0 gtable_0.3.0 lifecycle_1.0.1 DBI_1.1.2 ## [41] magrittr_2.0.3 scales_1.2.0 writexl_1.4.0 cli_3.3.0 ## [45] stringi_1.7.6 farver_2.1.0 fs_1.5.2 xml2_1.3.3 ## [49] ellipsis_0.3.2 generics_0.1.2 vctrs_0.4.1 expint_0.1-7 ## [53] RColorBrewer_1.1-3 tools_4.2.2 glue_1.6.2 hms_1.1.1 ## [57] yaml_2.3.5 fastmap_1.1.0 colorspace_2.0-3 rvest_1.0.2 ## [61] knitr_1.39 haven_2.5.0
Sys.time()
## [1] "2023-12-27 10:14:32 PST"