This report is automatically generated with the R package knitr (version 1.39).
---
title: '03_Wastewater Treatment Plants'
subtitle: 'Flow Data Evaluation'
author: "Mercury Program and Basin Planning Unit"
date: "3/1/2022"
output:
  html_document:
    code_folding: show
    toc: TRUE
    toc_float: TRUE
    toc_depth: 3
runtime: shiny
assets:
  css:
    - "http://fonts.googleapis.com/css?family=Raleway:300"
    - "http://fonts.googleapis.com/css?family=Oxygen"
---
---

<style>
body{
  font-family: 'Oxygen', sans-serif;
  font-size: 16px;
  line-height: 24px;
}

h1,h2,h3,h4 {
  font-family: 'Raleway', sans-serif;
}

.container { width: 1250px; }

h3 {
  background-color: #D4DAEC;
  text-indent: 50px;
}

h4 {
  text-indent: 75px;
  margin-top: 35px;
  margin-bottom: 5px;
}
</style>

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5)
```

```{r Libraries, echo=FALSE}
library(janitor)
library(kableExtra) # better formatting of tables
library(shiny)

# NOTE(review): `%>%`, the dplyr verbs and ggplot2 used below are not attached
# in this chunk; presumably the sourced function scripts attach them - confirm.

# Had issue trying to set WD with Shiny in R project, reset working directory of rproj
wd <- rstudioapi::getActiveProject()
if (is.null(wd)) {
  # getActiveProject() returns NULL when no project is open; fail with a clear
  # message instead of the cryptic error setwd(NULL) would raise.
  stop("No active RStudio project found; open the project (.Rproj) before running this document.",
       call. = FALSE)
}
setwd(wd)

source("R Functions/functions_estimate NDDNQ values.R")
source("R Functions/functions_QA data.R")

# Read eSMR flow data set (column names standardised by clean_names())
flow <- readxl::read_xlsx(
  "Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_NPDES_eSMR Flow Data.xlsx",
  guess_max = 300000,
  sheet = "Flow"
) %>%
  clean_names()

# Read facility lookup table; keep one row per eSMR_Name so the join below
# cannot duplicate flow records
facilityInfo <- readxl::read_xlsx(
  "Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_NPDES Facility Info.xlsx",
  guess_max = 300000,
  sheet = "Final Table"
) %>%
  dplyr::select(Discharger, Facility, eSMR_Name, GraphName, FacilityType,
                Treatment, Subarea, Permit, Receiving_water) %>%
  filter(!is.na(eSMR_Name)) %>%
  distinct(eSMR_Name, .keep_all = TRUE)
```

## Clean Data

### Fix Column Names

```{r}
### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
#Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns
keep_cols <- c(
  'source_row',
  'source_id',
  'facility_name',
  'location_place_type',
  'location',
  'result',
  'units',
  'sampling_date',
  'sampling_time',
  'lattitude',   # spelled as in the source worksheet - do not "correct"
  'longitude',
  'location_desc'
)

temp_cols <- c('report_name') #Include columns that do not match CEDEN standards but may be useful (e.g., Unit columns for MDL & RL)
#temp_cols are removed before the data is merged with other datasets

flow_correct_columns <- flow %>%
  # all_of() makes it explicit that these are external character vectors of
  # column names and errors if any listed column is missing
  dplyr::select(all_of(c(keep_cols, temp_cols))) %>% #DO NOT CHANGE - selects columns specified above
  rename(
    #Rename worksheet columns to CEDEN format here: CEDEN 'COLUMNNAME' = WORKSHEET 'COLUMNNAME'
    #DELETE COLUMN NAMES THAT DO NOT HAVE AN EQUIVALENT COLUMN IN THE WORKSHEET
    SourceRow = source_row,
    SourceID = source_id,
    StationName = facility_name,
    StationCode = location_place_type,
    SampleTypeCode = location,
    Result = result,
    Unit = units,
    SampleDate = sampling_date,
    SampleTime = sampling_time,
    TargetLatitude = lattitude,
    TargetLongitude = longitude,
    CollectionComments = location_desc
  )
```

### Format Column Parameters

```{r}
# chara_to_NumDate() is defined in the sourced function scripts
flow_correct_columns <- chara_to_NumDate(flow_correct_columns)

# Check for unexpected StationCode variances
flow_correct_columns %>%
  unique_factors(StationCode)

# Filter for Effluent Monitoring flows
flow_effluent <- flow_correct_columns %>%
  filter(StationCode == 'Effluent Monitoring')

# Check for unexpected Unit variances
flow_effluent %>%
  mutate(StationNameUnit = paste(StationName, Unit, sep=' ~ ')) %>%
  unique_factors(StationNameUnit)

# City of Manteca submitted double results for same day using 2 Units, MGD & gal/min per lamp - filter for MGD so other units don't cause confusion
flow_units <- flow_effluent %>%
  filter(Unit == 'MGD')

### Graph Data to see if any Order of Magnitude errors
ggplot() +
  geom_point(data=flow_units, aes(x=SampleDate, y=Result)) +
  theme_light()

# FIX Dates - 1900 could not find appropriate date for 1900 (n ~ 2); 2112 is supposed to be 2012 according to Analysis Date col, however there are repeated date entries with correct dates and same flow values - filter these date extremes out
flow_dates <- flow_units %>%
  filter(SampleDate > '2000-01-01' & SampleDate < '2025-01-01')

ggplot() +
  geom_point(data=flow_dates, aes(x=SampleDate, y=Result)) +
  theme_light()

flow_dates %>%
  mutate(StationName_SampleType = paste(StationName, SampleTypeCode, sep=' ~ ')) %>%
  unique_factors(StationName_SampleType, StationCode, Unit)

# Add Discharger, Facility, GraphName, FacilityType, Treatment, Subarea, Permit, & Receiving_water columns
flow_facility_info <- flow_dates %>%
  left_join(., facilityInfo, by=c("StationName"="eSMR_Name"))

### REMOVE TEMPORARY COLUMNS ###
flow_formatted <- flow_facility_info %>%
  dplyr::select(-any_of(temp_cols)) %>% #Remove temp columns since they are no longer needed
  arrange(StationName, SampleDate)
```

## Export to Excel

```{r}
# Sys.Date() stamps the file name as YYYY-MM-DD without relying on lubridate
# being attached by one of the sourced scripts.
writexl::write_xlsx(
  flow_formatted,
  file.path(
    wd,
    paste0(
      "Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03.1_WWTP Flow Data Prep_Clean_",
      Sys.Date(),
      ".xlsx"
    )
  )
)
```
## Error: <text>:20:1: unexpected '<' ## 19: ## 20: < ## ^
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8 ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] lubridate_1.8.0 plotly_4.10.0 readxl_1.4.0 actuar_3.2-2 ## [5] NADA_1.6-1.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9 ## [9] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.7 ## [13] ggplot2_3.3.6 tidyverse_1.3.1 fitdistrplus_1.1-8 survival_3.4-0 ## [17] MASS_7.3-58.1 ## ## loaded via a namespace (and not attached): ## [1] lattice_0.20-45 assertthat_0.2.1 digest_0.6.29 utf8_1.2.2 ## [5] R6_2.5.1 cellranger_1.1.0 backports_1.4.1 reprex_2.0.1 ## [9] evaluate_0.15 httr_1.4.3 highr_0.9 pillar_1.7.0 ## [13] rlang_1.0.2 lazyeval_0.2.2 rstudioapi_0.13 data.table_1.14.2 ## [17] Matrix_1.5-1 rmarkdown_2.14 labeling_0.4.2 splines_4.2.2 ## [21] htmlwidgets_1.5.4 munsell_0.5.0 broom_0.8.0 compiler_4.2.2 ## [25] modelr_0.1.8 xfun_0.31 pkgconfig_2.0.3 htmltools_0.5.2 ## [29] tidyselect_1.1.2 viridisLite_0.4.0 fansi_1.0.3 crayon_1.5.1 ## [33] tzdb_0.3.0 dbplyr_2.2.0 withr_2.5.0 grid_4.2.2 ## [37] jsonlite_1.8.0 gtable_0.3.0 lifecycle_1.0.1 DBI_1.1.2 ## [41] magrittr_2.0.3 scales_1.2.0 writexl_1.4.0 cli_3.3.0 ## [45] stringi_1.7.6 farver_2.1.0 fs_1.5.2 xml2_1.3.3 ## [49] ellipsis_0.3.2 generics_0.1.2 vctrs_0.4.1 expint_0.1-7 ## [53] RColorBrewer_1.1-3 tools_4.2.2 glue_1.6.2 hms_1.1.1 ## [57] yaml_2.3.5 fastmap_1.1.0 colorspace_2.0-3 rvest_1.0.2 ## [61] knitr_1.39 haven_2.5.0
Sys.time()
## [1] "2023-12-27 10:11:32 PST"