This report is automatically generated with the R package knitr (version 1.39).

---
title: '03_Wastewater Treatment Plants'
subtitle: 'Flow Data Evaluation'
author: "Mercury Program and Basin Planning Unit"
date: "3/1/2022"
output:
  html_document:
    code_folding: show
    toc: TRUE
    toc_float: TRUE
    toc_depth: 3
runtime: shiny
assets:
  css:
    - "http://fonts.googleapis.com/css?family=Raleway:300"
    - "http://fonts.googleapis.com/css?family=Oxygen"
---
---

<style>
/* Base typography for the report body: Oxygen face, 16px text on a 24px line. */
body{
  font-family: 'Oxygen', sans-serif;
  font-size: 16px;
  line-height: 24px;
}

/* Headings (levels 1-4) use the Raleway face instead of the body font. */
h1,h2,h3,h4 {
  font-family: 'Raleway', sans-serif;
}

/* Fixed width for elements with the .container class. */
.container { width: 1250px; }
/* Level-3 headings: shaded background band with indented text. */
h3 {
  background-color: #D4DAEC;
  text-indent: 50px; 
}
/* Level-4 headings: indented further than h3, with explicit top/bottom margins. */
h4 {
  text-indent: 75px;
  margin-top: 35px;
  margin-bottom: 5px;
}
</style>

```{r setup, include=FALSE}
# Default chunk options for the whole document: show code, hide warnings and
# messages, and use a 9.5-inch figure width.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.width = 9.5
)
```


```{r Libraries, echo=FALSE}
library(janitor)
library(kableExtra) # better formatting of tables
library(shiny)

# Had issues setting the working directory with Shiny inside an R project, so
# the working directory is reset to the root of the active RStudio project.
# All relative paths below are resolved against that root.
wd <- rstudioapi::getActiveProject()
if (is.null(wd)) {
  # getActiveProject() returns NULL when no project is open; fail with an
  # actionable message instead of the opaque error setwd(NULL) would raise.
  stop(
    "No active RStudio project found; open the project (.Rproj) before running this document.",
    call. = FALSE
  )
}
setwd(wd)

# Project helper functions (e.g. chara_to_NumDate(), unique_factors() used in
# later chunks are presumably defined here -- TODO confirm).
source("R Functions/functions_estimate NDDNQ values.R")
source("R Functions/functions_QA data.R")


# Read eSMR flow data set ("Flow" sheet) and standardise column names to
# snake_case with janitor::clean_names().
flow <- readxl::read_xlsx(
  "Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_NPDES_eSMR Flow Data.xlsx",
  guess_max = 300000,
  sheet = "Flow"
) %>%
  clean_names()

# Read facility metadata ("Final Table" sheet). Keep only the columns joined
# onto the flow data later, drop facilities without an eSMR name, and keep the
# first row per eSMR_Name so that name is unique.
# dplyr verbs are namespaced so they cannot be masked by other attached
# packages (e.g. MASS::select), matching the dplyr::select() calls used in
# later chunks.
facilityInfo <- readxl::read_xlsx(
  "Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_NPDES Facility Info.xlsx",
  guess_max = 300000,
  sheet = "Final Table"
) %>%
  dplyr::select(
    Discharger, Facility, eSMR_Name, GraphName, FacilityType,
    Treatment, Subarea, Permit, Receiving_water
  ) %>%
  dplyr::filter(!is.na(eSMR_Name)) %>%
  dplyr::distinct(eSMR_Name, .keep_all = TRUE)
```

## Clean Data
### Fix Column Names
```{r}
### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
# Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns

# Worksheet columns (after clean_names()) that map onto CEDEN standard columns.
# NOTE: 'lattitude' is spelled as it appears in the source worksheet.
keep_cols <-
  c(
    "source_row",
    "source_id",
    "facility_name",
    "location_place_type",
    "location",
    "result",
    "units",
    "sampling_date",
    "sampling_time",
    "lattitude",
    "longitude",
    "location_desc"
  )

# Include columns that do not match CEDEN standards but may be useful
# (e.g., Unit columns for MDL & RL).
# temp_cols are removed before the data is merged with other datasets.
temp_cols <- c("report_name")

flow_correct_columns <- flow %>%
  # DO NOT CHANGE - selects columns specified above.
  # all_of() marks keep_cols/temp_cols as external character vectors, so a
  # data column that happened to be named "keep_cols" or "temp_cols" can never
  # be picked up by mistake; a missing column still raises an error.
  dplyr::select(dplyr::all_of(keep_cols), dplyr::all_of(temp_cols)) %>%
  dplyr::rename(
    # Rename worksheet columns to CEDEN format here: CEDEN 'COLUMNNAME' = WORKSHEET 'COLUMNNAME'
    # DELETE COLUMN NAMES THAT DO NOT HAVE AN EQUIVALENT COLUMN IN THE WORKSHEET
    SourceRow = source_row,
    SourceID = source_id,
    StationName = facility_name,
    StationCode = location_place_type,
    SampleTypeCode = location,
    Result = result,
    Unit = units,
    SampleDate = sampling_date,
    SampleTime = sampling_time,
    TargetLatitude = lattitude,
    TargetLongitude = longitude,
    CollectionComments = location_desc
  )
```

### Format Column Parameters
```{r}
# Convert column types with the project helper chara_to_NumDate()
# (defined outside this file; presumably character -> numeric/date -- TODO confirm).
flow_correct_columns <- chara_to_NumDate(flow_correct_columns)

# Check for unexpected StationCode variances
flow_correct_columns %>%
  unique_factors(StationCode)

# Filter for Effluent Monitoring flows
flow_effluent <- flow_correct_columns %>%
  dplyr::filter(StationCode == "Effluent Monitoring")


# Check for unexpected Unit variances (listed per station so the facility
# reporting an odd unit can be identified)
flow_effluent %>%
  dplyr::mutate(StationNameUnit = paste(StationName, Unit, sep = " ~ ")) %>%
  unique_factors(StationNameUnit)

# City of Manteca submitted double results for same day using 2 Units,
# MGD & gal/min per lamp - filter for MGD so other units don't cause confusion
flow_units <- flow_effluent %>%
  dplyr::filter(Unit == "MGD")

### Graph Data to see if any Order of Magnitude errors
ggplot() +
  geom_point(data = flow_units, aes(x = SampleDate, y = Result)) +
  theme_light()


# FIX Dates - 1900: could not find appropriate date for 1900 (n ~ 2);
# 2112 is supposed to be 2012 according to Analysis Date col, however there are
# repeated date entries with correct dates and same flow values - filter these
# date extremes out.
# The bounds are kept as strings so the comparison is coerced to whatever
# date-time class SampleDate has. Rows with a missing SampleDate are dropped
# here as well, because filter() discards rows where the condition is NA.
flow_dates <- flow_units %>%
  dplyr::filter(SampleDate > "2000-01-01" & SampleDate < "2025-01-01")

# Re-plot after removing the date extremes
ggplot() +
  geom_point(data = flow_dates, aes(x = SampleDate, y = Result)) +
  theme_light()


# Review the remaining station / sample-type, StationCode and Unit combinations
flow_dates %>%
  dplyr::mutate(StationName_SampleType = paste(StationName, SampleTypeCode, sep = " ~ ")) %>%
  unique_factors(StationName_SampleType, StationCode, Unit)


# Add Discharger, Facility, GraphName, FacilityType, Treatment, Subarea, Permit, & Receiving_water columns
# (flow rows whose StationName has no match in facilityInfo get NA in these columns)
flow_facility_info <- flow_dates %>%
  dplyr::left_join(facilityInfo, by = c("StationName" = "eSMR_Name"))


### REMOVE TEMPORARY COLUMNS ###
flow_formatted <- flow_facility_info %>%
  # Remove temp columns since they are no longer needed. any_of() replaces the
  # superseded one_of(); it likewise tolerates names that are not present.
  dplyr::select(-dplyr::any_of(temp_cols)) %>%
  dplyr::arrange(StationName, SampleDate)

```




# Export to Excel
```{r}
# Write the cleaned flow table to a workbook in the WWTP data folder under the
# project root; today()'s date is appended to the file name.
writexl::write_xlsx(
  x = flow_formatted,
  path = paste0(
    wd,
    "/Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03.1_WWTP Flow Data Prep_Clean_",
    today(),
    ".xlsx"
  )
)
```
## Error: <text>:20:1: unexpected '<'
## 19: 
## 20: <
##     ^

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.0       actuar_3.2-2      
##  [5] NADA_1.6-1.1       forcats_0.5.1      stringr_1.4.0      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.7      
## [13] ggplot2_3.3.6      tidyverse_1.3.1    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45    assertthat_0.2.1   digest_0.6.29      utf8_1.2.2        
##  [5] R6_2.5.1           cellranger_1.1.0   backports_1.4.1    reprex_2.0.1      
##  [9] evaluate_0.15      httr_1.4.3         highr_0.9          pillar_1.7.0      
## [13] rlang_1.0.2        lazyeval_0.2.2     rstudioapi_0.13    data.table_1.14.2 
## [17] Matrix_1.5-1       rmarkdown_2.14     labeling_0.4.2     splines_4.2.2     
## [21] htmlwidgets_1.5.4  munsell_0.5.0      broom_0.8.0        compiler_4.2.2    
## [25] modelr_0.1.8       xfun_0.31          pkgconfig_2.0.3    htmltools_0.5.2   
## [29] tidyselect_1.1.2   viridisLite_0.4.0  fansi_1.0.3        crayon_1.5.1      
## [33] tzdb_0.3.0         dbplyr_2.2.0       withr_2.5.0        grid_4.2.2        
## [37] jsonlite_1.8.0     gtable_0.3.0       lifecycle_1.0.1    DBI_1.1.2         
## [41] magrittr_2.0.3     scales_1.2.0       writexl_1.4.0      cli_3.3.0         
## [45] stringi_1.7.6      farver_2.1.0       fs_1.5.2           xml2_1.3.3        
## [49] ellipsis_0.3.2     generics_0.1.2     vctrs_0.4.1        expint_0.1-7      
## [53] RColorBrewer_1.1-3 tools_4.2.2        glue_1.6.2         hms_1.1.1         
## [57] yaml_2.3.5         fastmap_1.1.0      colorspace_2.0-3   rvest_1.0.2       
## [61] knitr_1.39         haven_2.5.0
    Sys.time()
## [1] "2023-12-27 10:11:32 PST"