This report is automatically generated with the R package knitr (version 1.40) .

### LOAD REQUIRED LIBRARIES ###
source("R Functions/functions_estimate NDDNQ values.R") # Load first so select() isn't masked from dplyr
source("R Functions/functions_QA data.R")
source("R Functions/function_AqFishSamplePairing.R")
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-41. For overview type 'help("mgcv-package")'.
# LOAD DATA MASTER #
AqMaster <- loadData('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/4a_AqMaster_DELTAdata.xlsx')
## Data set has 16767 rows. Added Shape column based on SourceID for use in result_time_Plotly().
nrow(AqMaster)
## [1] 16767
str(AqMaster)
## tibble [16,767 × 35] (S3: tbl_df/tbl/data.frame)
##  $ TMDL              : chr [1:16767] "Delta" "Delta" "Delta" "Delta" ...
##  $ Subarea           : chr [1:16767] "Cache Creek Settling Basin" "Cache Creek Settling Basin" "Cache Creek Settling Basin" "Cache Creek Settling Basin" ...
##  $ SourceID          : chr [1:16767] "AppL_MeHg" "AppL_MeHg" "AppL_MeHg" "AppL_MeHg" ...
##  $ SourceRow         : chr [1:16767] "5" "6" "7" "8" ...
##  $ StationName       : chr [1:16767] "Cache Creek Settling Basin" "Cache Creek Settling Basin" "Cache Creek Settling Basin" "Cache Creek Settling Basin" ...
##  $ SampleDateTime    : POSIXct[1:16767], format: "2000-03-01 00:01:00" "2000-03-18 00:01:00" ...
##  $ Analyte           : chr [1:16767] "Methylmercury, Total" "Methylmercury, Total" "Methylmercury, Total" "Methylmercury, Total" ...
##  $ Result            : num [1:16767] 0.443 0.204 0.328 0.154 0.532 ...
##  $ MDL               : num [1:16767] NA NA NA NA NA NA NA NA NA NA ...
##  $ RL                : num [1:16767] NA NA NA NA NA NA NA NA NA NA ...
##  $ Unit              : chr [1:16767] "ng/L" "ng/L" "ng/L" "ng/L" ...
##  $ ResultQualCode    : chr [1:16767] "=" "=" "=" "AVG" ...
##  $ WBT               : chr [1:16767] "Not Recorded" "Not Recorded" "Not Recorded" "Not Recorded" ...
##  $ MatrixName        : chr [1:16767] "Aqueous" "Aqueous" "Aqueous" "Aqueous" ...
##  $ TargetLatitude    : num [1:16767] 38.7 38.7 38.7 38.7 38.7 ...
##  $ TargetLongitude   : num [1:16767] -122 -122 -122 -122 -122 ...
##  $ CoordSystem       : chr [1:16767] "NAD83" "NAD83" "NAD83" "NAD83" ...
##  $ StationCode       : chr [1:16767] "CCHSB" "CCHSB" "CCHSB" "CCHSB" ...
##  $ QACode            : chr [1:16767] NA NA NA NA ...
##  $ CollectionComments: chr [1:16767] "Cache Creek d/s Settling Basin" "Cache Creek d/s Settling Basin" "Cache Creek d/s Settling Basin" "Cache Creek d/s Settling Basin" ...
##  $ SampleID          : chr [1:16767] NA NA NA NA ...
##  $ SampleComments    : chr [1:16767] NA NA NA NA ...
##  $ SampleTypeCode    : chr [1:16767] NA NA NA NA ...
##  $ ResultsComments   : chr [1:16767] NA NA NA "Average of Lab Replicates: 0.155 ng/L, 0.154 ng/L" ...
##  $ BatchComments     : chr [1:16767] NA NA NA NA ...
##  $ BatchVerification : chr [1:16767] NA NA NA NA ...
##  $ LabSampleID       : chr [1:16767] NA NA NA NA ...
##  $ LabBatch          : chr [1:16767] NA NA NA NA ...
##  $ MethodName        : chr [1:16767] NA NA NA NA ...
##  $ ComplianceCode    : chr [1:16767] NA NA NA NA ...
##  $ Project           : chr [1:16767] NA NA NA NA ...
##  $ CitationCode      : chr [1:16767] "CALFED-SubTask1C" "CALFED-SubTask1C" "CALFED-SubTask1C" "CVRWQCB - CALFED" ...
##  $ SampleDate        : chr [1:16767] "2000-03-01" "2000-03-18" "2001-02-22" "2003-04-28" ...
##  $ SampleTime        : chr [1:16767] "00:01:00" "00:01:00" "00:01:00" "00:00:00" ...
##  $ Shape             : chr [1:16767] "cross-open" "cross-open" "cross-open" "cross-open" ...
# LOOK AT DATA FOR ANY NEGATIVE PATTERNS #
AqMaster_graph <- AqMaster %>%
  mutate(SourceID = substr(SourceID, 1, 5))
result_time_Plotly(AqMaster_graph, groupByCol=TMDL, showMean=F, interactive=T, logscale=T, showLegend=F)
## Warning: Ignoring unknown aesthetics: text
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
## Error in path.expand(path): invalid 'path' argument
#Very little useful data before 1992 so only keep Date > 1992-01-01
AqMaster <- AqMaster %>%
  filter(SampleDateTime > '1992-01-01')
nrow(AqMaster)
## [1] 15533
# PRECHECK DATA FOR MISSING VALUES #
#Any sites without a StationName?
anyBlank(AqMaster, "StationName") #Great! No Blanks
## The column "StationName" does not contain any blanks.
#Any sites with missing dates?
anyBlank(AqMaster, "SampleDateTime") #Great! No Blanks
## The column "SampleDateTime" does not contain any blanks.
#Initial ResultQualCode Check
temp <- AqMaster %>%
  filter(is.na(MDL) & is.na(RL)) %>%  #this case could be '=' or 'ND'.
  mutate(Source_RQC = paste(SourceID, ResultQualCode, sep="; "))
unique_factors(temp, ResultQualCode, Source_RQC)
## $ResultQualCode
## [1] "="   "AVG" "DNQ"
## 
## $Source_RQC
##  [1] "AppL_MeHg; ="     "AppL_MeHg; AVG"   "AppL_THg; ="      "AppL_THg; AVG"   
##  [5] "CALFED2003-1a; =" "CEDENAqSed; ="    "R5AQ; ="          "R5SED; ="        
##  [9] "SNIP; ="          "SNIP; DNQ"        "USGS; ="          "WQP; ="
# ResultQualCode is not consistent 

  #see is ResultQualCode is consistent when Result == MDL
temp <- AqMaster %>%
  filter(Result == MDL) %>%
  mutate(Source_RQC = paste(SourceID, ResultQualCode, sep="; "))
unique_factors(temp, ResultQualCode, Source_RQC)
## $ResultQualCode
## [1] "="   "DNQ" "ND" 
## 
## $Source_RQC
## [1] "CEDENAqSed; ="   "CEDENAqSed; DNQ" "CEDENAqSed; ND"  "DeltaSED; ="    
## [5] "R5AQ; ="
# ResultQualCode is not consistent

  #see is ResultQualCode is consistent when Result == RL
temp <- AqMaster %>%
  filter(Result == RL) %>%
  mutate(Source_RQC = paste(SourceID, ResultQualCode, sep="; "))
unique_factors(temp, ResultQualCode, Source_RQC)
## $ResultQualCode
## [1] "="   "DNQ" "E"   "ND" 
## 
## $Source_RQC
##  [1] "CEDENAqSed; ="                 "CEDENAqSed; DNQ"              
##  [3] "CEDENAqSed; ND"                "DRMP_19Sep_Hg; ="             
##  [5] "DRMP_2016-17_Hg_Anc_Aq_Sed; =" "DRMP_2017-18_Hg_Anc_Aq_Sed; ="
##  [7] "DRMP_2018-19_Hg_Aq; ="         "DWRMeHg; ="                   
##  [9] "WQP; ="                        "WQP; E"
# ResultQualCode is not consistent

  #MDLs should not be greater than RL
temp <- AqMaster %>%
  filter(MDL > RL) %>%
  mutate(Source_RQC = paste(SourceID, ResultQualCode, sep="; "))
unique_factors(temp, ResultQualCode, Source_RQC)
## $ResultQualCode
## [1] "="  "ND"
## 
## $Source_RQC
## [1] "CEDENAqSed; ="  "CEDENAqSed; ND"
# Only occurs in "CEDENAqSed"


# In general, ResultQualCode needs to be standardized since it doesn't meet the criteria we established
# in "1_ResQualCode Rules.xlsx"



# STANDARDIZE RESULTQUALCODE #
# I created the function "standQualCode()" based on rules in "1_ResQualCode Rules.xlsx". The function is located in "functions_QA data.R"
AqMaster_fixedQualCode <- standQualCode(AqMaster, ResultQualCode)
## 
## Dataset tested for the following logic:
##    !is.na(Result) & is.na(MDL) & is.na(RL) & ResultQualCode == "ND" 
## No instances found to be TRUE. No data removed.
## 
## 
## Dataset tested for the following logic:
##    is.na(Result) & (!is.na(MDL) & !is.na(RL)) & (ResultQualCode != "ND" & ResultQualCode != "DNQ") 
## No instances found to be TRUE. No data removed.
## 
## 
## Dataset tested for the following logic:
##    Result == RL & MDL < RL & ResultQualCode == "ND" 
## No instances found to be TRUE. No data removed.
## 
## 
##  ResultQualCode is now standardized and original QualCodes saved in column named origQualCode. The column ResultQualCode does not contain any blanks.
## 
## QualCode standardization completed based on rules established in "1_ResQualCode Rules.xlsx".
nrow(AqMaster_fixedQualCode )
## [1] 15533
View(anti_join(AqMaster, AqMaster_fixedQualCode, by=c("Result"="origResult", "MDL", "RL", "ResultQualCode"="origQualCode")))  # View all Data deleted from AqMaster


# STANDARDIZING WBT #
WBTs <- AqMaster_fixedQualCode %>%
  mutate(stationname = tolower(StationName),          # make StationNames all lowercase to remove capitalization varability
         origWBT = WBT) %>%                           # add origWBT to keep WBT for reference
  distinct(stationname, WBT, .keep_all = T) %>%
  select(SourceID, stationname, WBT, origWBT, TargetLatitude, TargetLongitude)

dupStationNameWBT <- WBTs %>%
  group_by(stationname) %>%
  mutate(n = n()) %>%
  filter(n() > 1,
         WBT %are not% c('Not Recorded', 'Other-Surface Water')) %>%   # Remove WBTs "Not Recorded" & "Other-Surface Water" so only WBT designations from same StationName remain
  distinct(stationname, .keep_all=T) %>%                               # Remove duplicate StationNames (having different WTBs) - Keeps top row - dupliates were causing duplication in left_join below
  select(stationname, WBT) %>%
  mutate(`Change Type` = 'From Duplicate StationName WBT')

# For WBT's "Not Recorded" & "Other-Surface Water" - use WBT designation from same StationName
WBTs2 <- WBTs %>%
  select(-WBT) %>%                                # Remove WBT column so WBT's "Not Recorded" & "Other-Surface Water" can be updated using WBT designation from same StationName
  left_join(., dupStationNameWBT, by='stationname') %>%  # Adds WBT column with new designations - replacing "Not Recorded" & "Other-Surface Water"
  mutate(WBT = ifelse(is.na(WBT), origWBT, WBT),         # fill in blank WBTs with origWBT designaions  
         `Change Type` = ifelse(WBT == origWBT, NA_character_, `Change Type`)) %>%
  select(SourceID, stationname, origWBT, WBT, `Change Type`, TargetLatitude, TargetLongitude) %>% # put columns in intuitive order
  arrange(stationname)

View(WBTs2)
# writexl::write_xlsx(WBTs2, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/5b.1_WBTs_ToBeStandardized.xlsx')


# Add fixed WBTs into master data
fixedWBTs <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/5b.2_WBTs_ManuallyStandardized.xlsx', sheet=1, guess_max = 30000) %>%
  select(stationname, tempWBT = WBT) %>%   # change name of WBT column so easier to keep track of new & old WBTs below
  distinct(stationname, .keep_all=T)
nrow(fixedWBTs) == length(unique(fixedWBTs$stationname))
## [1] TRUE
AqMaster_fixedWBT <- AqMaster_fixedQualCode %>%
  mutate(stationname = tolower(StationName)) %>%
  left_join(., fixedWBTs, by='stationname') %>% # adds fixed 'tempWBT' column as last column
  mutate(origWBT = WBT,                         # move original WBTs to last column
         WBT     = tempWBT) %>%                 # move fixed 'WBT' column to original WBT location
  select(-tempWBT, -stationname)         # remove temp columns
nrow(AqMaster_fixedWBT)
## [1] 15533
# Add Year & Water Year columns
AqMaster_waterYear <- AqMaster_fixedWBT %>%
  mutate(Year = year(SampleDate),
         wtrYrSeason = case_when(month(SampleDate) >=10 & month(SampleDate) <=12 ~ "Wet1",  # Wet season Oct-Dec  
                                 month(SampleDate) >=1  & month(SampleDate) <=4  ~ "Wet2",  # Wet season Jan-Apr of Year
                                 month(SampleDate) >=5  & month(SampleDate) <=9 ~ "Dry"),  # Dry season May-Sep of Year
         waterYear = if_else(wtrYrSeason == 'Wet1', Year+1, Year), # Set waterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year
         Season = case_when(month(SampleDate) >=11 & month(SampleDate) <=12 ~ "Wet1",  # Wet season Nov-Dec - Wet Season 1 & 2 based on months > 1" precip using 2000 through 2019 monthly accumulation rainfall data from CDEC precipitation stations Sacramento WB City (SCR) and Los Banos (LSB).  
                            month(SampleDate) >=1  & month(SampleDate) <=4  ~ "Wet2",  # Wet season Jan-Apr of Year
                            month(SampleDate) >=5  & month(SampleDate) <=10 ~ "Dry"),  # Dry season May-Oct of Year
         seasonYear = if_else(Season == 'Wet1', Year+1, Year))  # Set seasonal Year for "Wet1" to next Year, Set seasonal Year for Wet2 & Dry same as Year


# SAVE AqMaster after QA #
writexl::write_xlsx(AqMaster_waterYear %>% select(-Shape),
                    path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/5a_AqMaster_QA.xlsx')

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] mgcv_1.8-41        nlme_3.1-160       lubridate_1.8.0    plotly_4.10.0     
##  [5] readxl_1.4.1       actuar_3.3-0       NADA_1.6-1.1       forcats_0.5.2     
##  [9] stringr_1.4.1      dplyr_1.0.9        purrr_0.3.4        readr_2.1.2       
## [13] tidyr_1.2.0        tibble_3.1.8       ggplot2_3.3.6      tidyverse_1.3.2   
## [17] fitdistrplus_1.1-8 survival_3.4-0     MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2         
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2       
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1       
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 rstudioapi_0.14    
## [17] data.table_1.14.2   Matrix_1.5-1        splines_4.2.2       webshot_0.5.3      
## [21] googledrive_2.0.0   htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1        
## [25] compiler_4.2.2      modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3    
## [29] htmltools_0.5.3     tidyselect_1.1.2    viridisLite_0.4.1   fansi_1.0.3        
## [33] crayon_1.5.1        tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0        
## [37] grid_4.2.2          jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1    
## [41] DBI_1.1.3           magrittr_2.0.3      scales_1.2.1        writexl_1.4.0      
## [45] cli_3.3.0           stringi_1.7.8       fs_1.5.2            xml2_1.3.3         
## [49] ellipsis_0.3.2      generics_0.1.3      vctrs_0.4.1         expint_0.1-7       
## [53] tools_4.2.2         glue_1.6.2          crosstalk_1.2.0     hms_1.1.2          
## [57] yaml_2.3.5          fastmap_1.1.0       colorspace_2.0-3    gargle_1.2.0       
## [61] rvest_1.0.3         knitr_1.40          haven_2.5.1
    Sys.time()
## [1] "2024-01-04 14:39:27 PST"