This report is automatically generated with the R package knitr (version 1.40) .

source("R Functions/functions_QA data.R")

### LOAD DATA ###
CEDENAqSed <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/CEDENAqSed.xlsx', sheet='ceden_data_20181024082759', guess_max = 30000)
nrow(CEDENAqSed) #number of rows should match the Excel file (minus the header row)
## [1] 8942
#Load CEDEN StationCodes to look up CoordSystem by StationCode
CedenCoordSys <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/0_CEDEN_StationCode_CoordSystem lookup.xlsx', sheet='Worksheet', guess_max = 30000)
CedenCoordSys <- CedenCoordSys %>%
  select(StationCode, Datum) %>%
  rename('CoordSystem' = 'Datum')

### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
#Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns
keep_cols <- c('SourceID','SourceRow', 'Project', 'StationName', 'StationCode', 'SampleDate', 'CollectionTime', 'LabBatch', 'LabSampleID', 'MatrixName', 'SWRCBWatTypeCode', 'MethodName', 'Analyte', 'Unit', 'Result', 'MDL', 'RL', 'ResultQualCode', 'SampleID', 'SampleComments', 'TargetLatitude', 'TargetLongitude', 'QACode', 'BatchVerification', 'ComplianceCode', 'CollectionComments', 'ResultsComments', 'BatchComments', 'SampleTypeCode')

temp_cols <- c('Program', 'ParentProject')


CEDENAqSed_new <- CEDENAqSed %>%
  select( c(keep_cols, temp_cols)) %>% #DO NOT CHANGE - selects columns specified above
  rename('SampleTime' = 'CollectionTime') %>%
  mutate(              #Add user defined columns
    #COLUMNNAME = 'THE SPECIFIED VALUE'
    CitationCode = 'CEDENAqSed',
    Project = paste(paste0('Project: ', Project),
                    paste0('ParentProject: ', ParentProject),
                    paste0('Program: ', Program),
                    sep=' ~ '),
  ) %>%
  left_join(
    ., CedenCoordSys, by='StationCode' #adds in CoordSystem column
  ) %>%
  #rename(#Rename columns to CEDEN standards)
  rename(
    'WBT' = 'SWRCBWatTypeCode'
  )

nrow(CEDENAqSed_new)
## [1] 8942
#str(CEDENAqSed_new) #just to check data class of different columns - e.g., is Date column in POSIX format?
#View(CEDENAqSed_new)


### FORMAT COLUMN PARAMETERS ###
  # Standardize MatrixName Groups - "Water", "Sediment", "Soil" #
unique(CEDENAqSed_new$MatrixName) #Identifies OLDNAMES
## [1] "samplewater"      "sediment"         "sediment, <63 um"
#STANDARD CODE TO CHANGE GROUPING NAMES
CEDENAqSed_new <- CEDENAqSed_new %>%
  mutate(MatrixName = recode(MatrixName, #COLUMN NAME WHERE CHANGES WILL BE MADE
                      #"OLDNAME" = "NEWNAME" - create a new line for each OLDNAME that needs to be changed
                      "samplewater" = "Aqueous",
                      "sediment" = "Sediment",
                      "sediment, <63 um" = "Sediment"
                      )
  )
unique(CEDENAqSed_new$MatrixName) #New naming structure should now be listed
## [1] "Aqueous"  "Sediment"
  # Standardize WBT (WaterBodyType) Groups - "River/Stream", "Drain/Canal", "Wetland", "Spring", "Slough", 
  #                                          "Pond",  "Lake/Reservoir", "Delta", "Forebay/Afterbay", "Not Recorded" #
unique(CEDENAqSed_new$WBT) #Identifies OLDNAMES
## [1] "R_Un" "R_NW" "NR"   "W_F"  "L_F"  "R"    "R_W"  "E"    "B"
#Look for inconsistent group pairings between MatrixName and WBT (e.g., 'Soil; Stream')
unique(paste(CEDENAqSed_new$MatrixName, CEDENAqSed_new$WBT, sep='; '))
##  [1] "Aqueous; R_Un"  "Sediment; R_Un" "Aqueous; R_NW"  "Aqueous; NR"    "Sediment; W_F" 
##  [6] "Aqueous; L_F"   "Aqueous; W_F"   "Aqueous; R"     "Sediment; R_W"  "Aqueous; E"    
## [11] "Aqueous; B"     "Sediment; B"
#If an incosistent grouping exists, add comment to 'CollectionComments' column using code:  mutate(CollectionComments = case_when(MatrixName=='CONDITION' ~ 'COMMENT', TRUE ~ MatrixName)), 
#STANDARD CODE TO CHANGE GROUPING NAMES
CEDENAqSed_new <- CEDENAqSed_new %>%

   mutate(WBT = recode(WBT,
                      #"OLDNAME" = "NEWNAME" - create a new line for each OLDNAME that needs to be changed
                      "R" = "River/Stream",
                      "R_Un" = "River/Stream",
                      "R_NW" = "River/Stream",
                      "R_W" = "River/Stream",
                      "W_F" = "Wetland",
                      "L_F" = "Lake/Reservoir",
                      "NR" = "Not Recorded",
                      "E" = "Bay/Estuary",
                      "B" = "Bay/Estuary"
   ))

unique(paste(CEDENAqSed_new$MatrixName, CEDENAqSed_new$WBT, sep='; ')) #New naming structure for Matrix Name & WBT Groupings
## [1] "Aqueous; River/Stream"   "Sediment; River/Stream"  "Aqueous; Not Recorded"  
## [4] "Sediment; Wetland"       "Aqueous; Lake/Reservoir" "Aqueous; Wetland"       
## [7] "Aqueous; Bay/Estuary"    "Sediment; Bay/Estuary"
  # Standardize Analyte Groups - "Mercury, Total", "Mercury, Dissolved", "Mercury, Suspended", & same for Methylmercury # 
#Following code Added by Robin
unique(CEDENAqSed_new$Analyte)
## [1] "Mercury, Total"               "Mercury, Dissolved"          
## [3] "Mercury, Methyl, Dissolved"   "Mercury, Methyl, Total"      
## [5] "Mercury, Particulate"         "Mercury, Methyl, Particulate"
CEDENAqSed_new <- CEDENAqSed_new %>%
  mutate(Analyte = recode(Analyte,
                          #"OLDNAME" = "NEWNAME" - create a new line for each OLDNAME that needs to be changed
                          'Mercury, Methyl, Dissolved' = 'Methylmercury, Dissolved',
                          'Mercury, Methyl, Total' = 'Methylmercury, Total',
                          'Mercury, Methyl, Particulate' = 'Methylmercury, Suspended',
                          'Mercury, Particulate' = 'Mercury, Suspended')
         )
unique(CEDENAqSed_new$Analyte)
## [1] "Mercury, Total"           "Mercury, Dissolved"       "Methylmercury, Dissolved"
## [4] "Methylmercury, Total"     "Mercury, Suspended"       "Methylmercury, Suspended"
  # Standardize ResultQualCode Groups - "ND", "DNQ", NA#
unique(CEDENAqSed_new$ResultQualCode) #Identifies OLDNAMES
## [1] "="   "ND"  "DNQ" "NR"
#CEDENAqSed_new <- CEDENAqSed_new %>%
  #mutate(ResultQualCode = recode(ResultQualCode,
    #Not Needed

CEDENAqSed_new <- CEDENAqSed_new %>%
  filter(ResultQualCode != "NR")


unique(CEDENAqSed_new$ResultQualCode) #New naming structure for ResultQualCode Groupings
## [1] "="   "ND"  "DNQ"
  # Format Result Column to Numeric#
  # Check column for text - based on text user needs to decide what to do
    #If/Else not needed since 'Result', 'MDL', and 'RL' are numeric


  # Format MDL Column to Numeric#
  # Check column for text - based on text user needs to decide what to do
    # If/Else not needed since 'Result', 'MDL', and 'RL' are numeric



  # Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information
nrow(CEDENAqSed_new) #Number rows before
## [1] 8939
CEDENAqSed_new <- CEDENAqSed_new %>%
  #Set 0 & negative values as blank
  mutate(Result = ifelse(Result <= 0, NA_real_, Result),
         MDL = ifelse(MDL <= 0, NA_real_, MDL),
         RL = ifelse(RL <= 0, NA_real_, RL))
na_results <- CEDENAqSed_new %>% #Record rows where Result, MDL, & RL all equal <NA> or 0
  filter( is.na(Result) & is.na(MDL) & is.na(RL) )
nrow(na_results)
## [1] 87
CEDENAqSed_new <- anti_join(CEDENAqSed_new, na_results, by='SourceRow') #returns rows from CEDENAqSed_new not matching values in no_result
nrow(CEDENAqSed_new) #Number rows after - if number rows is the same as before this section can be deleted
## [1] 8852
  # Format Units Column - "ng/L", "mg/Kg"
unique(CEDENAqSed_new$Unit) #Identifies OLDNAMES
## [1] "ng/L"     "mg/Kg dw" "ug/L"     "ug/Kg dw"
# If more than 1 unit colmn exists (e.g., for RL and MDL columns) see WQP script for example on merging into 1 column
CEDENAqSed_new <- CEDENAqSed_new %>%
  standardizeUnits
unique(CEDENAqSed_new$Unit) #New naming structure for ResultQualCode Groupings
## [1] "ng/L"     "mg/Kg dw"
  # Format Date and Time Column #
# NEED TO TALK ABOUT HOW WE WANT TO DO THIS - To graph in R we need Date and Time in same column
# THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SEPERATE COLUMNS
CEDENAqSed_new <- CEDENAqSed_new %>%
  #rowise() %>%    # rowise is very slow - so used sapply to make this a rowise operation
  mutate(
    #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better
    SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))),
    SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(SampleTime, "%H:%M:%S")),
    #COMBINE DATE AND TIME INTO SampleDateTime COLUMN
    SampleDateTime = ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')),
    #FORMAT SampleDateTime COLUMN TO DATE FORMAT
    SampleDateTime = lubridate::ymd_hms(SampleDateTime)
  )

### REMOVE TEMPORARY COLUMNS ###
  CEDENAqSed_new <- CEDENAqSed_new %>%
    select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed
#View(CEDENAqSed_new)


## SAVE FORMATTED DATA AS EXCEL FILE ##
#Save Mercury data
writexl::write_xlsx(CEDENAqSed_new, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/CEDENAqSed_ceden_format.xlsx')
# In excel, to convert SampleDate column to Date format
# 1 - Select the date column.
# 2 - Go to the Data-tab and choose "Text to Columns".
# 3 - On the first screen, leave radio button on "delimited" and click Next.
# 4 - Unselect any delimiter boxes (everything blank) and click Next.
# 5 - Under column data format choose Date, select YMD
# 6 - Click Finish.

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=C                            
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## system code page: 65001
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] mgcv_1.8-41        nlme_3.1-160       lubridate_1.8.0    plotly_4.10.0     
##  [5] readxl_1.4.1       actuar_3.3-0       NADA_1.6-1.1       forcats_0.5.2     
##  [9] stringr_1.4.1      dplyr_1.0.9        purrr_0.3.4        readr_2.1.2       
## [13] tidyr_1.2.0        tibble_3.1.8       ggplot2_3.3.6      tidyverse_1.3.2   
## [17] fitdistrplus_1.1-8 survival_3.4-0     MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4          jsonlite_1.8.0      viridisLite_0.4.1   splines_4.2.2      
##  [5] modelr_0.1.9        assertthat_0.2.1    highr_0.9           googlesheets4_1.0.1
##  [9] cellranger_1.1.0    yaml_2.3.5          pillar_1.8.1        backports_1.4.1    
## [13] lattice_0.20-45     glue_1.6.2          digest_0.6.29       rvest_1.0.3        
## [17] colorspace_2.0-3    htmltools_0.5.3     Matrix_1.5-1        pkgconfig_2.0.3    
## [21] broom_1.0.1         haven_2.5.1         webshot_0.5.3       scales_1.2.1       
## [25] tzdb_0.3.0          googledrive_2.0.0   generics_0.1.3      ellipsis_0.3.2     
## [29] withr_2.5.0         lazyeval_0.2.2      cli_3.3.0           magrittr_2.0.3     
## [33] crayon_1.5.1        evaluate_0.16       fs_1.5.2            fansi_1.0.3        
## [37] xml2_1.3.3          tools_4.2.2         data.table_1.14.2   hms_1.1.2          
## [41] expint_0.1-7        gargle_1.2.0        lifecycle_1.0.1     munsell_0.5.0      
## [45] reprex_2.0.2        writexl_1.4.0       compiler_4.2.2      rlang_1.0.5        
## [49] rstudioapi_0.14     htmlwidgets_1.5.4   crosstalk_1.2.0     rmarkdown_2.16     
## [53] gtable_0.3.1        DBI_1.1.3           R6_2.5.1            knitr_1.40         
## [57] fastmap_1.1.0       utf8_1.2.2          stringi_1.7.8       vctrs_0.4.1        
## [61] dbplyr_2.2.1        tidyselect_1.1.2    xfun_0.32
    Sys.time()
## [1] "2023-12-29 14:09:03 PST"