This report is automatically generated with the R package knitr (version 1.40) .

source("R Functions/functions_QA data.R")


### LOAD DATA ###
DeltaSED <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/DeltaSED.xlsx', sheet='Surface Sediment TotHg Data', guess_max = 30000)
nrow(DeltaSED) #number of rows should match the Excel file (minus the header row)
## [1] 424
### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
#Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns
keep_cols <- c('SourceID','SourceRow','Station Name','Station #','Date','MMHg (ng/g)','THg (mg/kg)',
               'Latitude','Longitude')

temp_cols <- c('Citation','MMHg Qual (less than MDL)','THg Qual (Less than MDL)','Delta Subarea','Grouped Location')

DeltaSED_new <- DeltaSED %>%
  select( c(keep_cols, temp_cols) ) %>% #DO NOT CHANGE - selects columns specified above
  mutate(              #Add user defined columns
    CoordSystem = NA_character_,
    CitationCode = Citation,
    Project = Citation,
    SampleTime = NA_character_,
    LabBatch = NA_character_,
    LabSampleID = NA_character_,
    MatrixName = 'Sediment',
    WBT = 'Not Recorded',
    MethodName = NA_character_,
    RL = NA_real_,
    SampleID = NA_character_,
    SampleComments = paste( if_else(is.na(`Delta Subarea`), '', if_else(is.na(`Grouped Location`),paste0('In "',`Delta Subarea`,'" Delta Subarea'), paste0('In "',`Delta Subarea`,'" Delta Subarea at'))), if_else(is.na(`Grouped Location`), '', if_else(is.na(`Delta Subarea`),paste('At',`Grouped Location`,'(Not in Legal Delta)'),`Grouped Location`)) ),
    QACode = NA_character_,
    BatchVerification = NA_character_,
    ComplianceCode = NA_character_,
    CollectionComments = NA_character_,
    ResultsComments = NA_character_,
    BatchComments = NA_character_,
    SampleTypeCode = NA_character_
  ) %>%
  rename( #Rename columns to CEDEN standards
    #CEDEN 'COLUMNNAME' = DeltaSED 'COLUMNNAME'
    #If a column doesnt match the CEDEN Standard, delete that column below, talk to Robin and discuss if we can add it using other information
    #If a column below doesn't exist; move COLUMNNAME to Mutate above using format; StationName = NA_character_
    'StationName' = 'Station Name',
    'StationCode' = 'Station #',
    'SampleDate' = 'Date',
    'TargetLatitude' = 'Latitude',
    'TargetLongitude' = 'Longitude'
  )
nrow(DeltaSED_new)
## [1] 424
#str(DeltaSED_new) #just to check data class of different columns - e.g., is Date column in POSIX format?


### FORMAT WIDE TO LONG ###
DeltaSED_new <- DeltaSED_new %>%
  gather(c('MMHg (ng/g)','THg (mg/kg)'),
         key="Analyte", value="Result") %>%
  mutate(
    # Use units form Analyte to create a Unit Column
    Unit = if_else(Analyte == 'MMHg (ng/g)', 'ng/g', 'mg/Kg'),
    # Use Analyte to create MDL column from 'MMHg Qual (less than MDL)' & 'THg Qual (Less than MDL)' column values - based on info provided
    MDL = case_when(Analyte == 'MMHg (ng/g)' ~ 0.019,
                    Analyte == 'THg (mg/kg)' ~ 0.04),
    # Use Analyte and 'MMHg Qual (less than MDL)' & 'THg Qual (Less than MDL)' columns to ResultQualCode column
    ResultQualCode = case_when(Analyte == 'MMHg (ng/g)' & grepl('<', `MMHg Qual (less than MDL)`) ~ 'ND',
                               Analyte == 'THg (mg/kg)' & grepl('<', `THg Qual (Less than MDL)`) ~ 'ND',
                               TRUE ~ '='),
    # Remove values in Result when ND
    Result = if_else(ResultQualCode == 'ND', NA_real_, Result),
    # Rename Analyte groupings to CEDEN standards
    Analyte = recode(Analyte,
                     'MMHg (ng/g)' = 'Methylmercury, Total',
                     'THg (mg/kg)' = 'Mercury, Total')
    )
nrow(DeltaSED_new)
## [1] 848
#PASTE LETTER OF COLUMN TO RESULT ID SO THE RESULT ID IS NOT DUPLICATED
seqPasteLetters <- function(chr_vector){
  col_letters <- LETTERS[c(13,15)] # letters corresponding to column of excel data
  for (i in 1:length(chr_vector)){
    chr_vector[i] <- paste0(chr_vector[i], col_letters[i])
  }
  return(chr_vector)
}
DeltaSED_new <- DeltaSED_new %>%
  mutate(tempSourceRow = SourceRow) %>%
  group_by(tempSourceRow) %>%
  mutate(SourceRow = seqPasteLetters(SourceRow)) %>%
  ungroup() %>%
  dplyr::select(-tempSourceRow)
nrow(DeltaSED_new)
## [1] 848
### FORMAT COLUMN PARAMETERS ###
  # Standardize MatrixName Groups - "Water", "Sediment", "Soil" #
unique(DeltaSED_new$MatrixName) #Identifies OLDNAMES
## [1] "Sediment"
# [1] "Sediment" - no changes necessary

  # Standardize WBT
unique(DeltaSED_new$WBT)
## [1] "Not Recorded"
# [1] NA - no WBT yet - may need GIS to figure what WBT is.

# Standardize Analyte Groups - "Mercury, Total", "Mercury, Dissolved", "Mercury, Suspended", & same for Methylmercury #
unique(DeltaSED_new$Analyte) #IF ANALYTE IS IN 2 COLUMNS e.g., (Hg & MeHg); (Total, Dissolved, Suspended) SEE WQP SCRIPT FOR EXAMPLE
## [1] "Methylmercury, Total" "Mercury, Total"
# [1] "Methylmercury, Total" "Mercury, Total" - formatted when converting wide to long data format

  # Standardize ResultQualCode Groups - "ND", "DNQ", NA#
unique(DeltaSED_new$ResultQualCode) #Identifies OLDNAMES
## [1] "="  "ND"
# [1] NA   "ND" - formatted when converting wide to long data format

  # Format Result Column to Numeric#
class(DeltaSED_new$Result)
## [1] "numeric"
# [1] "numeric" - no changes necessary

  # Format MDL Column to Numeric#
class(DeltaSED_new$MDL)
## [1] "numeric"
# [1] "numeric" - no changes necessary

  # Check if Result & ResultQualCode Columns both equal <NA> - these rows have no useful information
nrow(DeltaSED_new) #Number rows before
## [1] 848
#CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS#
DeltaSED_new <- DeltaSED_new %>%
  #Set 0 & negative values as blank
  mutate(Result = ifelse(Result <= 0, NA_real_, Result),
         MDL = ifelse(MDL <= 0, NA_real_, MDL),
         RL = ifelse(RL <= 0, NA_real_, RL))
na_results <- DeltaSED_new %>% #Record rows where Result, MDL, & RL all equal <NA>
  filter( is.na(Result) & is.na(MDL) & is.na(RL) )
#View(na_results)
DeltaSED_new <- anti_join(DeltaSED_new, na_results, by='SourceRow') #returns rows from DeltaSED_new not matching values in no_result
nrow(DeltaSED_new) #Number rows after
## [1] 848
  # Format Units Column - "ng/L", "mg/Kg"
unique(DeltaSED_new$Unit) #Identifies OLDNAMES
## [1] "ng/g"  "mg/Kg"
DeltaSED_new <- DeltaSED_new %>%
  standardizeUnits
unique(DeltaSED_new$Unit)
## [1] "mg/Kg"
  # Format Date and Time Column #
# NEED TO TALK ABOUT HOW WE WANT TO DO THIS - To graph in R we need Date and Time in same column
# THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SEPERATE COLUMNS
DeltaSED_new <- DeltaSED_new %>%
  #rowise() %>%    # rowise is very slow - so used sapply to make this a rowise operation
  mutate(
    #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better
    SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))),
    SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(lubridate::ymd_hms(SampleTime), "%H:%M:%S")),
    #COMBINE DATE AND TIME INTO SampleDateTime COLUMN
    SampleDateTime = ifelse(!is.na(SampleDate), ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')), NA),
    #FORMAT SampleDateTime COLUMN TO DATE FORMAT
    SampleDateTime = lubridate::ymd_hms(SampleDateTime)
  )


### REMOVE TEMPORARY COLUMNS ###
DeltaSED_new <- DeltaSED_new %>%
  select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed
#View(DeltaSED_new)

## SAVE FORMATTED DATA AS EXCEL FILE ##
writexl::write_xlsx(DeltaSED_new, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/DeltaSED_ceden_format.xlsx')
# In excel, to convert SampleDate column to Date format
# 1 - Select the date column.
# 2 - Go to the Data-tab and choose "Text to Columns".
# 3 - On the first screen, leave radio button on "delimited" and click Next.
# 4 - Unselect any delimiter boxes (everything blank) and click Next.
# 5 - Under column data format choose Date, select YMD
# 6 - Click Finish.

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=C                            
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## system code page: 65001
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] mgcv_1.8-41        nlme_3.1-160       lubridate_1.8.0    plotly_4.10.0     
##  [5] readxl_1.4.1       actuar_3.3-0       NADA_1.6-1.1       forcats_0.5.2     
##  [9] stringr_1.4.1      dplyr_1.0.9        purrr_0.3.4        readr_2.1.2       
## [13] tidyr_1.2.0        tibble_3.1.8       ggplot2_3.3.6      tidyverse_1.3.2   
## [17] fitdistrplus_1.1-8 survival_3.4-0     MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4          jsonlite_1.8.0      viridisLite_0.4.1   splines_4.2.2      
##  [5] modelr_0.1.9        assertthat_0.2.1    highr_0.9           googlesheets4_1.0.1
##  [9] cellranger_1.1.0    yaml_2.3.5          pillar_1.8.1        backports_1.4.1    
## [13] lattice_0.20-45     glue_1.6.2          digest_0.6.29       rvest_1.0.3        
## [17] colorspace_2.0-3    htmltools_0.5.3     Matrix_1.5-1        pkgconfig_2.0.3    
## [21] broom_1.0.1         haven_2.5.1         webshot_0.5.3       scales_1.2.1       
## [25] tzdb_0.3.0          googledrive_2.0.0   generics_0.1.3      ellipsis_0.3.2     
## [29] withr_2.5.0         lazyeval_0.2.2      cli_3.3.0           magrittr_2.0.3     
## [33] crayon_1.5.1        evaluate_0.16       fs_1.5.2            fansi_1.0.3        
## [37] xml2_1.3.3          tools_4.2.2         data.table_1.14.2   hms_1.1.2          
## [41] expint_0.1-7        gargle_1.2.0        lifecycle_1.0.1     munsell_0.5.0      
## [45] reprex_2.0.2        writexl_1.4.0       compiler_4.2.2      rlang_1.0.5        
## [49] rstudioapi_0.14     htmlwidgets_1.5.4   crosstalk_1.2.0     rmarkdown_2.16     
## [53] gtable_0.3.1        DBI_1.1.3           R6_2.5.1            knitr_1.40         
## [57] fastmap_1.1.0       utf8_1.2.2          stringi_1.7.8       vctrs_0.4.1        
## [61] dbplyr_2.2.1        tidyselect_1.1.2    xfun_0.32
    Sys.time()
## [1] "2023-12-29 14:23:37 PST"