This report is automatically generated with the R
package knitr
(version 1.40
)
.
### LOAD REQUIRED LIBRARIES ### source("R Functions/functions_estimate NDDNQ values.R") # Load first so select() isn't masked from dplyr source("R Functions/functions_QA data.R") source("R Functions/function_AqFishSamplePairing.R")
# LOAD DATA MASTER # AqMaster <- loadData('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/4a_AqMaster_DELTAdata.xlsx')
## Data set has 16767 rows. Added Shape column based on SourceID for use in result_time_Plotly().
nrow(AqMaster)
## [1] 16767
str(AqMaster)
## tibble [16,767 × 35] (S3: tbl_df/tbl/data.frame) ## $ TMDL : chr [1:16767] "Delta" "Delta" "Delta" "Delta" ... ## $ Subarea : chr [1:16767] "Cache Creek Settling Basin" "Cache Creek Settling Basin" "Cache Creek Settling Basin" "Cache Creek Settling Basin" ... ## $ SourceID : chr [1:16767] "AppL_MeHg" "AppL_MeHg" "AppL_MeHg" "AppL_MeHg" ... ## $ SourceRow : chr [1:16767] "5" "6" "7" "8" ... ## $ StationName : chr [1:16767] "Cache Creek Settling Basin" "Cache Creek Settling Basin" "Cache Creek Settling Basin" "Cache Creek Settling Basin" ... ## $ SampleDateTime : POSIXct[1:16767], format: "2000-03-01 00:01:00" "2000-03-18 00:01:00" ... ## $ Analyte : chr [1:16767] "Methylmercury, Total" "Methylmercury, Total" "Methylmercury, Total" "Methylmercury, Total" ... ## $ Result : num [1:16767] 0.443 0.204 0.328 0.154 0.532 ... ## $ MDL : num [1:16767] NA NA NA NA NA NA NA NA NA NA ... ## $ RL : num [1:16767] NA NA NA NA NA NA NA NA NA NA ... ## $ Unit : chr [1:16767] "ng/L" "ng/L" "ng/L" "ng/L" ... ## $ ResultQualCode : chr [1:16767] "=" "=" "=" "AVG" ... ## $ WBT : chr [1:16767] "Not Recorded" "Not Recorded" "Not Recorded" "Not Recorded" ... ## $ MatrixName : chr [1:16767] "Aqueous" "Aqueous" "Aqueous" "Aqueous" ... ## $ TargetLatitude : num [1:16767] 38.7 38.7 38.7 38.7 38.7 ... ## $ TargetLongitude : num [1:16767] -122 -122 -122 -122 -122 ... ## $ CoordSystem : chr [1:16767] "NAD83" "NAD83" "NAD83" "NAD83" ... ## $ StationCode : chr [1:16767] "CCHSB" "CCHSB" "CCHSB" "CCHSB" ... ## $ QACode : chr [1:16767] NA NA NA NA ... ## $ CollectionComments: chr [1:16767] "Cache Creek d/s Settling Basin" "Cache Creek d/s Settling Basin" "Cache Creek d/s Settling Basin" "Cache Creek d/s Settling Basin" ... ## $ SampleID : chr [1:16767] NA NA NA NA ... ## $ SampleComments : chr [1:16767] NA NA NA NA ... ## $ SampleTypeCode : chr [1:16767] NA NA NA NA ... ## $ ResultsComments : chr [1:16767] NA NA NA "Average of Lab Replicates: 0.155 ng/L, 0.154 ng/L" ... ## $ BatchComments : chr [1:16767] NA NA NA NA ... ## $ BatchVerification : chr [1:16767] NA NA NA NA ... ## $ LabSampleID : chr [1:16767] NA NA NA NA ... ## $ LabBatch : chr [1:16767] NA NA NA NA ... ## $ MethodName : chr [1:16767] NA NA NA NA ... ## $ ComplianceCode : chr [1:16767] NA NA NA NA ... ## $ Project : chr [1:16767] NA NA NA NA ... ## $ CitationCode : chr [1:16767] "CALFED-SubTask1C" "CALFED-SubTask1C" "CALFED-SubTask1C" "CVRWQCB - CALFED" ... ## $ SampleDate : chr [1:16767] "2000-03-01" "2000-03-18" "2001-02-22" "2003-04-28" ... ## $ SampleTime : chr [1:16767] "00:01:00" "00:01:00" "00:01:00" "00:00:00" ... ## $ Shape : chr [1:16767] "cross-open" "cross-open" "cross-open" "cross-open" ...
# LOOK AT DATA FOR ANY NEGATIVE PATTERNS # AqMaster_graph <- AqMaster %>% mutate(SourceID = substr(SourceID, 1, 5)) result_time_Plotly(AqMaster_graph, groupByCol=TMDL, showMean=F, interactive=T, logscale=T, showLegend=F)
## Warning: Ignoring unknown aesthetics: text
## Error in path.expand(path): invalid 'path' argument
#Very little useful data before 1992 so only keep Date > 1992-01-01 AqMaster <- AqMaster %>% filter(SampleDateTime > '1992-01-01') nrow(AqMaster)
## [1] 15533
# PRECHECK DATA FOR MISSING VALUES # #Any sites without a StationName? anyBlank(AqMaster, "StationName") #Great! No Blanks
## The column "StationName" does not contain any blanks.
#Any sites with missing dates? anyBlank(AqMaster, "SampleDateTime") #Great! No Blanks
## The column "SampleDateTime" does not contain any blanks.
#Initial ResultQualCode Check temp <- AqMaster %>% filter(is.na(MDL) & is.na(RL)) %>% #this case could be '=' or 'ND'. mutate(Source_RQC = paste(SourceID, ResultQualCode, sep="; ")) unique_factors(temp, ResultQualCode, Source_RQC)
## $ResultQualCode ## [1] "=" "AVG" "DNQ" ## ## $Source_RQC ## [1] "AppL_MeHg; =" "AppL_MeHg; AVG" "AppL_THg; =" "AppL_THg; AVG" ## [5] "CALFED2003-1a; =" "CEDENAqSed; =" "R5AQ; =" "R5SED; =" ## [9] "SNIP; =" "SNIP; DNQ" "USGS; =" "WQP; ="
# ResultQualCode is not consistent #see is ResultQualCode is consistent when Result == MDL temp <- AqMaster %>% filter(Result == MDL) %>% mutate(Source_RQC = paste(SourceID, ResultQualCode, sep="; ")) unique_factors(temp, ResultQualCode, Source_RQC)
## $ResultQualCode ## [1] "=" "DNQ" "ND" ## ## $Source_RQC ## [1] "CEDENAqSed; =" "CEDENAqSed; DNQ" "CEDENAqSed; ND" "DeltaSED; =" ## [5] "R5AQ; ="
# ResultQualCode is not consistent #see is ResultQualCode is consistent when Result == RL temp <- AqMaster %>% filter(Result == RL) %>% mutate(Source_RQC = paste(SourceID, ResultQualCode, sep="; ")) unique_factors(temp, ResultQualCode, Source_RQC)
## $ResultQualCode ## [1] "=" "DNQ" "E" "ND" ## ## $Source_RQC ## [1] "CEDENAqSed; =" "CEDENAqSed; DNQ" ## [3] "CEDENAqSed; ND" "DRMP_19Sep_Hg; =" ## [5] "DRMP_2016-17_Hg_Anc_Aq_Sed; =" "DRMP_2017-18_Hg_Anc_Aq_Sed; =" ## [7] "DRMP_2018-19_Hg_Aq; =" "DWRMeHg; =" ## [9] "WQP; =" "WQP; E"
# ResultQualCode is not consistent #MDLs should not be greater than RL temp <- AqMaster %>% filter(MDL > RL) %>% mutate(Source_RQC = paste(SourceID, ResultQualCode, sep="; ")) unique_factors(temp, ResultQualCode, Source_RQC)
## $ResultQualCode ## [1] "=" "ND" ## ## $Source_RQC ## [1] "CEDENAqSed; =" "CEDENAqSed; ND"
# Only occurs in "CEDENAqSed" # In general, ResultQualCode needs to be standardized since it doesn't meet the criteria we established # in "1_ResQualCode Rules.xlsx" # STANDARDIZE RESULTQUALCODE # # I created the function "standQualCode()" based on rules in "1_ResQualCode Rules.xlsx". The function is located in "functions_QA data.R" AqMaster_fixedQualCode <- standQualCode(AqMaster, ResultQualCode)
## ## Dataset tested for the following logic: ## !is.na(Result) & is.na(MDL) & is.na(RL) & ResultQualCode == "ND" ## No instances found to be TRUE. No data removed. ## ## ## Dataset tested for the following logic: ## is.na(Result) & (!is.na(MDL) & !is.na(RL)) & (ResultQualCode != "ND" & ResultQualCode != "DNQ") ## No instances found to be TRUE. No data removed. ## ## ## Dataset tested for the following logic: ## Result == RL & MDL < RL & ResultQualCode == "ND" ## No instances found to be TRUE. No data removed. ## ## ## ResultQualCode is now standardized and original QualCodes saved in column named origQualCode. The column ResultQualCode does not contain any blanks. ## ## QualCode standardization completed based on rules established in "1_ResQualCode Rules.xlsx".
nrow(AqMaster_fixedQualCode )
## [1] 15533
View(anti_join(AqMaster, AqMaster_fixedQualCode, by=c("Result"="origResult", "MDL", "RL", "ResultQualCode"="origQualCode"))) # View all Data deleted from AqMaster # STANDARDIZING WBT # WBTs <- AqMaster_fixedQualCode %>% mutate(stationname = tolower(StationName), # make StationNames all lowercase to remove capitalization varability origWBT = WBT) %>% # add origWBT to keep WBT for reference distinct(stationname, WBT, .keep_all = T) %>% select(SourceID, stationname, WBT, origWBT, TargetLatitude, TargetLongitude) dupStationNameWBT <- WBTs %>% group_by(stationname) %>% mutate(n = n()) %>% filter(n() > 1, WBT %are not% c('Not Recorded', 'Other-Surface Water')) %>% # Remove WBTs "Not Recorded" & "Other-Surface Water" so only WBT designations from same StationName remain distinct(stationname, .keep_all=T) %>% # Remove duplicate StationNames (having different WTBs) - Keeps top row - dupliates were causing duplication in left_join below select(stationname, WBT) %>% mutate(`Change Type` = 'From Duplicate StationName WBT') # For WBT's "Not Recorded" & "Other-Surface Water" - use WBT designation from same StationName WBTs2 <- WBTs %>% select(-WBT) %>% # Remove WBT column so WBT's "Not Recorded" & "Other-Surface Water" can be updated using WBT designation from same StationName left_join(., dupStationNameWBT, by='stationname') %>% # Adds WBT column with new designations - replacing "Not Recorded" & "Other-Surface Water" mutate(WBT = ifelse(is.na(WBT), origWBT, WBT), # fill in blank WBTs with origWBT designaions `Change Type` = ifelse(WBT == origWBT, NA_character_, `Change Type`)) %>% select(SourceID, stationname, origWBT, WBT, `Change Type`, TargetLatitude, TargetLongitude) %>% # put columns in intuitive order arrange(stationname) View(WBTs2) # writexl::write_xlsx(WBTs2, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/5b.1_WBTs_ToBeStandardized.xlsx') # Add fixed WBTs into master data fixedWBTs <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/5b.2_WBTs_ManuallyStandardized.xlsx', sheet=1, guess_max = 30000) %>% select(stationname, tempWBT = WBT) %>% # change name of WBT column so easier to keep track of new & old WBTs below distinct(stationname, .keep_all=T) nrow(fixedWBTs) == length(unique(fixedWBTs$stationname))
## [1] TRUE
AqMaster_fixedWBT <- AqMaster_fixedQualCode %>% mutate(stationname = tolower(StationName)) %>% left_join(., fixedWBTs, by='stationname') %>% # adds fixed 'tempWBT' column as last column mutate(origWBT = WBT, # move original WBTs to last column WBT = tempWBT) %>% # move fixed 'WBT' column to original WBT location select(-tempWBT, -stationname) # remove temp columns nrow(AqMaster_fixedWBT)
## [1] 15533
# Add Year & Water Year columns AqMaster_waterYear <- AqMaster_fixedWBT %>% mutate(Year = year(SampleDate), wtrYrSeason = case_when(month(SampleDate) >=10 & month(SampleDate) <=12 ~ "Wet1", # Wet season Oct-Dec month(SampleDate) >=1 & month(SampleDate) <=4 ~ "Wet2", # Wet season Jan-Apr of Year month(SampleDate) >=5 & month(SampleDate) <=9 ~ "Dry"), # Dry season May-Sep of Year waterYear = if_else(wtrYrSeason == 'Wet1', Year+1, Year), # Set waterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year Season = case_when(month(SampleDate) >=11 & month(SampleDate) <=12 ~ "Wet1", # Wet season Nov-Dec - Wet Season 1 & 2 based on months > 1" precip using 2000 through 2019 monthly accumulation rainfall data from CDEC precipitation stations Sacramento WB City (SCR) and Los Banos (LSB). month(SampleDate) >=1 & month(SampleDate) <=4 ~ "Wet2", # Wet season Jan-Apr of Year month(SampleDate) >=5 & month(SampleDate) <=10 ~ "Dry"), # Dry season May-Oct of Year seasonYear = if_else(Season == 'Wet1', Year+1, Year)) # Set seasonal Year for "Wet1" to next Year, Set seasonal Year for Wet2 & Dry same as Year # SAVE AqMaster after QA # writexl::write_xlsx(AqMaster_waterYear %>% select(-Shape), path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/5a_AqMaster_QA.xlsx')
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8 ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] mgcv_1.8-41 nlme_3.1-160 lubridate_1.8.0 plotly_4.10.0 ## [5] readxl_1.4.1 actuar_3.3-0 NADA_1.6-1.1 forcats_0.5.2 ## [9] stringr_1.4.1 dplyr_1.0.9 purrr_0.3.4 readr_2.1.2 ## [13] tidyr_1.2.0 tibble_3.1.8 ggplot2_3.3.6 tidyverse_1.3.2 ## [17] fitdistrplus_1.1-8 survival_3.4-0 MASS_7.3-58.1 ## ## loaded via a namespace (and not attached): ## [1] lattice_0.20-45 assertthat_0.2.1 digest_0.6.29 utf8_1.2.2 ## [5] R6_2.5.1 cellranger_1.1.0 backports_1.4.1 reprex_2.0.2 ## [9] evaluate_0.16 highr_0.9 httr_1.4.4 pillar_1.8.1 ## [13] rlang_1.0.5 lazyeval_0.2.2 googlesheets4_1.0.1 rstudioapi_0.14 ## [17] data.table_1.14.2 Matrix_1.5-1 splines_4.2.2 webshot_0.5.3 ## [21] googledrive_2.0.0 htmlwidgets_1.5.4 munsell_0.5.0 broom_1.0.1 ## [25] compiler_4.2.2 modelr_0.1.9 xfun_0.32 pkgconfig_2.0.3 ## [29] htmltools_0.5.3 tidyselect_1.1.2 viridisLite_0.4.1 fansi_1.0.3 ## [33] crayon_1.5.1 tzdb_0.3.0 dbplyr_2.2.1 withr_2.5.0 ## [37] grid_4.2.2 jsonlite_1.8.0 gtable_0.3.1 lifecycle_1.0.1 ## [41] DBI_1.1.3 magrittr_2.0.3 scales_1.2.1 writexl_1.4.0 ## [45] cli_3.3.0 stringi_1.7.8 fs_1.5.2 xml2_1.3.3 ## [49] ellipsis_0.3.2 generics_0.1.3 vctrs_0.4.1 expint_0.1-7 ## [53] tools_4.2.2 glue_1.6.2 crosstalk_1.2.0 hms_1.1.2 ## [57] yaml_2.3.5 fastmap_1.1.0 colorspace_2.0-3 gargle_1.2.0 ## [61] rvest_1.0.3 knitr_1.40 haven_2.5.1
Sys.time()
## [1] "2024-01-04 14:39:27 PST"