This report is automatically generated with the R
package knitr
(version 1.40
)
.
source("R Functions/functions_QA data.R") #use activitytypecode to filter out QA - 1 field replicate #there were a few samples with 'Sample-Composite Without Parents' in the ActivityTypeCode' column; #I believe these are all in Lake Natmoas and therefore not in our official scope but I included them in this scope b/c of how the GIS layer is currently (all the way up to folsom) #since there was no info on composite data for any samples, I put '1' in the composite number amount but these should be filtered out anyway #(we didn't put NA_Character b/c we needed a number for weighted averaging. we put 1 to be safe) #there was no 'mean' or 'MPN' in 'StatisticalBaseCode' column for WQp fish ### LOAD DATA ### WQP_FISH <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='WQP FISH filtered 021519', guess_max = 30000) WQP_FISH_STATION <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='station', guess_max = 30000)%>% select(MonitoringLocationIdentifier,MonitoringLocationName, HorizontalCoordinateReferenceSystemDatumName, LatitudeMeasure, LongitudeMeasure, MonitoringLocationTypeName)%>% rename(StationName = 'MonitoringLocationName')%>% rename(CoordSystem = 'HorizontalCoordinateReferenceSystemDatumName')%>% rename(TargetLatitude = 'LatitudeMeasure')%>% rename(TargetLongitude = 'LongitudeMeasure')%>% rename(StationCode = 'MonitoringLocationIdentifier')%>% rename(WBT = 'MonitoringLocationTypeName') WQP_FISH_PROJECT <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='project', guess_max = 30000)%>% select(ProjectIdentifier,ProjectName)%>% rename(ProjectCode = ProjectIdentifier) WQP_FISH_RES_DETECT <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='res-detect-qnt-limit', guess_max = 30000)%>% filter(DetectionQuantitationLimitTypeName == 'Method Detection Level' & `DetectionQuantitationLimitMeasure/MeasureUnitCode`=='ng/g')%>% select(ResultIdentifier, `DetectionQuantitationLimitMeasure/MeasureValue`)%>% rename(MDL = `DetectionQuantitationLimitMeasure/MeasureValue`, SampleID = ResultIdentifier) WQP_FISH_COMMON_NAME <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='TissueSpecies', guess_max = 30000)%>% select(SubjectTaxonomicName, CommonName)%>% rename(TaxonomicName = SubjectTaxonomicName) nrow(WQP_FISH) #number of rows
## [1] 132
### SELECT USED COLUMNS, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ### keep_cols <- c('SourceID','SourceRow','ActivityStartDate','CharacteristicName', 'ResultMeasureValue', 'DetectionQuantitationLimitMeasure/MeasureValue', 'SampleTissueAnatomyName', 'SubjectTaxonomicName', 'ActivityStartTime/Time', 'MonitoringLocationIdentifier','ResultIdentifier','ProjectIdentifier' ) temp_cols <- c('ActivityTypeCode', 'ActivityMediaName','ResultStatusIdentifier','ResultWeightBasisText', 'DetectionQuantitationLimitMeasure/MeasureUnitCode', 'DetectionQuantitationLimitTypeName', 'ResultAnalyticalMethod/MethodIdentifier', 'ActivityCommentText','HydrologicCondition','HydrologicEvent','ResultMeasure/MeasureUnitCode', 'ResultAnalyticalMethod/MethodName','MethodDescriptionText' ) WQP_FISH_new <- WQP_FISH %>% select( c(keep_cols,temp_cols) ) %>% #DO NOT CHANGE - selects columns specified above rename( #Rename worksheet columns to CEDEN format here: CEDEN 'COLUMNNAME' = WORKSHEET 'COLUMNNAME' #DELETE COLUMN NAMES THAT DO NOT HAVE AN EQUIVALENT COLUMN IN THE WORKSHEET 'StationCode' = 'MonitoringLocationIdentifier', 'SampleDate' = 'ActivityStartDate', 'Result' = 'ResultMeasureValue', 'SampleID' = 'ResultIdentifier', 'ProjectCode' = 'ProjectIdentifier', 'TissueName' = 'SampleTissueAnatomyName', 'RL' = 'DetectionQuantitationLimitMeasure/MeasureValue', TaxonomicName = SubjectTaxonomicName, 'SampleTime' = 'ActivityStartTime/Time', 'Analyte' = 'CharacteristicName' #total added later during standardizing ) %>% mutate( #Create Missing column or modify existing column here: CEDEN COLUMNNAME = 'SPECIFIED VALUE' or FUNCTION #DELTE COLUMN NAMES THAT DO NOT NEED TO BE CHANGED CitationCode = 'WQP_FISH_2018', CompositeRowID = NA_character_, CompositeID = NA_character_, `WeightAvg g` = NA_real_, `TLMin mm` = NA_real_, `TLMax mm` = NA_real_, `TLAvgLength mm` = NA_real_, ProgramName = NA_character_, ParentProjectName = NA_character_, QACode = NA_character_, BatchVerification = NA_character_, ComplianceCode = NA_character_, LabSubmissionCode = NA_character_, Method = paste(`ResultAnalyticalMethod/MethodIdentifier`, `ResultAnalyticalMethod/MethodName`, MethodDescriptionText, sep='; '), #can fix NA if necessary Method = gsub('NA', '', Method), # fix issues of pasting instances with NAs Method = gsub('^(; )+|(; )+$', '', Method), ResultComments = paste(paste0('HydrologicalEvent: ', HydrologicEvent), paste0('HydrologicCondition: ', HydrologicCondition), paste0('ActivityCommentText: ', ActivityCommentText), paste0('ActivityTypeCode: ', ActivityTypeCode), paste0('ResultStatusIdentifier: ', ResultStatusIdentifier), sep=' ~ '), #can fix NA if necessary Unit = paste(`ResultMeasure/MeasureUnitCode`, ifelse(grepl('Dry', ResultWeightBasisText, ignore.case=TRUE),'dw','ww'), sep=' '), NumberFishPerComp = 1 #1 bc need number for weight of averaging ) nrow(WQP_FISH_new)
## [1] 132
#str(WQP_FISH_new) #just to check data class of different columns - e.g., is Date column in POSIX format? #View(WQP_FISH_new) ### MATCH COLUMN PARAMETERS from other tabs in spreadsheet, these add columns### #MDL WQP_FISH_new <- WQP_FISH_new %>% left_join(., WQP_FISH_RES_DETECT, by = 'SampleID') # adds MDL column #project name WQP_FISH_new <- WQP_FISH_new %>% left_join(., WQP_FISH_PROJECT, by = 'ProjectCode') #station name, latitude, longitude, WBT, coordinate system WQP_FISH_new <- WQP_FISH_new %>% left_join(., WQP_FISH_STATION, by = 'StationCode') #common name from taxonomic name WQP_FISH_new <- WQP_FISH_new %>% left_join(., WQP_FISH_COMMON_NAME, by = 'TaxonomicName') ### filter locations in scope, biologic tissue and tissue in ActivityMediaName column, put dry or wet weight with unit) ### FORMAT COLUMN PARAMETERS ### # Standardize WBT (WaterBodyType) Groups - "River/Stream", "Estuary", Drain/Canal", "Wetland", "Spring", "Slough", # "Pond", "Lake/Reservoir", "Delta", "Forebay/Afterbay", "Not Recorded" # unique(WQP_FISH_new$WBT) #Identifies OLDNAMES
## [1] "Stream" "River/Stream"
#STANDARD CODE TO CHANGE GROUPING NAMES WQP_FISH_new <- WQP_FISH_new %>% mutate(WBT = recode(WBT, "Stream" = "River/Stream")) unique(WQP_FISH_new$WBT) #New naming structure for WBT Groupings
## [1] "River/Stream"
# Standardize TissueName Groups - "Fillet" or "Whole Body" # unique(WQP_FISH_new$TissueName)
## [1] "Fillet" "Organism, whole" ## [3] "Fillet dorsal piece" "Fish Fillet, Homog., Skin On"
WQP_FISH_new <- WQP_FISH_new %>% mutate(TissueName = recode(TissueName, #"Fish Fillet, Homog., Skin On" = "Fillet", not allowable per LS #"Fillet dorsal piece" = "Fillet",not allowable per LS "Organism, whole" = "Whole Body" ) ) %>% filter(TissueName %in% c('Fillet','Whole Body')) unique(WQP_FISH_new$TissueName)
## [1] "Fillet" "Whole Body"
# Standardize Analyte Groups - "Mercury, Total" (we consider Total Mercury and Methylmercury to be approx equal) # unique(WQP_FISH_new$Analyte)
## [1] "Mercury" "Methylmercury(1+)"
WQP_FISH_new <- WQP_FISH_new %>% mutate( Analyte = recode(Analyte, "Mercury" = "Mercury, Total", "Methylmercury(1+)" = "Mercury, Total" ) ) unique(WQP_FISH_new$Analyte)
## [1] "Mercury, Total"
# Format Result Column to Numeric# # Check column for text - based on text user needs to decide what to do if(!is.numeric(WQP_FISH_new$Result) & any(grepl('<|[a-df-zA-DF-Z]', WQP_FISH_new$Result))){ old <-WQP_FISH_new$Result new <-WQP_FISH_new$Result new[grepl('<|[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005" #Print what text was found and what is being done cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)), ".\nACTIONS TAKEN:\n", "~explain here~.\n")) WQP_FISH_new <- WQP_FISH_new %>% mutate( Result = ifelse(Result=="", NA_character_, Result), Result = as.numeric(new) ) } else { WQP_FISH_new$Result = as.numeric(WQP_FISH_new$Result) cat("'Result' column is in numeric format\n")}
## 'Result' column is in numeric format
# Format MDL Column to Numeric# # Check column for text - based on text user needs to decide what to do if(!is.numeric(WQP_FISH_new$MDL) & any(grepl('<|[a-df-zA-DF-Z]', WQP_FISH_new$MDL))){ old <-WQP_FISH_new$MDL new <- WQP_FISH_new$MDL new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005" #Print what text was found and what is being done cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)), ".\nACTIONS TAKEN:\n", "~explain here~.\n")) #WQP_FISH_new <- WQP_FISH_new %>% # mutate( #Do stuff to prep column to be converted to Numeric # MDL = as.numeric(new) # ) } else { WQP_FISH_new$MDL = as.numeric(WQP_FISH_new$MDL) cat("'MDL' column is in numeric format\n")}
## 'MDL' column is in numeric format
# Format RL Column to Numeric# # Check column for text - based on text user needs to decide what to do if(!is.numeric(WQP_FISH_new$RL) & any(grepl('<|[a-df-zA-DF-Z]', WQP_FISH_new$RL))){ old <-WQP_FISH_new$RL new <-WQP_FISH_new$RL new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005" #Print what text was found and what is being done cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)), ".\nACTIONS TAKEN:\n", "~explain here~.\n")) #WQP_FISH_new <- WQP_FISH_new %>% # mutate( #Due stuff to prep column to be converted to Numeric # RL = as.numeric(new) # ) } else { WQP_FISH_new$RL = as.numeric(WQP_FISH_new$RL) cat("'RL' column is in numeric format\n")}
## 'RL' column is in numeric format
# Standardize ResultQualCode Groups - "ND", "DNQ", NA# WQP_FISH_new <- WQP_FISH_new %>% rowwise()%>% #necessary to do the following between() as a single row comparison mutate(ResultQualCode = case_when(between(Result, MDL, RL) ~ "DNQ", Result < MDL ~ "ND", Result < RL ~ "ND", TRUE ~ "=") ) unique(WQP_FISH_new$ResultQualCode) #New naming structure for ResultQualCode Groupings
## [1] "="
# Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information nrow(WQP_FISH_new) #Number rows before
## [1] 99
#CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS# WQP_FISH_new <- WQP_FISH_new %>% #Set 0 & negative values as blank mutate(Result = ifelse(Result <= 0, NA_real_, Result), MDL = ifelse(MDL <= 0, NA_real_, MDL), RL = ifelse(RL <= 0, NA_real_, RL)) na_results <- WQP_FISH_new %>% #Record rows where Result, MDL, & RL all equal <NA> filter( is.na(Result) & is.na(MDL) & is.na(RL) ) #View(na_results) WQP_FISH_new <- anti_join(WQP_FISH_new, na_results, by='SourceRow') #returns rows from WQP_FISH_new not matching values in no_result nrow(WQP_FISH_new) #Number rows after
## [1] 99
# Format Units Column - "mg/Kg ww" or "mg/Kg dw" unique(WQP_FISH_new$Unit) #Identifies OLDNAMES
## [1] "mg/kg dw" "ug/kg dw" "mg/kg ww"
# If more than 1 unit colmn exists (e.g., for RL and MDL columns) see WQP script for example on merging into 1 column WQP_FISH_new <- WQP_FISH_new %>% standardizeUnits(pp='mass') unique(WQP_FISH_new$Unit) #New naming structure for Unit Groupings
## [1] "mg/Kg dw" "mg/Kg ww"
# Format Date and Time Column # # THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SAME COLUMNS - IF TIME IS IN SEPERATE COLUMN LOOK AT AQ LINKAGE DATA TEMPLATE WQP_FISH_new <- WQP_FISH_new %>% #rowise() %>% # rowise is very slow - so used sapply to make this a rowise operation mutate( #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))), SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(lubridate::ymd_hms(SampleTime), "%H:%M:%S")), #COMBINE DATE AND TIME INTO SampleDateTime COLUMN SampleDateTime = ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')), #FORMAT SampleDateTime COLUMN TO DATE FORMAT SampleDateTime = lubridate::ymd_hms(SampleDateTime) ) ### REMOVE TEMPORARY COLUMNS ### WQP_FISH_new <- WQP_FISH_new %>% select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed #View(WQP_FISH_new) ## SAVE FORMATTED DATA AS EXCEL FILE ## writexl::write_xlsx(WQP_FISH_new, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP_FISH_ceden_format.xlsx') # In excel, to convert SampleDate column to Date format # 1 - Select the date column. # 2 - Go to the Data-tab and choose "Text to Columns". # 3 - On the first screen, leave radio button on "delimited" and click Next. # 4 - Unselect any delimiter boxes (everything blank) and click Next. # 5 - Under column data format choose Date, select YMD # 6 - Click Finish.
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8 ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] lubridate_1.8.0 plotly_4.10.0 readxl_1.4.1 actuar_3.3-0 ## [5] NADA_1.6-1.1 forcats_0.5.2 stringr_1.4.1 dplyr_1.0.9 ## [9] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.8 ## [13] ggplot2_3.3.6 tidyverse_1.3.2 fitdistrplus_1.1-8 survival_3.4-0 ## [17] MASS_7.3-58.1 ## ## loaded via a namespace (and not attached): ## [1] lattice_0.20-45 assertthat_0.2.1 digest_0.6.29 utf8_1.2.2 ## [5] R6_2.5.1 cellranger_1.1.0 backports_1.4.1 reprex_2.0.2 ## [9] evaluate_0.16 highr_0.9 httr_1.4.4 pillar_1.8.1 ## [13] rlang_1.0.5 lazyeval_0.2.2 googlesheets4_1.0.1 rstudioapi_0.14 ## [17] data.table_1.14.2 Matrix_1.5-1 splines_4.2.2 googledrive_2.0.0 ## [21] htmlwidgets_1.5.4 munsell_0.5.0 broom_1.0.1 compiler_4.2.2 ## [25] modelr_0.1.9 xfun_0.32 pkgconfig_2.0.3 htmltools_0.5.3 ## [29] tidyselect_1.1.2 fansi_1.0.3 viridisLite_0.4.1 crayon_1.5.1 ## [33] tzdb_0.3.0 dbplyr_2.2.1 withr_2.5.0 grid_4.2.2 ## [37] jsonlite_1.8.0 gtable_0.3.1 lifecycle_1.0.1 DBI_1.1.3 ## [41] magrittr_2.0.3 scales_1.2.1 writexl_1.4.0 cli_3.3.0 ## [45] stringi_1.7.8 fs_1.5.2 xml2_1.3.3 ellipsis_0.3.2 ## [49] generics_0.1.3 vctrs_0.4.1 expint_0.1-7 tools_4.2.2 ## [53] glue_1.6.2 hms_1.1.2 fastmap_1.1.0 colorspace_2.0-3 ## [57] gargle_1.2.0 rvest_1.0.3 knitr_1.40 haven_2.5.1
Sys.time()
## [1] "2024-01-05 11:42:04 PST"