This report is automatically generated with the R
package knitr
(version 1.40
)
.
source("R Functions/functions_QA data.R") ### LOAD DATA ### DRMP <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/DRMP_2016_2019_Aq_Sed_Data.xlsx', sheet='Sheet1', guess_max = 30000) nrow(DRMP) #number of rows should match the Excel file (minus the header row)
## [1] 3366
### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ### #Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns keep_cols <- c('SourceID','SourceRow', 'ProjectCode', 'StationCode', 'SampleDate', 'CollectionTime', 'LabBatch', 'LabSampleID', 'MatrixName', 'MethodName', 'UnitName', 'Result', 'MDL', 'RL', 'ResQualCode', 'SampleID', 'SampleComments', 'QACode', 'ComplianceCode', 'LabCollectionComments', 'SampleTypeCode') ### LabSampleID shows that some data are matrix spikes '-MS' or '-MSD' or duplicates '-dup'. Remove matrix spikes and make note of duplicate in SampleTypeCode column temp_cols <- c('AnalyteName', 'FractionName', 'CollectionDeviceName', 'CollectionMethodCode') #Include columns that do not match CEDEN standards but may be useful (e.g., Unit columns for MDL & RL) #temp_cols are removed before the data is merged with other datasets DRMP_new <- DRMP %>% select( c(keep_cols,temp_cols) ) %>% #DO NOT CHANGE - selects columns specified above rename( #Rename DRMP columns to CEDEN format here: CEDEN 'COLUMNNAME' = WORKSHEET 'COLUMNNAME' #DELTE COLUMN NAMES THAT DO NOT HAVE AN EQUIVALENT COLUMN IN THE DRMP #CitationCode = , added with mutate below #CoordSystem = , added with left_join(DRMP_locations) Project = ProjectCode, #StationName = , added with left_join(DRMP_locations) SampleTime = CollectionTime, #WBT = '', added with left_join(DRMP_locations) #Analyte = , added with mutate below Unit = UnitName, ResultQualCode = ResQualCode, SampleID = SampleID, SampleComments = SampleComments, #TargetLatitude = , added with left_join(DRMP_locations) #TargetLongitude = , added with left_join(DRMP_locations) #BatchVerification = , added with mutate below #CollectionComments = , added with mutate below ResultsComments = LabCollectionComments #BatchComments = '' added with mutate below ) %>% mutate( #Create Missing column or modify existing column here: CEDEN COLUMNNAME = 'SPECIFIED VALUE' or FUNCTION #DELTE COLUMN NAMES THAT DO NOT NEED TO BE CHANGED CitationCode = 'DRMP', Analyte = paste(AnalyteName, FractionName, sep=', '), #CoordSystem = '', added with left_join(DRMP_locations) #StationName = '', added with left_join(DRMP_locations) #WBT = '', added with left_join(DRMP_locations) #TargetLatitude = '', added with left_join(DRMP_locations) #TargetLongitude = '', added with left_join(DRMP_locations) BatchVerification = NA_character_, CollectionComments = paste(CollectionDeviceName, CollectionMethodCode, sep=' ~ '), BatchComments = NA_character_ ) nrow(DRMP_new)
## [1] 3366
#str(DRMP_new) #just to check data class of different columns - e.g., is Date column in POSIX format? #View(DRMP_new) #Add columns from "DRMP_locations.xlsx" DRMP_locations <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/DRMP_locations.xlsx', sheet='Locations', guess_max = 30000) DRMP_new <- left_join(DRMP_new, DRMP_locations, by='StationCode') %>% #Adds columns StationName, WBT, LocationCode, TargetLatitude, TargetLongitude, CoordSystem based on StationCode mutate(CollectionComments = paste(LocationCode, CollectionComments, sep=' ~ ')) %>% #Add LocationCode to CollectionComments select(-LocationCode) #Remove LocationCode nrow(DRMP_new)
## [1] 3366
#Filter Satation Codes unique(DRMP_new$StationCode)
## [1] "000NONPJ_A" "000NONPJ_B" "000NONPJ_C" "000NONPJ_D" "000NONPJ_E" "000NONPJ_F" ## [7] "000NONPJ_G" "000NONPJ_H" "000NONPJ_I" "000NONPJ_J" "000NONPJ_K" "000NONPJ_L" ## [13] "000NONPJ_M" "000NONPJ_N" "000NONPJ_O" "510ADVLIM" "510ST1317" "541SJC501" ## [19] "544LILPSL" "544MDRBH4" "LABQA" "544ADVLM6" "207SRD10A" "544DMC020" ## [25] "FIELDQA" "000NONPJ" "FieldQA"
dropStatioCodes <- c("LABQA", "FIELDQA", "FieldQA", grep("000NONPJ", unique(DRMP_new$StationCode), value=T)) #Do not want these StationCodes DRMP_new <- DRMP_new %>% filter(StationCode %are not% dropStatioCodes) unique(DRMP_new$StationCode)
## [1] "510ADVLIM" "510ST1317" "541SJC501" "544LILPSL" "544MDRBH4" "544ADVLM6" "207SRD10A" ## [8] "544DMC020"
nrow(DRMP_new)
## [1] 2499
### FORMAT COLUMN PARAMETERS ### # Standardize MatrixName Groups - "Aqueous", "Sediment", "Soil" # unique(DRMP_new$MatrixName) #Identifies OLDNAMES
## [1] "samplewater" "blankwater" "Samplewater" "Sediment"
#STANDARD CODE TO CHANGE GROUPING NAMES DRMP_new <- DRMP_new %>% filter(MatrixName != 'blankwater') %>% mutate(MatrixName = recode(MatrixName, #COLUMN NAME WHERE CHANGES WILL BE MADE #"OLDNAME" = "NEWNAME" - create a new line for each OLDNAME that needs to be changed "Samplewater" = "Aqueous", "samplewater" = "Aqueous" ) ) unique(DRMP_new$MatrixName) #Lists new naming structure
## [1] "Aqueous" "Sediment"
nrow(DRMP_new)
## [1] 2471
# Standardize WBT (WaterBodyType) Groups - "River/Stream", "Drain/Canal", "Wetland", "Spring", "Slough", # "Pond", "Lake/Reservoir", "Delta", "Forebay/Afterbay", "Not Recorded" # unique(DRMP_new$WBT) #Identifies OLDNAMES
## [1] "River/Stream" "X2"
#Look for inconsistent group pairings between MatrixName and WBT (e.g., 'Soil; Stream') unique(paste(DRMP_new$MatrixName, DRMP_new$WBT, sep='; '))
## [1] "Aqueous; River/Stream" "Aqueous; X2" "Sediment; River/Stream"
#WBT & MatrixName look good # Standardize Analyte Groups - "Mercury, Total", "Mercury, Dissolved", "Mercury, Suspended", & same for Methylmercury # unique(DRMP_new$Analyte) #IF ANALYTE IS IN 2 COLUMNS e.g., (Hg & MeHg); (Total, Dissolved, Suspended) SEE WQP SCRIPT FOR EXAMPLE
## [1] "Dissolved Organic Carbon, Dissolved" "Mercury, Methyl, Dissolved" ## [3] "Mercury, Methyl, Total" "Chlorophyll a, Particulate" ## [5] "Mercury, Dissolved" "Mercury, Total" ## [7] "Total Suspended Solids, Total" "Total Suspended Solids, Volatile" ## [9] "Clay, <0.0039 mm" "Mercury, methyl, Dissolved" ## [11] "Mercury, methyl, Total" "Moisture, Total" ## [13] "Sand, 0.0625 to <2.0 mm" "Silt, 0.0039 to <0.0625 mm" ## [15] "Total Organic Carbon, Total" "Total Suspended Solids, Particulate"
DRMP_new <- DRMP_new %>% mutate(Analyte = recode(Analyte, "Mercury, methyl, Total" = "Methylmercury, Total", "Mercury, Methyl, Total" = "Methylmercury, Total", "Mercury, methyl, Dissolved" = "Methylmercury, Dissolved", "Mercury, Methyl, Dissolved" = "Methylmercury, Dissolved" ) ) %>% filter(grepl('mercury', Analyte, ignore.case=T)) nrow(DRMP_new)
## [1] 1244
#Create 'Analyte' column from Analyte & Analyte_part2 columns - then delete Analyte_part2 column## unique(DRMP_new$Analyte) #New naming structure for Analyte Groupings
## [1] "Methylmercury, Dissolved" "Methylmercury, Total" "Mercury, Dissolved" ## [4] "Mercury, Total"
# Standardize ResultQualCode Groups - "ND", "DNQ", NA# unique(DRMP_new$ResultQualCode) #Identifies OLDNAMES
## [1] "=" "DNQ" "ND"
#ResultQualCodes are good # Format Result Column to Numeric# # Check column for text - based on text user needs to decide what to do if(!is.numeric(DRMP_new$Result)){ if(all(is.na(DRMP_new$Result))){ DRMP_new <- DRMP_new %>% mutate( #Column is all blanks and will be converted to Numeric RL = as.numeric(new) ) cat("'Result' column is all blanks and was converted to numeric format\n") }else{ old <-DRMP_new$Result new <-DRMP_new$Result new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005" #Print what text was found and what is being done cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)), ".\nACTIONS TAKEN:\n", "~explain here~.\n")) #DRMP_new <- DRMP_new %>% # mutate( #Do stuff to prep column to be converted to Numeric # Result = as.numeric(new) # ) } }else{ cat("'Result' column is in numeric format\n")}
## 'Result' column is in numeric format
# Format MDL Column to Numeric# # Check column for text - based on text user needs to decide what to do if(!is.numeric(DRMP_new$MDL)){ if(all(is.na(DRMP_new$MDL))){ DRMP_new <- DRMP_new %>% mutate( #Column is all blanks and will be converted to Numeric RL = as.numeric(new) ) cat("'MDL' column is all blanks and was converted to numeric format\n") }else{ old <-DRMP_new$MDL new <-DRMP_new$MDL new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005" #Print what text was found and what is being done cat(paste0("'MDL' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)), ".\nACTIONS TAKEN:\n", "~explain here~.\n")) #DRMP_new <- DRMP_new %>% # mutate( #Do stuff to prep column to be converted to Numeric # MDL = as.numeric(new) # ) } }else{ cat("'MDL' column is in numeric format\n")}
## 'MDL' column is in numeric format
# Format RL Column to Numeric# # Check column for text - based on text user needs to decide what to do if(!is.numeric(DRMP_new$RL)){ if(all(is.na(DRMP_new$RL))){ DRMP_new <- DRMP_new %>% mutate( #Column is all blanks and will be converted to Numeric RL = as.numeric(new) ) cat("'RL' column is all blanks and was converted to numeric format\n") }else{ old <-DRMP_new$RL new <-DRMP_new$RL new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005" #Print what text was found and what is being done cat(paste0("'RL' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)), ".\nACTIONS TAKEN:\n", "~explain here~.\n")) #DRMP_new <- DRMP_new %>% # mutate( #Do stuff to prep column to be converted to Numeric # RL = as.numeric(new) # ) } }else{ cat("'RL' column is in numeric format\n")}
## 'RL' column is in numeric format
# Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information nrow(DRMP_new) #Number rows before
## [1] 1244
#CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS # DRMP_new <- DRMP_new %>% #Set 0 & negative values as blank mutate(Result = ifelse(Result <= 0, NA_real_, Result), MDL = ifelse(MDL <= 0, NA_real_, MDL), RL = ifelse(RL <= 0, NA_real_, RL)) na_results <- DRMP_new %>% #Record rows where Result, MDL, & RL all equal <NA> filter( is.na(Result) & is.na(MDL) & is.na(RL) ) nrow(na_results)
## [1] 0
DRMP_new <- anti_join(DRMP_new, na_results, by='SourceRow') #returns rows from DRMP_new not matching values in no_result nrow(DRMP_new) #Number rows after
## [1] 1244
# LabSampleID shows that some data are matrix spikes '-MS' or '-MSD' or duplicates '-dup', either remove dups or average with sample DRMP_new$LabSampleID[grep('[A-z]',DRMP_new$LabSampleID)] #show which LabSampleIDs have alphabetic characters
## [1] "2016-2197-ms" "2016-2197-msd" "2016-2758-ms" "2016-2758-msd" "2017-0458-msd" ## [6] "2017-0458-ms" "2017-1310-dup" "2016-2204-ms" "2016-2204-msd" "2017-0472-msd" ## [11] "2017-0472-ms" "2016-2189-msd" "2016-2189-ms" "2016-2757-msd" "2016-2757-ms" ## [16] "2017-0457-msd" "2017-0457-ms" "2016-2203-msd" "2016-2203-ms" "2017-0469-dup" ## [21] "2017-0471-ms" "2017-0471-msd" "2017-1313-ms" "2017-1313-msd" "2016-2201-dup" ## [26] "2016-2191-dup" "2016-2759-dup" "2017-0459-dup" "2017-1311-ms" "2017-1311-msd" ## [31] "2017-2759-ms" "2017-2759-msd" "2017-2758-ms" "2017-2758-msd" "2018-0194-ms" ## [36] "2018-0194-msd" "2018-1106-ms" "2018-1106-msd" "2018-0193-dup" "2018-0280-ms" ## [41] "2018-0280-msd" "2018-0279-ms" "2018-0279-msd" "2018-0277-dup" "2018-0430-ms" ## [46] "2018-0430-msd" "2018-0428-dup" "2018-0437-ms" "2018-0437-msd" "2018-0525-ms" ## [51] "2018-0525-msd" "2018-0526-ms" "2018-0526-msd" "2018-0523-dup" "2018-0738-ms" ## [56] "2018-0738-msd" "2018-0739-ms" "2018-0739-msd" "2018-0736-dup" "2018-1096-ms" ## [61] "2018-1096-msd" "2018-1097-ms" "2018-1097-msd" "2018-1098-ms" "2018-1098-msd" ## [66] "2018-1094-dup" "2017-2798-MS" "2017-2798-MSD" "2018-0125-dup" "2018-0581-MS" ## [71] "2018-0581-MSD" "2018-0582-dup" "2018-0587-MS" "2018-0587-MSD" "2018-1143-dup" ## [76] "2018-1155-MS" "2018-1155-MSD" "2018-1086-ms" "2018-1086-msd" "2018-1087-ms" ## [81] "2018-1087-msd" "2017-2735-ms" "2017-2735-msd" "2017-2734-dup" "2017-2737-ms" ## [86] "2017-2737-msd" "2018-0166-dup" "2018-0172-ms" "2018-0172-msd" "2018-0173-ms" ## [91] "2018-0173-msd" "2018-0260-ms" "2018-0260-msd" "2018-0261-dup" "2018-0259-ms" ## [96] "2018-0259-msd" "2018-0413-ms" "2018-0413-msd" "2018-0412-dup" "2018-0415-ms" ## [101] "2018-0415-msd" "2018-0507-ms" "2018-0507-msd" "2018-0505-dup" "2018-0509-ms" ## [106] "2018-0509-msd" "2018-0721-ms" "2018-0721-msd" "2018-0718-dup" "2018-0719-ms" ## [111] "2018-0719-msd" "2018-1076-ms" "2018-1076-msd" "2018-1077-ms" "2018-1077-msd" ## [116] "2018-1079-dup" "2018-1074-ms" "2018-1074-msd" "2018-1078-ms" "2018-1078-msd" ## [121] "2017-2800-ms" "2017-2800-msd" "2017-2788-dup" "2018-0126-ms" "2018-0126-msd" ## [126] "2018-0125-dup" "2018-0576-ms" "2018-0576-msd" "2018-0575-dup" "2018-1143-dup" ## [131] "2018-1155-ms" "2018-1155-msd" "2018-1283-ms" "2018-1284-ms" "2018-1281-dup" ## [136] "2018-1284-msd" "2018-1283-msd" "2018-1833-msd" "2018-1833-ms" "2018-2092-dup" ## [141] "2019-0092-dup" "2019-0093-ms" "2019-0094-ms" "2019-0093-msd" "2019-0094-msd" ## [146] "2019-0211-dup" "2019-0213-msd" "2019-0213-ms" "2019-0212-ms" "2019-0212-msd" ## [151] "2019-0432-msd" "2019-0431-ms" "2019-0432-ms" "2019-0431-msd" "2019-0634-dup" ## [156] "2019-0635-ms" "2019-0635-msd" "2019-1088-dup" "2018-1274-msd" "2018-1271-dup" ## [161] "2018-1273-ms" "2018-1273-msd" "2018-1274-ms" "2018-1547-msd" "2018-1548-ms" ## [166] "2018-1545-dup" "2018-1548-msd" "2018-1547-ms" "2018-1819-ms" "2018-1818-ms" ## [171] "2018-1818-msd" "2018-1819-msd" "2018-1816-dup" "2018-1822-dup" "2018-2076-ms" ## [176] "2018-2076-msd" "2018-2074-ms" "2018-2074-msd" "2019-0417-msd" "2019-0416-msd" ## [181] "2019-0414-dup" "2019-0416-ms" "2019-0417-ms" "2019-0823-msd" "2019-0823-ms" ## [186] "2019-0822-ms" "2019-0822-msd" "2019-0819-dup" "2019-1076-msd" "2019-1076-ms" ## [191] "2019-1080-msd" "2019-1080-ms" "2018-1270-dup" "2019-0409-dup" "2019-0611-msd" ## [196] "2019-0611-ms" "2019-0616-msd" "2019-0616-ms" "2018-1258-msd" "2018-1258-ms" ## [201] "2018-1255-dup" "2018-1253-ms" "2018-1256-ms" "2018-1254-ms" "2018-1253-msd" ## [206] "2018-1256-msd" "2018-1254-msd" "2018-1260-ms" "2018-1259-msd" "2018-1260-msd" ## [211] "2018-1257-dup" "2018-1259-ms" "2018-1525-msd" "2018-1525-ms" "2018-1532-msd" ## [216] "2018-1532-ms" "2018-1799-msd" "2018-1798-ms" "2018-1798-msd" "2018-1800-dup" ## [221] "2018-1799-ms" "2018-2056-msd" "2018-2056-ms" "2018-2060-dup" "2018-2059-ms" ## [226] "2018-2059-msd" "2019-0059-ms" "2019-0059-msd" "2019-0060-ms" "2019-0060-msd" ## [231] "2019-0061-dup" "2019-0185-msd" "2019-0182-ms" "2019-0182-msd" "2019-0183-dup" ## [236] "2019-0185-ms" "2019-0403-dup" "2019-0401-ms" "2019-0401-msd" "2019-0402-ms" ## [241] "2019-0402-msd" "2019-0600-ms" "2019-0601-dup" "2019-0600-msd" "2019-0607-ms" ## [246] "2019-0607-msd" "2019-0803-ms" "2019-0804-ms" "2019-0801-dup" "2019-0804-msd" ## [251] "2019-0803-msd" "2019-1055-ms" "2019-1055-msd" "2019-1056-ms" "2019-1057-dup" ## [256] "2019-1056-msd" "2019-1267-dup" "2019-1269-ms" "2019-1269-msd" "2019-1270-ms" ## [261] "2019-1270-msd" "2019-1251-ms" "2019-1251-msd" "2019-1252-ms" "2019-1252-msd" ## [266] "2019-1253-dup" "2019-1448-dup" "2019-1449-ms" "2019-1449-msd" "2019-1450-ms" ## [271] "2019-1450-msd" "2019-1442-ms" "2019-1442-msd" "2019-1443-ms" "2019-1443-msd" ## [276] "2019-1444-dup" "2019-1779-dup" "2019-1782-ms" "2019-1782-msd" "2019-1783-ms" ## [281] "2019-1783-msd" "2019-1801-dup" "2019-1807-ms" "2019-1807-msd" "2019-1808-ms" ## [286] "2019-1808-msd" "2019-2018-ms" "2019-2018-msd" "2019-2019-ms" "2019-2019-msd" ## [291] "2019-2020-dup" "2019-2026-dup" "2019-2029-ms" "2019-2029-msd" "2019-2030-ms" ## [296] "2019-2030-msd"
unique(sub('[^A-z]*', '', DRMP_new$LabSampleID, perl=T)) #show unique alphabetic tags
## [1] "" "ms" "msd" "dup" "MS" "MSD"
DRMP_new <- DRMP_new %>% filter(!grepl('ms', LabSampleID, ignore.case=T)) #remove data marked as matrix spike 'ms', 'MS', 'msd', or 'MSD' unique(sub('[^A-z]*', '', DRMP_new$LabSampleID, perl=T)) #only dups are left
## [1] "" "dup"
nrow(DRMP_new)
## [1] 1006
#Add Field Rep note to LabSampleID & SampleTypeCode DRMP_new <- DRMP_new %>% mutate(tempLabSampleID = sub('-[A-z]*$', '', DRMP_new$LabSampleID, perl=T)) %>% # remove '-dup' and use temp column for grouping same IDs arrange(tempLabSampleID, AnalyteName) %>% # arrange so dup sample is always second in list group_by(tempLabSampleID, AnalyteName) %>% mutate(n = n(), #LabSampleID = ifelse(n>1, paste(tempLabSampleID, 'Field Rep', sep='-'), tempLabSampleID), # adds 'Field Rep' to both duplicates in LabSampleID Column LabSampleID = ifelse(n>1, paste(tempLabSampleID, paste0('Field Rep', row_number()), sep='-'), tempLabSampleID), # use this code to add replicate numb e.g., Field Rep1, Field Rep2 #SampleTypeCode = ifelse(n>1, paste(SampleTypeCode, 'Field Rep', sep=' ~ '), SampleTypeCode) SampleTypeCode = ifelse(n>1, paste(SampleTypeCode, paste0('Field Rep', row_number()), sep=' ~ '), SampleTypeCode)) %>% # adds 'Field Rep' to both duplicates in SampleTypeCode Column ungroup %>% select(-tempLabSampleID, -n) # remove temp col used for grouping & 'n' col used to identify duplicates nrow(DRMP_new)
## [1] 1006
#OPT #1 - AVERAGE SAMPLE & DUPLICATE temp1 <- DRMP_new %>% mutate(LabSampleID = ifelse(grepl('Field Rep[0-9]?', LabSampleID), sub('Field Rep[0-9]?', 'Field Rep Avg', LabSampleID), LabSampleID), SampleTypeCode = ifelse(grepl('Field Rep[0-9]?', SampleTypeCode), sub('Field Rep[0-9]?', 'Field Rep Avg', SampleTypeCode), SampleTypeCode)) %>% group_by(LabSampleID, AnalyteName) %>% mutate(Result = mean(Result)) %>% distinct(LabSampleID, AnalyteName, .keep_all=TRUE) nrow(temp1)
## [1] 948
#OPT #2 - REMOVE DUPLICATES temp2 <- DRMP_new %>% mutate(tempLabSampleID = sub('Rep[0-9]?', 'Rep', LabSampleID)) %>% # Remove digit so LabSampleIDs are the same for the same Analyte distinct(tempLabSampleID, AnalyteName, .keep_all=TRUE) %>% select(-tempLabSampleID) nrow(temp2)
## [1] 948
# Format Units Column - "ng/L", "mg/Kg" unique(DRMP_new$Unit) #Identifies OLDNAMES
## [1] "ng/L" "mg/Kg dw" "ug/Kg dw"
# If more than 1 unit colmn exists (e.g., for RL and MDL columns) see WQP script for example on merging into 1 column DRMP_new <- DRMP_new %>% standardizeUnits unique(DRMP_new$Unit) #New naming structure for Unit groupings
## [1] "ng/L" "mg/Kg dw"
# Format Date and Time Column # # NEED TO TALK ABOUT HOW WE WANT TO DO THIS - To graph in R we need Date and Time in same column # THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SEPERATE COLUMNS DRMP_new <- DRMP_new %>% #rowise() %>% # rowise is very slow - so used sapply to make this a rowise operation mutate( #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))), SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(lubridate::ymd_hms(SampleTime), "%H:%M:%S")), #COMBINE DATE AND TIME INTO SampleDateTime COLUMN SampleDateTime = ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')), #FORMAT SampleDateTime COLUMN TO DATE FORMAT SampleDateTime = lubridate::ymd_hms(SampleDateTime) ) ### REMOVE TEMPORARY COLUMNS ### DRMP_new <- DRMP_new %>% select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed #View(DRMP_new) ## SAVE FORMATTED DATA AS EXCEL FILE ## writexl::write_xlsx(DRMP_new, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/DRMP_ceden_format.xlsx') # In excel, to convert SampleDate column to Date format # 1 - Select the date column. # 2 - Go to the Data-tab and choose "Text to Columns". # 3 - On the first screen, leave radio button on "delimited" and click Next. # 4 - Unselect any delimiter boxes (everything blank) and click Next. # 5 - Under column data format choose Date, select YMD # 6 - Click Finish.
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=C ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## system code page: 65001 ## ## attached base packages: ## [1] grid stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] mgcv_1.8-41 nlme_3.1-160 lubridate_1.8.0 plotly_4.10.0 ## [5] readxl_1.4.1 actuar_3.3-0 NADA_1.6-1.1 forcats_0.5.2 ## [9] stringr_1.4.1 dplyr_1.0.9 purrr_0.3.4 readr_2.1.2 ## [13] tidyr_1.2.0 tibble_3.1.8 ggplot2_3.3.6 tidyverse_1.3.2 ## [17] fitdistrplus_1.1-8 survival_3.4-0 MASS_7.3-58.1 ## ## loaded via a namespace (and not attached): ## [1] httr_1.4.4 jsonlite_1.8.0 viridisLite_0.4.1 splines_4.2.2 ## [5] modelr_0.1.9 assertthat_0.2.1 highr_0.9 googlesheets4_1.0.1 ## [9] cellranger_1.1.0 yaml_2.3.5 pillar_1.8.1 backports_1.4.1 ## [13] lattice_0.20-45 glue_1.6.2 digest_0.6.29 rvest_1.0.3 ## [17] colorspace_2.0-3 htmltools_0.5.3 Matrix_1.5-1 pkgconfig_2.0.3 ## [21] broom_1.0.1 haven_2.5.1 webshot_0.5.3 scales_1.2.1 ## [25] tzdb_0.3.0 googledrive_2.0.0 generics_0.1.3 ellipsis_0.3.2 ## [29] withr_2.5.0 lazyeval_0.2.2 cli_3.3.0 magrittr_2.0.3 ## [33] crayon_1.5.1 evaluate_0.16 fs_1.5.2 fansi_1.0.3 ## [37] xml2_1.3.3 tools_4.2.2 data.table_1.14.2 hms_1.1.2 ## [41] expint_0.1-7 gargle_1.2.0 lifecycle_1.0.1 munsell_0.5.0 ## [45] reprex_2.0.2 writexl_1.4.0 compiler_4.2.2 rlang_1.0.5 ## [49] rstudioapi_0.14 htmlwidgets_1.5.4 crosstalk_1.2.0 rmarkdown_2.16 ## [53] gtable_0.3.1 DBI_1.1.3 R6_2.5.1 knitr_1.40 ## [57] fastmap_1.1.0 utf8_1.2.2 stringi_1.7.8 vctrs_0.4.1 ## [61] dbplyr_2.2.1 tidyselect_1.1.2 xfun_0.32
Sys.time()
## [1] "2023-12-29 14:45:32 PST"