This report is automatically generated with the R package knitr (version 1.40).
## LOAD PACKAGES USED IN SCRIPTS ##
source("R Functions/functions_QA data.R")

## LOAD MERGED AQ DATA ##
AqMaster <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/1a_AqMaster_merged.xlsx',
                               sheet=1, guess_max = 30000) %>%
  mutate(WBT = if_else(StationName == 'Robinson Pond', 'River/Stream', WBT)) # after looking at Pond/Lake sites, Lauren requested that Robinson Pond's WBT be corrected to 'River/Stream'

nrow(AqMaster)
## [1] 74496
## PREP DATA FOR GIS SCOPING ##
unique(AqMaster$CoordSystem)
## [1] NA "NR" "NAD83" "WGS84" "NAD27" "nr" "OTHER" "UNKWN"
AqMaster <- AqMaster %>%
  # Fix CoordSystem nomenclature
  mutate(CoordSystem = recode(CoordSystem,
                              "NAD 83"   = 'NAD83',
                              "NAD 1927" = 'NAD27',
                              "NR"       = 'NAD83',
                              "nr"       = 'NAD83',
                              "OTHER"    = 'NAD83',
                              "UNKWN"    = 'NAD83',
                              .missing   = 'NAD83'),
         TargetLatitude  = ifelse(grepl('[a-zA-Z]', TargetLatitude), NA_character_, TargetLatitude),
         TargetLatitude  = gsub("[[:space:]]", "", TargetLatitude),   # remove whitespace in text so NAs aren't introduced by coercion
         TargetLongitude = gsub(intToUtf8(160), "", TargetLongitude), # removes non-breaking space (found in Delta Mendota Canal " -121.578550")
         TargetLatitude  = as.numeric(TargetLatitude),
         TargetLongitude = ifelse(grepl('[a-zA-Z]', TargetLongitude), NA_character_, TargetLongitude),
         TargetLongitude = gsub("[[:space:]]", "", TargetLongitude),  # remove whitespace in text so NAs aren't introduced by coercion
         TargetLongitude = gsub(intToUtf8(160), "", TargetLongitude), # removes non-breaking space (found in Delta Mendota Canal " -121.578550")
         TargetLongitude = as.numeric(TargetLongitude),
         TargetLongitude = ifelse(TargetLongitude > 0, -TargetLongitude, TargetLongitude)) # fix longitudes that were incorrectly entered as a positive value

unique(AqMaster$CoordSystem)
## [1] "NAD83" "WGS84" "NAD27"
missingLatLongPairs <- AqMaster %>%
  filter( (!is.na(TargetLatitude) &  is.na(TargetLongitude)) |
          ( is.na(TargetLatitude) & !is.na(TargetLongitude)) )
nrow(missingLatLongPairs) # should be 0
## [1] 0
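As an aside, the paired-missingness filter above can also be written with base R's xor(), which is logically equivalent and a little harder to get wrong. A minimal sketch on hypothetical data:

library(dplyr)

# Hypothetical sites: one complete pair, one fully missing pair, one half-missing pair
demo <- tibble(TargetLatitude  = c(38.02,   NA, 37.5),
               TargetLongitude = c(-121.58, NA, NA))
demo %>% filter(xor(is.na(TargetLatitude), is.na(TargetLongitude))) # keeps only the half-missing row (37.5, NA)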
# Identify Pond & Lake sites to keep in the linkage - ideally this would have been done in the individual scripts, but it is easiest to do here now
PondLakeSites <- AqMaster %>%
  distinct(StationName, .keep_all=T) %>%
  filter( grepl('pond|lake', StationName, ignore.case=T) |
          WBT %in% c('Pond','Lake/Reservoir') ) %>%
  select(SourceID, StationName, WBT, TargetLatitude, TargetLongitude, CitationCode) %>%
  arrange(SourceID, StationName)
# writexl::write_xlsx(PondLakeSites, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2b.1_PondLakeSites_pulledfromAqMaster.xlsx')

## Then exclude StationNames identified in '2b.2_PondLakeSites_SitesToKeep.xlsx' as not part of the linkage
PondLakeSites <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2b.2_PondLakeSites_SitesToKeep_ManuallyFilled.xlsx',
                                    sheet=1, guess_max = 30000)
excludedSites <- PondLakeSites %>%
  filter(grepl('no', Keep, ignore.case=T)) %>%
  pull(StationName)
all(excludedSites %in% AqMaster$StationName) # should be TRUE - every name in the exclusion list must appear in AqMaster
## [1] TRUE
AqMaster <- AqMaster %>%
  filter(!(StationName %in% excludedSites))
all(!excludedSites %in% AqMaster$StationName) # should be TRUE - none of the excluded sites should remain in AqMaster
## [1] TRUE
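The two all() checks bracketing the filter are a useful belt-and-suspenders idiom: the first proves the exclusion list matches real station names (so a typo cannot silently exclude nothing), and the second proves the filter actually removed them. A minimal sketch with hypothetical names; the %notin% helper below is purely illustrative, not necessarily defined in functions_QA data.R:

`%notin%` <- Negate(`%in%`) # illustrative helper, assumed here for readability

stations <- c("Robinson Pond", "Delta Mendota Canal", "Farm Pond A")
excluded <- c("Farm Pond A")

all(excluded %in% stations)              # TRUE: every excluded name exists before filtering
kept <- stations[stations %notin% excluded]
all(!excluded %in% kept)                 # TRUE: no excluded name survives the filter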
# Look at blank Lat/Longs and see if any can be filled in
hasLatLong <- AqMaster %>%
  filter( !is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(hasLatLong)
## [1] 4350
noLatLong <- AqMaster %>%
  filter( is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(SourceID, StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(noLatLong)
## [1] 345
mergeNoandHas <- rbind(select(noLatLong, -SourceID), hasLatLong)
availLatLong <- mergeNoandHas[duplicated(mergeNoandHas$StationName) |
                              duplicated(mergeNoandHas$StationName, fromLast=TRUE), ]
nrow(availLatLong)
## [1] 34
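The duplicated(x) | duplicated(x, fromLast=TRUE) subsetting above keeps every row whose StationName occurs more than once in the stacked table, i.e. names present in both the has-coords and no-coords sets. A standalone sketch with hypothetical values (a dplyr alternative would be add_count(StationName) %>% filter(n > 1)):

x <- c("A", "B", "A", "C")

duplicated(x)                                 # FALSE FALSE  TRUE FALSE - flags later repeats only
duplicated(x, fromLast=TRUE)                  #  TRUE FALSE FALSE FALSE - flags earlier repeats only
duplicated(x) | duplicated(x, fromLast=TRUE)  #  TRUE FALSE  TRUE FALSE - flags every member of a duplicate group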
# View(availLatLong %>% arrange(StationName)) # make sure it is appropriate for the sites with blank Lat/Longs to get coords from the corresponding sites that have them

# Fill in blank TargetLatitude and TargetLongitude using temporary Lat/Long columns, then delete the temporary columns
hasLatLong <- hasLatLong %>%
  rename(tempLat = TargetLatitude, tempLong = TargetLongitude) %>%
  select(-CitationCode)

AqMaster <- AqMaster %>%
  left_join(., hasLatLong, by='StationName') %>% # add Lat/Long values so that all sites with the same name get the same coords
  mutate(TargetLatitude  = ifelse(is.na(TargetLatitude),  tempLat,  TargetLatitude),   # fill in blank TargetLatitude
         TargetLongitude = ifelse(is.na(TargetLongitude), tempLong, TargetLongitude)) %>% # fill in blank TargetLongitude
  select(-tempLat, -tempLong)

# Check which sites still lack a Lat/Long
hasLatLong <- AqMaster %>%
  filter( !is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(hasLatLong)
## [1] 4350
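The rename-join-ifelse-drop pattern above is effectively a coalesce: keep the existing coordinate when present, otherwise take the looked-up one. dplyr::coalesce() expresses the same fill in a single step; here is a minimal sketch on hypothetical data, offered only as an equivalent formulation:

library(dplyr)

main   <- tibble(StationName = c("A", "B"), TargetLatitude = c(38.1, NA))
lookup <- tibble(StationName = c("A", "B"), tempLat = c(99, 37.9))

main %>%
  left_join(lookup, by = 'StationName') %>%
  mutate(TargetLatitude = coalesce(TargetLatitude, tempLat)) %>% # existing value wins; NA falls through to the lookup value
  select(-tempLat)
# "A" keeps 38.1; "B" is filled with 37.9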
noLatLong <- AqMaster %>%
  filter( is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(SourceID, StationName, TargetLatitude, TargetLongitude, CitationCode) %>%
  arrange(SourceID, StationName)
nrow(noLatLong)
## [1] 328
mergeNoandHas <- rbind(select(noLatLong, -SourceID), hasLatLong)
availLatLong <- mergeNoandHas[duplicated(mergeNoandHas$StationName) |
                              duplicated(mergeNoandHas$StationName, fromLast=TRUE), ]
nrow(availLatLong) # should be 0 now
## [1] 0
# Since noLatLong is > 0 and availLatLong is 0, we need to save noLatLong as an Excel file and add Lat/Longs by hand - then merge those Lat/Longs with AqMaster
# writexl::write_xlsx(noLatLong %>% arrange(SourceID, StationName), path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2c.1_Aq_NeedLatLong.xlsx')

# Load workbook with Lat/Longs that were found by hand
SitesWithLatLongByHand <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2c.2_Aq_LatLong_ManuallyFilled.xlsx') %>%
  mutate(TargetLatitude = if_else(!is.na(`Coords from Google`),
                                  as.numeric(gsub(",.*$", "", `Coords from Google`)),
                                  TargetLatitude),  # substitutes the comma & everything after it with "", keeping the text before the comma as TargetLatitude
         TargetLongitude = if_else(!is.na(`Coords from Google`),
                                   as.numeric(gsub("^.*,\\s", "", `Coords from Google`)),
                                   TargetLongitude)) %>% # substitutes everything up through the comma and following space with "", keeping the text after it as TargetLongitude
  filter(!is.na(TargetLatitude)) %>% # filter out missing Lat/Longs
  select(-SourceID, -CitationCode, -`Coords from Google`, -Notes) %>% # remove these columns so the Lat/Longs can be merged with AqMaster
  rename(tempLat  = TargetLatitude,  # rename Lat/Long columns for the left_join; these columns will be removed after the join
         tempLong = TargetLongitude)

# Join Lat/Longs (found by hand) to AqMaster & save
AqMaster_GISprepped <- AqMaster %>%
  left_join(., SitesWithLatLongByHand, by='StationName') %>% # add hand-entered Lat/Longs
  mutate(TargetLatitude  = ifelse(is.na(TargetLatitude),  tempLat,  TargetLatitude),   # fill in blank TargetLatitude
         TargetLongitude = ifelse(is.na(TargetLongitude), tempLong, TargetLongitude)) %>% # fill in blank TargetLongitude
  select(-tempLat, -tempLong) %>% # remove the temporary columns
  filter(!is.na(TargetLatitude))  # remove sites that still have a blank Lat/Long
writexl::write_xlsx(AqMaster_GISprepped, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2a_AqMaster_GISprepped.xlsx')

# Pull sampling location info for TMDL region & Delta subarea demarcation in GIS
ScopeDaSites <- AqMaster_GISprepped %>%
  mutate(uniqName = paste(StationName, round(TargetLatitude,4), round(TargetLongitude,4))) %>% # must be the same code used in the 3rd script when creating uniqName for "AqMaster"
  distinct(uniqName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CoordSystem, uniqName) %>%
  arrange(StationName)
nrow(ScopeDaSites)
## [1] 5795
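The two gsub() calls above split a single "lat, long" string pasted from Google Maps into its halves: ",.*$" drops the comma and everything after it, while "^.*,\\s" drops everything up through the comma and following space. A standalone check on a hypothetical value:

coords <- "38.0194, -121.5786" # hypothetical entry in the `Coords from Google` column

as.numeric(gsub(",.*$", "", coords))    #  38.0194  - latitude (text before the comma)
as.numeric(gsub("^.*,\\s", "", coords)) # -121.5786 - longitude (text after the comma and space)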
writexl::write_xlsx(ScopeDaSites, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2a_AqSitesToDemarcInGIS.xlsx')
# write.csv(ScopeDaSites, file='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2a_AqSitesToDemarcInGIS.csv', row.names=FALSE)
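One design note on the uniqName key created above: rounding to four decimal places (roughly 11 m of latitude) makes coordinates that differ only in trailing digits collapse to the same key, so near-duplicate records of a station dedupe together, and the same paste() must be repeated verbatim in the 3rd script or the keys will not match. A toy illustration with hypothetical coordinates:

# Two hypothetical records of the same station whose coords differ only past the 4th decimal
paste("Site X", round(38.019421, 4), round(-121.578552, 4)) # "Site X 38.0194 -121.5786"
paste("Site X", round(38.019434, 4), round(-121.578561, 4)) # "Site X 38.0194 -121.5786" - identical key, so distinct() keeps one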
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=C
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
## system code page: 65001
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods   base
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0
## [17] MASS_7.3-58.1
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 data.table_1.14.2
## [17] rstudioapi_0.14     Matrix_1.5-1        rmarkdown_2.16      splines_4.2.2
## [21] googledrive_2.0.0   htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1
## [25] compiler_4.2.2      modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3
## [29] htmltools_0.5.3     tidyselect_1.1.2    viridisLite_0.4.1   fansi_1.0.3
## [33] crayon_1.5.1        tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0
## [37] jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1     DBI_1.1.3
## [41] magrittr_2.0.3      scales_1.2.1        writexl_1.4.0       cli_3.3.0
## [45] stringi_1.7.8       fs_1.5.2            xml2_1.3.3          ellipsis_0.3.2
## [49] generics_0.1.3      vctrs_0.4.1         expint_0.1-7        tools_4.2.2
## [53] glue_1.6.2          hms_1.1.2           fastmap_1.1.0       yaml_2.3.5
## [57] colorspace_2.0-3    gargle_1.2.0        rvest_1.0.3         knitr_1.40
## [61] haven_2.5.1
Sys.time()
## [1] "2023-12-29 11:50:46 PST"