@@ -107,12 +107,13 @@ locs.split$check2 = paste0(locs.split$text,
107
107
108
108
# deduplicate the substrings per country,
109
109
# to remove redundancy from the matching process
110
- locs.split %<> % filter(! duplicated(check ))
110
+ locs.split2 = locs.split %> %
111
+ filter(! duplicated(check ))
111
112
112
113
# #
113
114
# ##list all country codes of the locality list
114
115
# #
115
- countries = count(locs.split ,
116
+ countries = count(locs.split2 ,
116
117
COUNTRY_CODE )
117
118
countries %<> % arrange(desc(n ))
118
119
@@ -122,15 +123,15 @@ countries %<>% arrange(desc(n))
122
123
123
124
# slow!
124
125
# per country, match a-z substrings of locality to geonames labels
125
- locs.split $ geoid = NA
126
- locs.split $ lat = NA
127
- locs.split $ long = NA
128
- locs.split $ geoname = NA
129
- locs.split $ geoaltname = NA
130
- locs.split $ cn = NA
131
- out = locs.split [1 ,]
126
+ locs.split2 $ geoid = NA
127
+ locs.split2 $ lat = NA
128
+ locs.split2 $ long = NA
129
+ locs.split2 $ geoname = NA
130
+ locs.split2 $ geoaltname = NA
131
+ locs.split2 $ cn = NA
132
+ out = locs.split2 [1 ,]
132
133
for (i in 1 : dim(countries )[1 ]) {
133
- apm = filter(locs.split ,
134
+ apm = filter(locs.split2 ,
134
135
COUNTRY_CODE == countries $ COUNTRY_CODE [i ])
135
136
geo = filter(data ,
136
137
`country code` == countries $ COUNTRY_CODE [i ])
@@ -249,9 +250,9 @@ exp3 = left_join(exp3,
249
250
lat ,
250
251
long ),
251
252
by = c(" LOCALITY" = " locid" ))
252
- write_tsv(exp3 ,
253
- " enriched specimen data.txt" ,
254
- na = " " )
253
+ # write_tsv(exp3,
254
+ # "enriched specimen data.txt",
255
+ # na="")
255
256
256
257
# #
257
258
# ##overlap with BGBM geonames ids
0 commit comments