1
#' Run every export that is switched on in `export_type`.
#'
#' An export runs when its entry in `export_type` equals the string `"true"`.
#' Entries that are missing, `NA`, or anything else are treated as "off".
#'
#' @param match_results Match results handed to each exporter.
#' @param data Passed through to `export_to_dwc_geo()` and
#'   `export_dissco_annotation()`.
#' @param property Passed through to `export_to_dwc_geo()` and
#'   `export_dissco_annotation()`.
#' @param foldername Passed through to every exporter.
#' @param export_type Object read with `$`; the entries `dwc_geo`,
#'   `ambiguous`, `fst` and `dissco` are consulted.
#' @return The value of the final `if`: the result of
#'   `export_dissco_annotation()` when that export runs, otherwise `NULL`
#'   (invisibly).
export <- function(match_results,
                   data,
                   property,
                   foldername,
                   export_type) {
  # isTRUE() keeps a missing (NULL) or NA flag from aborting the whole run
  # with "argument is of length zero" / "missing value where TRUE/FALSE needed".
  is_enabled <- function(flag) isTRUE(flag == "true")

  if (is_enabled(export_type$dwc_geo)) {
    export_to_dwc_geo(match_results, data, property, foldername, "geo")
  }

  if (is_enabled(export_type$ambiguous)) {
    # Only the locations that have more than one candidate match.
    export_to_dwc_geo(
      ambiguous_results(match_results, omit = FALSE),
      data,
      property,
      foldername,
      "ambiguous-geo"
    )
  }

  if (is_enabled(export_type$fst)) {
    save_fst(match_results, foldername)
  }

  if (is_enabled(export_type$dissco)) {
    export_dissco_annotation(match_results, data, property, foldername)
  }
}
36
+
37
#' Write match results as a tab-separated table with Darwin Core style columns.
#'
#' Joins `match_results` to `data` (`locid` on the `match_results` side against
#' the column named by `property` on the `data` side), derives `locationID`
#' and `locationRemarks`, keeps a fixed set of columns and writes them with
#' `write_tsv()` to a path produced by `generate_filename()`.
#'
#' @param match_results Table with at least the columns `locid`, `geonameid`,
#'   `score`, `n` and `name`.
#' @param data Table joined onto `match_results`; after the join the columns
#'   `gbifID`, `occurrenceID` and `countryCode` must be available.
#' @param property Name (string) of the column in `data` that is matched
#'   against `locid`; that column is also kept in the output.
#' @param foldername Passed to `generate_filename()`.
#' @param export_type Passed to `generate_filename()` as its `type` argument.
#' @return Whatever `write_tsv()` returns.
export_to_dwc_geo <- function(match_results,
                              data,
                              property,
                              foldername,
                              export_type) {
  dwc_table <- match_results %>%
    left_join(
      data,
      by = c("locid" = property),
      relationship = "many-to-many"
    ) %>%
    mutate(
      # Was "htts://", which produced unusable links; now matches the prefix
      # used by export_dissco_annotation().
      locationID = paste0("https://www.geonames.org/", geonameid),
      locationRemarks = paste0(
        "Score: ", score,
        ", # of matches: ", n,
        ", Geonames label: ", name
      )
    ) %>%
    select(
      gbifID,
      occurrenceID,
      locationID,
      !!property,
      countryCode,
      locationRemarks
    )

  filename <- generate_filename(foldername, export_type, "txt")
  write_tsv(dwc_table, filename)
}
67
+
68
#' Write match results as a JSON file of annotation records.
#'
#' Joins `match_results` to `data` (`locid` against the column named by
#' `property`), builds one annotation per joined row and writes the list as
#' pretty-printed JSON to a path produced by `generate_filename()`.
#'
#' Each record's `score` is the row's score divided by the highest score in
#' the joined table. A table with zero rows yields an empty JSON array.
#'
#' @param match_results Table with at least the columns `locid`, `geonameid`,
#'   `name` and `score`.
#' @param data Table joined onto `match_results`; the joined table must
#'   contain `gbifID`.
#' @param property Name (string) of the column in `data` that is matched
#'   against `locid`.
#' @param foldername Passed to `generate_filename()`.
#' @return The value of `write()` (called for its side effect).
export_dissco_annotation <- function(match_results,
                                     data,
                                     property,
                                     foldername) {
  joined <- match_results %>%
    left_join(
      data,
      by = c("locid" = property),
      relationship = "many-to-many"
    )

  n_rows <- nrow(joined)
  # Guarded so an empty table does not trigger max()'s "-Inf" warning.
  max_score <- if (n_rows > 0) max(joined$score) else NA_real_

  # seq_len() gives an empty sequence for zero rows, where 1:0 would have
  # iterated over c(1, 0).
  annotations <- lapply(seq_len(n_rows), function(i) {
    guid <- uuid::UUIDgenerate()
    list(
      data = list(
        id = guid,
        type = "Annotation",
        attribution = list(
          id = guid,
          version = 1,
          type = "Annotation",
          motivation = "linking",
          target = list(
            id = joined$gbifID[i],
            type = "digital_specimen",
            indvProp = "dwc:locationID"
          ),
          body = list(
            type = "dwc:locationID",
            value = paste0("https://www.geonames.org/", joined$geonameid[i]),
            description = paste0("geonames label: ", joined$name[i]),
            score = joined$score[i] / max_score
          )
        )
      )
    )
  })

  # Namespace-qualified calls fail loudly when a package is missing, unlike
  # require(), which only returns FALSE.
  json <- jsonlite::toJSON(annotations, pretty = TRUE, auto_unbox = TRUE)

  filename <- generate_filename(foldername, "dissco", "json")
  write(json, filename)
}
110
+
111
#' Build a timestamped output path and make sure its directory exists.
#'
#' The result has the form
#' `data/output/<type>/<name>_<YYYY-MM-DD_HH.MM.SS>.<extension>`, where
#' `<name>` is `foldername` with any `/occurrence.txt` removed and everything
#' up to the last `/` stripped. The directory `data/output/<type>` is created
#' (relative to the working directory) when it does not exist yet.
#'
#' @param foldername Path from which the base name of the output is derived.
#' @param type Name of the sub-directory under `data/output/`.
#' @param extension File extension, without the leading dot.
#' @return The generated file path as a character string.
generate_filename <- function(foldername,
                              type,
                              extension) {
  # Explicit pattern instead of regex surgery on as.character(Sys.time()),
  # whose output (fractional seconds, dropped midnight time) varies.
  timestamp <- format(Sys.time(), "%Y-%m-%d_%H.%M.%S")

  out_dir <- paste0("data/output/", type)

  dataset <- gsub("/occurrence.txt", "", foldername, fixed = TRUE)
  dataset <- gsub(".*/", "", dataset)

  if (!dir.exists(out_dir)) {
    # recursive = TRUE also creates data/ and data/output/ on a fresh checkout.
    dir.create(out_dir, recursive = TRUE)
  }

  paste0(out_dir, "/", dataset, "_", timestamp, ".", extension)
}
140
+
141
#' Save a data frame in fst format under a generated file name.
#'
#' The path comes from `generate_filename()` with both type and extension set
#' to `"fst"`.
#'
#' @param df Data frame to write.
#' @param foldername Passed to `generate_filename()`.
#' @return Whatever `fst::write_fst()` returns.
save_fst <- function(df,
                     foldername) {
  filename <- generate_filename(foldername, "fst", "fst")
  # Namespace-qualified so a missing fst package raises a clear error;
  # require() would only have returned FALSE.
  fst::write_fst(df, filename)
}
149
+
150
#' Keep or drop the rows that belong to ambiguous locations.
#'
#' A `locid` is ambiguous when at least one of its rows has `n > 1`.
#'
#' @param match_results Table with the columns `locid` and `n`.
#' @param omit If `TRUE`, return only rows whose `locid` is not ambiguous;
#'   if `FALSE`, return only rows whose `locid` is ambiguous.
#' @return The filtered `match_results`.
ambiguous_results <- function(match_results,
                              omit) {
  ambiguous_ids <- filter(match_results, n > 1)$locid

  if (omit) {
    return(filter(match_results, !locid %in% ambiguous_ids))
  }

  filter(match_results, locid %in% ambiguous_ids)
}
0 commit comments