1
/*
 * mendeleyBibFix - correct formatting of bib-files that are automatically
 * generated by Mendeley Desktop
 *
 * NOTE: Mendeley Desktop is copyright 2009-2013 by Mendeley Ltd.
 * This software is not provided by Mendeley and the author has no affiliation
 * with their company.
 *
 * Documentation:
 * This is a simple program intended to correct bib-files that are
 * automatically generated by Mendeley Desktop. I have found it to work
 * for bib-files generated with the IEEE citation style, but it should
 * work for other styles as well. It makes the following corrections:
 * - changes double braces around titles to single braces
 * - removes escaping of { and } (will only matter if you checked
 *   "Escape LaTeX special characters" in the "Bibtex" Options tab)
 * - removes the URL from any entry whose type is not specified as an
 *   exception (read the comment block after the start of the main function
 *   to see how to change the exceptions)
 * - removes braces around months
 *
 * It should work correctly for files generated by Mendeley Desktop v1.16.1.
 *
 * A number of fixes are hard-coded, i.e., the code expects to know where the
 * braces are. So this code runs very fast (bib-files with hundreds of entries
 * are fixed in a small fraction of a second) but may not be "future-proof".
 *
 * You will need to compile this code to run it. A compiled version for Windows
 * is included on the release page on GitHub. If you are going to compile it
 * yourself with gcc, then you will need the -std=c99 option.
 *
 * Call syntax (Windows):
 * mendeleyBibFix.exe [OUTPUT_FILENAME] [INPUT_FILENAME]
 *
 * Both arguments are optional. If there is only one argument, then it is
 * assumed to be the output filename. The default input filename is
 * "library.bib", and the default output filename is "library_fixed.bib".
 *
 * Copyright 2016 Adam Noel. All rights reserved.
 * Distributed under the New BSD license. See LICENSE.txt for license details.
 *
 * Created June 15, 2016
 * Current version v1.0 (2016-06-15)
 *
 * Revision history:
 *
 * Revision v1.0
 * - File created
 *
 *
 */
52
+
53
#include <stdio.h>
#include <stdlib.h> // for exit(), malloc(), free()
#include <string.h> // for strcpy(), strlen(), strcmp(), strncmp(), memmove()
#include <stdbool.h> // for bool, true, false; requires C99
#include <time.h> // for clock(), used to time the entry fixing

// Size of the buffer that holds the type of a bib entry (the text between
// '@' and '{'), including the terminating NUL
#define BIB_TYPE_MAX 25

// Function declarations
char * stringAllocate(long stringLength);
char * stringWrite(char * src);
unsigned long findEndOfLine(char * str, unsigned long startInd);
65
+
66
+ //
67
+ // MAIN
68
+ //
69
+ int main (int argc , char * argv [])
70
+ {
71
+ // MODIFY THIS BLOCK TO ADD/REMOVE BIB ENTRY TYPES THAT
72
+ // SHOULD HAVE A URL DISPLAYED. BY DEFAULT, ALL URLS
73
+ // ARE REMOVED FROM THE BIB-FILE.
74
+ // TO ADD AN EXCEPTION:
75
+ // 1) INCREMENT NUM_URL_EXCEPTIONS
76
+ // 2) APPEND THE NEW EXCEPTION TO THE LAST INDEX OF
77
+ // URL_EXCEPTION_TYPES (WRITE WITHOUT THE '@' PREFIX).
78
+ // TO REMOVE AN EXCEPTION:
79
+ // 1) DECREMENT NUM_URL_EXCEPTIONS
80
+ // 2) REMOVE EXCEPTION STRING WRITTEN TO URL_EXCEPTION_TYPES
81
+ // 3) CORRECT INDICES OF REMAINING EXCEPTIONS SO THAT THEY
82
+ // GO FROM 0 TO (NUM_URL_EXCEPTIONS-1)
83
+ // NOTE: MENDELEY EXPORTS A "WEB PAGE" ENTRY AS "misc"
84
+ const int NUM_URL_EXCEPTIONS = 2 ;
85
+ const char * URL_EXCEPTION_TYPES [NUM_URL_EXCEPTIONS ];
86
+ URL_EXCEPTION_TYPES [0 ] = "misc" ;
87
+ URL_EXCEPTION_TYPES [1 ] = "unpublished" ;
88
+ // END OF USER-MODIFIED BLOCK
89
+
90
+ int curException ;
91
+ bool bUrlException ;
92
+ char bibType [BIB_TYPE_MAX ];
93
+
94
+ char INPUT_DEFAULT [] = "library.bib" ;
95
+ char OUTPUT_DEFAULT [] = "library_fixed.bib" ;
96
+
97
+ char * inputName ;
98
+ char * outputName ;
99
+
100
+ FILE * inputFile ;
101
+ FILE * outputFile ;
102
+
103
+ unsigned long fileLength ;
104
+ unsigned long temp ; // Garbage variable for discarded file content length
105
+ char * inputContent ;
106
+ char * outputContent ;
107
+
108
+ unsigned long curInputInd , curInputAnchorInd ;
109
+ char curInputChar ;
110
+ unsigned long curEntryInd ;
111
+ char curEntryChar ;
112
+
113
+ unsigned long numEntry = 0 ;
114
+ char * curBibEntry ;
115
+ char * curBibFixed ;
116
+ unsigned long curBibInd , curBibLength , indEOL ;
117
+
118
+
119
+ // Timer variables
120
+ clock_t startTime , endTime ;
121
+
122
+ // Read in output filename if defined
123
+ if (argc > 2 )
124
+ {
125
+ inputName = stringWrite (argv [2 ]);
126
+ } else
127
+ {
128
+ inputName = stringWrite (INPUT_DEFAULT );
129
+ }
130
+
131
+ // Read in input filename if defined
132
+ if (argc > 1 )
133
+ {
134
+ outputName = stringWrite (argv [1 ]);
135
+ } else
136
+ {
137
+ outputName = stringWrite (OUTPUT_DEFAULT );
138
+ }
139
+
140
+ // Open input file
141
+ inputFile = fopen (inputName , "r" );
142
+ if (inputFile == NULL )
143
+ {
144
+ fprintf (stderr ,"ERROR: Input file \"%s\" not found.\n" ,inputName );
145
+ exit (EXIT_FAILURE );
146
+ }
147
+ printf ("Successfully opened input file at \"%s\".\n" , inputName );
148
+
149
+ // Read in contents of input file
150
+ fseek (inputFile , 0 , SEEK_END );
151
+ fileLength = ftell (inputFile );
152
+ fseek (inputFile ,0 ,SEEK_SET );
153
+ inputContent = malloc (fileLength + 1 );
154
+ outputContent = malloc (fileLength + 1 ); // Output will be no longer than input
155
+ if (inputContent == NULL
156
+ || outputContent == NULL )
157
+ {
158
+ fprintf (stderr ,"ERROR: Memory could not be allocated to store the input file contents.\n" );
159
+ exit (EXIT_FAILURE );
160
+ }
161
+ temp = fread (inputContent ,1 ,fileLength ,inputFile );
162
+ fclose (inputFile );
163
+ printf ("Successfully read and closed input file.\n" );
164
+
165
+ //
166
+ // Scan and fix bib entries
167
+ //
168
+ numEntry = 0 ;
169
+ startTime = clock ();
170
+ curInputInd = 0 ;
171
+ curInputAnchorInd = 0 ;
172
+ outputContent [0 ] = '\0' ; // Initialize output string as empty
173
+ while (true)
174
+ {
175
+ // Find start of next entry
176
+ while (inputContent [curInputInd ] != '@' )
177
+ {
178
+ if (inputContent [curInputInd ] == '\0' )
179
+ break ; // Reached EOF. No more entries to scan
180
+ else
181
+ curInputInd ++ ;
182
+ }
183
+
184
+ if (inputContent [curInputInd ] == '\0' )
185
+ break ;
186
+
187
+ curInputAnchorInd = curInputInd ++ ;
188
+
189
+ // Find end of entry
190
+ while (true)
191
+ {
192
+ if ((inputContent [curInputInd ] == '}'
193
+ && inputContent [curInputInd - 1 ] == '\n' )
194
+ || inputContent [curInputInd ] == '\0' )
195
+ break ; // Reached end of current entry (or EOF)
196
+ else
197
+ curInputInd ++ ;
198
+
199
+ }
200
+
201
+ if (inputContent [curInputInd ] == '\0' )
202
+ break ;
203
+
204
+ // Current entry goes from inputContent[curInputAnchorInd]
205
+ // to inputContent[curInputInd]+1
206
+ curBibLength = curInputInd - curInputAnchorInd + 2 ;
207
+ curBibEntry = malloc ((curBibLength + 1 )* sizeof (char ));
208
+ if (curBibEntry == NULL )
209
+ {
210
+ fprintf (stderr ,"ERROR: Memory could not be allocated to copy bib entry %u.\n" , numEntry );
211
+ exit (EXIT_FAILURE );
212
+ }
213
+ for (curBibInd = 0 ; curBibInd < curBibLength ; curBibInd ++ )
214
+ {
215
+ curBibEntry [curBibInd ] = inputContent [curInputAnchorInd + curBibInd ];
216
+ }
217
+ curBibEntry [curBibInd ] = '\0' ;
218
+
219
+ // curBibEntry is now a valid substring of the original input file
220
+ // Apply fixes as necessary
221
+ curBibInd = 1 ; // We know first character is '@'
222
+
223
+ // Check URL exception types
224
+ bUrlException = false;
225
+ while (curBibEntry [curBibInd ] != '{'
226
+ && curBibInd < BIB_TYPE_MAX )
227
+ {
228
+ bibType [curBibInd - 1 ] = curBibEntry [curBibInd ];
229
+ curBibInd ++ ;
230
+ }
231
+ bibType [curBibInd - 1 ] = '\0' ;
232
+
233
+ for (curException = 0 ; curException < NUM_URL_EXCEPTIONS ; curException ++ )
234
+ {
235
+ if (!strcmp (bibType ,URL_EXCEPTION_TYPES [curException ]))
236
+ {
237
+ bUrlException = true; // Current type of entry needs to keep URL
238
+ break ;
239
+ }
240
+ }
241
+
242
+ // Scan Remainder of entry
243
+ while (curBibEntry [curBibInd ] != '\0' )
244
+ {
245
+ if (curBibEntry [curBibInd ] == '\n' )
246
+ {
247
+ // We're at the start of a line in the current bib entry
248
+ // Scan ahead to see if its an entry that we need to fix
249
+ if (!strncmp (& curBibEntry [curBibInd + 1 ], "month =" ,7 ))
250
+ { // Next line lists month. Format should be mmm
251
+ // and not {mmm}
252
+ if (curBibEntry [curBibInd + 9 ] == '{'
253
+ && curBibEntry [curBibInd + 13 ] == '}' )
254
+ {
255
+ curBibEntry [curBibInd + 9 ] = curBibEntry [curBibInd + 10 ];
256
+ curBibEntry [curBibInd + 10 ] = curBibEntry [curBibInd + 11 ];
257
+ curBibEntry [curBibInd + 11 ] = curBibEntry [curBibInd + 12 ];
258
+ // Delete offsets 12 and 13
259
+ memmove (& curBibEntry [curBibInd + 12 ], & curBibEntry [curBibInd + 14 ],
260
+ curBibLength - curBibInd - 13 );
261
+ curBibLength -= 2 ;
262
+ }
263
+ } else if (!strncmp (& curBibEntry [curBibInd + 1 ], "title =" ,7 ))
264
+ { // Title is supposed to be surrounded by 1 set of braces and not 2
265
+ // Remove extra set of curly braces
266
+ indEOL = findEndOfLine (curBibEntry , curBibInd + 1 );
267
+ // Shift title over extra opening curly brace
268
+ memmove (& curBibEntry [curBibInd + 10 ], & curBibEntry [curBibInd + 11 ],
269
+ indEOL - curBibInd - 13 );
270
+ // Shift remaining text over extra closing curly brace
271
+ memmove (& curBibEntry [indEOL - 3 ], & curBibEntry [indEOL - 1 ],
272
+ curBibLength - indEOL + 2 );
273
+ curBibLength -= 2 ;
274
+ } else if (!bUrlException
275
+ && !strncmp (& curBibEntry [curBibInd + 1 ], "url =" ,5 ))
276
+ { // Entry has a URL but it should be removed. Erase the whole line
277
+ indEOL = findEndOfLine (curBibEntry , curBibInd + 1 );
278
+ memmove (& curBibEntry [curBibInd + 1 ], & curBibEntry [indEOL + 1 ],
279
+ curBibLength - indEOL + 1 );
280
+ curBibLength -= indEOL - curBibInd ;
281
+ }
282
+ } else if (!strncmp (& curBibEntry [curBibInd ], "{\\{}" ,4 ))
283
+ { // We have an incorrectly formatted opening curly brace
284
+ // Remove 3 characters of memory
285
+ memmove (& curBibEntry [curBibInd + 1 ], & curBibEntry [curBibInd + 4 ],
286
+ curBibLength - curBibInd - 2 );
287
+ curBibLength -= 3 ;
288
+ } else if (!strncmp (& curBibEntry [curBibInd ], "{\\}}" ,4 ))
289
+ { // We have an incorrectly formatted closing curly brace
290
+ // Remove 3 characters of memory
291
+ curBibEntry [curBibInd ] = '}' ;
292
+ memmove (& curBibEntry [curBibInd + 1 ], & curBibEntry [curBibInd + 4 ],
293
+ curBibLength - curBibInd - 2 );
294
+ curBibLength -= 3 ;
295
+ }
296
+
297
+ curBibInd ++ ;
298
+ }
299
+
300
+ // Write fixed entry to output string
301
+ strcat (outputContent , curBibEntry );
302
+
303
+ numEntry ++ ;
304
+ }
305
+
306
+ endTime = clock ();
307
+ printf ("Entry fixing took %f seconds\n" , (double ) (endTime - startTime )/CLOCKS_PER_SEC );
308
+
309
+ // Write output string to output file
310
+ if ((outputFile = fopen (outputName , "w" )) == NULL )
311
+ {
312
+ fprintf (stderr ,"ERROR: Cannot create output file \"%s\".\n" ,outputName );
313
+ exit (EXIT_FAILURE );
314
+ }
315
+ printf ("Successfully created output file at \"%s\".\n" , outputName );
316
+
317
+ fprintf (outputFile , "%s" , outputContent );
318
+ fclose (outputFile );
319
+ printf ("Successfully wrote and closed output file with %u entries.\n" , numEntry );
320
+
321
+ // Cleanup
322
+ free (inputContent );
323
+ free (inputName );
324
+ free (outputName );
325
+ free (curBibEntry );
326
+ free (curBibFixed );
327
+
328
+ return 0 ;
329
+ }
330
+
331
// Allocate memory for a string
//
// stringLength - number of characters the string must hold, not counting
//                the terminating NUL (one extra byte is allocated for it)
//
// Returns a pointer to uninitialized memory that the caller must free().
// Never returns NULL: if the length is negative or the allocation fails,
// an error is printed to stderr and the program exits with EXIT_FAILURE.
char * stringAllocate(long stringLength)
{
    char * string = NULL;

    // A negative length would wrap around when converted to a size, so it
    // is treated the same as a failed allocation
    if (stringLength >= 0)
        string = malloc((size_t) stringLength + 1);

    if (string == NULL)
    {
        fprintf(stderr,"ERROR: Memory could not be allocated for string copy.\n");
        exit(EXIT_FAILURE);
    }

    return string;
}
343
+
344
// Duplicate a string into newly allocated memory
//
// src - NUL-terminated string to copy
//
// Returns a pointer to the copy, which the caller must free(). The memory
// comes from stringAllocate().
char * stringWrite(char * src)
{
    size_t srcLength = strlen(src);
    char * copy = stringAllocate(srcLength);

    // Copy the characters together with the terminating NUL
    memcpy(copy, src, srcLength + 1);

    return copy;
}
352
+
353
// Find next end of line in current string
//
// str      - NUL-terminated string to search
// startInd - index in str at which the search starts
//
// Returns the index of the first '\n' at or after startInd. If the string
// ends before a '\n' is found, returns the index of the terminating NUL
// instead of reading past the end of the string.
unsigned long findEndOfLine(char * str, unsigned long startInd)
{
    unsigned long curInd = startInd;

    while (str[curInd] != '\n' && str[curInd] != '\0')
        curInd++;

    return curInd;
}
0 commit comments