forked from adamjgnoel/mendeleyBibFix
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmendeleyBibFix.c
475 lines (428 loc) · 15.8 KB
/
mendeleyBibFix.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
/*
* mendeleyBibFix - correct formatting of bib-files that are automatically
* generated by Mendeley Desktop
*
* NOTE: Mendeley Desktop is copyright 2008-2019 by Mendeley Ltd.
* This software is not provided by Mendeley and the author has no affiliation
* with their company.
*
* Documentation:
* This is a simple function intended to correct bib-files that are
* automatically generated by Mendeley Desktop. I have found it to work
* for bib-files generated with the IEEE citation style, but it should
* work for other styles as well. It makes the following corrections:
* - changes double braces around titles to single braces
* - removes escaping of { and } (will only matter if you checked
* "Escape LaTeX special characters" in the "Bibtex" Options tab)
* - removes URL for any entry that is not specified as an exception
* (read the comment block after start of main function to read
* how to change the exceptions)
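* - removes braces around months
* - removes the "annote" and "file" fields
* - renames "issn" to "year" for any entry that has an ISSN but no year
* - leaves out entries whose author field contains "Potashov"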
*
* It should work correctly for files generated by Mendeley Desktop v1.16.1.
* Still functioning as of v1.19.5.
*
* A number of fixes are hard-coded, i.e., it expects to know where the braces are.
* So this code runs very fast (bib files with hundreds of entries are fixed in a
* small fraction of a second) but may not be "future-proof"
*
* You will need to compile this code to run it. A compiled version for Windows is
* included on the release page of Github. If you are going to compile it yourself with gcc,
* then you will need the -std=c99 option
*
* Call syntax (Windows):
* mendeleyBibFix.exe [OUTPUT_FILENAME] [INPUT_FILENAME]
* Call syntax (Linux or macOS):
* ./mendeleyBibFix [OUTPUT_FILENAME] [INPUT_FILENAME]
*
* Both arguments are optional. If there is only one argument, then it is assumed to be
* the output filename. The default input filename is "library.bib", and the default
* output filename is "library_fixed.bib". If you're fine with the defaults, then
* you can also just double-click on the executable without needing a terminal open.
*
* Copyright 2016-2019 Adam Noel. All rights reserved.
* Distributed under the New BSD license. See LICENSE.txt for license details.
*
* Created June 15, 2016
* Current version v1.2.2 (2019-08-26)
*
* Revision history:
*
* Revision v1.2.2 (2019-08-26)
* - modified detection of fields to search for a newline character after the "},". This
* helps to prevent the partial removal of the "annote" field when the text within
* includes curly braces.
*
* Revision v1.2.1 (2017-04-26)
* - fixed removal of "file" field to properly deal with accented names in the file name
*
* Revision v1.2 (2017-03-17)
* - added removal of "file" field, which lists location of local soft copy
*
* Revision v1.1 (2016-10-26)
* - added removal of "annote" field, which includes personal annotations
*
* Revision v1.0.3 (2016-06-19)
* - corrected detection of bib entry after a URL that gets removed
* - added workaround to enable a custom date, "to appear", "in press", or any other custom
* data at end of entry. If an entry has an ISSN but no year, then the ISSN is renamed
* to the year.
*
* Revision v1.0.2 (2016-06-15)
* - removed unused variables
*
* Revision v1.0.1 (2016-06-15)
* - corrected end of bib entry detection to not catch annotations as false alarms
*
* Revision v1.0 (2016-06-15)
* - File created
*
*
*/
#include <stdio.h>
#include <stdlib.h> // for exit(), malloc
#include <string.h> // for strcpy()
#include <stdbool.h> // for C++ bool naming, requires C99
#include <time.h> // For time record keeping
#define BIB_TYPE_MAX 25
// Function declarations
char * stringAllocate(long stringLength);
char * stringWrite(char * src);
unsigned long findEndOfLine(char * str, unsigned long startInd);
unsigned long findEndOfField(char * str, unsigned long startInd);
char * substr(const char * text, int beg, int end);
//
// MAIN
//
// Remove the characters entry[first] .. entry[last] (inclusive) from a
// NUL-terminated bib entry by shifting the remainder of the string left.
//   entry       - buffer holding the entry
//   entryLength - in/out: index of the entry's NUL terminator; reduced by the
//                 number of characters removed
//   first, last - span to remove. A span running past the end of the entry is
//                 clamped; an empty or out-of-range span is ignored
static void eraseRange(char * entry, unsigned long * entryLength,
    unsigned long first, unsigned long last)
{
    if(first >= *entryLength || last < first)
        return;
    if(last >= *entryLength)
        last = *entryLength - 1;
    // Move everything after the span, up to and including the terminator at
    // entry[*entryLength], and nothing beyond it
    memmove(&entry[first], &entry[last+1], *entryLength - last);
    *entryLength -= last - first + 1;
}

// Program entry point.
//   argv[1] (optional) - output filename, default "library_fixed.bib"
//   argv[2] (optional) - input filename, default "library.bib"
// Reads the whole input file, fixes every bib entry found in it (an entry
// starts at '@' and ends at a '}' that stands alone on its line), and writes
// the fixed entries to the output file.
// Returns 0 on success; exits with EXIT_FAILURE if a file cannot be opened or
// memory cannot be allocated.
int main(int argc, char *argv[])
{
    // MODIFY THIS BLOCK TO ADD/REMOVE BIB ENTRY TYPES THAT
    // SHOULD HAVE A URL DISPLAYED. BY DEFAULT, ALL URLS
    // ARE REMOVED FROM THE BIB-FILE.
    // TO ADD AN EXCEPTION:
    // 1) INCREMENT NUM_URL_EXCEPTIONS
    // 2) APPEND THE NEW EXCEPTION TO THE LAST INDEX OF
    // URL_EXCEPTION_TYPES (WRITE WITHOUT THE '@' PREFIX).
    // TO REMOVE AN EXCEPTION:
    // 1) DECREMENT NUM_URL_EXCEPTIONS
    // 2) REMOVE EXCEPTION STRING WRITTEN TO URL_EXCEPTION_TYPES
    // 3) CORRECT INDICES OF REMAINING EXCEPTIONS SO THAT THEY
    // GO FROM 0 TO (NUM_URL_EXCEPTIONS-1)
    // NOTE: MENDELEY EXPORTS A "WEB PAGE" ENTRY AS "misc"
    const int NUM_URL_EXCEPTIONS = 2;
    const char *URL_EXCEPTION_TYPES[NUM_URL_EXCEPTIONS];
    URL_EXCEPTION_TYPES[0] = "misc";
    URL_EXCEPTION_TYPES[1] = "unpublished";
    // END OF USER-MODIFIED URL EXCEPTION BLOCK

    // Entries whose author line contains this text are not written to the output
    const char EXCLUDED_AUTHOR[] = "Potashov";

    int curException;
    bool bUrlException;
    char bibType[BIB_TYPE_MAX];

    char INPUT_DEFAULT[] = "library.bib";
    char OUTPUT_DEFAULT[] = "library_fixed.bib";
    char * inputName;
    char * outputName;
    FILE * inputFile;
    FILE * outputFile;
    long fileSize;              // Raw ftell() result, negative on failure
    unsigned long fileLength;
    unsigned long temp;         // Number of bytes actually read from the input file
    char * inputContent;
    char * outputContent;
    unsigned long outputLength = 0; // Number of characters written to outputContent
    unsigned long curInputInd, curInputAnchorInd;

    // Bib-entry variables
    unsigned long numEntry = 0;
    char * curBibEntry = NULL;
    unsigned long curBibInd, curBibLength, indEOL;

    // Year-tracking variables
    bool bHasYear;  // Current entry defined the year
    bool bHasISSN;  // Current entry defined the ISSN
    unsigned long issnInd = 0;  // Index of the issn in the current entry.
                                // This entry is renamed to the year if year is not defined
    bool bIsMyPuplication;      // Current entry lists EXCLUDED_AUTHOR as an author

    // Timer variables
    clock_t startTime, endTime;

    // Read in input filename if defined (second argument)
    if(argc > 2)
    {
        inputName = stringWrite(argv[2]);
    } else
    {
        inputName = stringWrite(INPUT_DEFAULT);
    }

    // Read in output filename if defined (first argument)
    if(argc > 1)
    {
        outputName = stringWrite(argv[1]);
    } else
    {
        outputName = stringWrite(OUTPUT_DEFAULT);
    }

    // Open input file
    inputFile = fopen(inputName, "r");
    if(inputFile == NULL)
    {
        fprintf(stderr,"ERROR: Input file \"%s\" not found.\n",inputName);
        exit(EXIT_FAILURE);
    }
    printf("Successfully opened input file at \"%s\".\n", inputName);

    // Read in contents of input file
    if(fseek(inputFile, 0, SEEK_END) != 0
        || (fileSize = ftell(inputFile)) < 0
        || fseek(inputFile, 0, SEEK_SET) != 0)
    {
        fprintf(stderr,"ERROR: Could not determine the size of input file \"%s\".\n",inputName);
        exit(EXIT_FAILURE);
    }
    fileLength = (unsigned long) fileSize;

    inputContent = malloc(fileLength + 1);
    outputContent = malloc(fileLength + 1); // Output will be no longer than input
    if(inputContent == NULL
        || outputContent == NULL)
    {
        fprintf(stderr,"ERROR: Memory could not be allocated to store the input file contents.\n");
        exit(EXIT_FAILURE);
    }
    temp = fread(inputContent,1,fileLength,inputFile);
    // All scanning below relies on a terminator. Use the number of bytes that
    // were actually read, which can be smaller than the file size
    inputContent[temp] = '\0';
    fclose(inputFile);
    printf("Successfully read and closed input file.\n");

    //
    // Scan and fix bib entries
    //
    numEntry = 0;
    startTime = clock();
    curInputInd = 0;
    curInputAnchorInd = 0;
    outputContent[0] = '\0'; // Initialize output string as empty
    while(true)
    {
        // Find start of next entry
        while(inputContent[curInputInd] != '@')
        {
            if(inputContent[curInputInd] == '\0')
                break; // Reached EOF. No more entries to scan
            else
                curInputInd++;
        }
        if(inputContent[curInputInd] == '\0')
            break;

        curInputAnchorInd = curInputInd++;

        // Find end of entry: a '}' at the start of a line that is followed by
        // a newline or by the end of the input
        while(true)
        {
            if((inputContent[curInputInd] == '}'
                && inputContent[curInputInd-1] == '\n'
                && (inputContent[curInputInd+1] == '\n'
                || inputContent[curInputInd+1] == '\0'))
                || inputContent[curInputInd] == '\0')
                break; // Reached end of current entry (or EOF)
            else
                curInputInd++;
        }
        if(inputContent[curInputInd] == '\0')
            break;

        // Current entry goes from inputContent[curInputAnchorInd]
        // to inputContent[curInputInd]+1
        curBibLength = curInputInd-curInputAnchorInd+2;
        curBibEntry = malloc((curBibLength + 1)*sizeof(char));
        if(curBibEntry == NULL)
        {
            fprintf(stderr,"ERROR: Memory could not be allocated to copy bib entry %lu.\n", numEntry);
            exit(EXIT_FAILURE);
        }
        for(curBibInd = 0; curBibInd < curBibLength; curBibInd++)
        {
            curBibEntry[curBibInd] = inputContent[curInputAnchorInd+curBibInd];
        }
        curBibEntry[curBibInd] = '\0';

        // curBibEntry is now a valid substring of the original input file
        // Apply fixes as necessary
        curBibInd = 1; // We know first character is '@'

        // Check URL exception types. The type is the text between '@' and '{'
        bUrlException = false;
        while(curBibEntry[curBibInd] != '{'
            && curBibEntry[curBibInd] != '\0'
            && curBibInd < BIB_TYPE_MAX)
        {
            bibType[curBibInd-1] = curBibEntry[curBibInd];
            curBibInd++;
        }
        bibType[curBibInd-1] = '\0';

        for(curException = 0; curException < NUM_URL_EXCEPTIONS; curException++)
        {
            if(!strcmp(bibType,URL_EXCEPTION_TYPES[curException]))
            {
                bUrlException = true; // Current type of entry needs to keep URL
                break;
            }
        }

        bHasYear = false;
        bHasISSN = false;
        bIsMyPuplication = false;

        // Scan Remainder of entry
        while(curBibEntry[curBibInd] != '\0')
        {
            if(curBibEntry[curBibInd] == '\n')
            {
                // We're at the start of a line in the current bib entry
                // Scan ahead to see if its an entry that we need to fix
                if(!strncmp(&curBibEntry[curBibInd+1], "month =",7))
                {   // Next line lists month. Format should be mmm
                    // and not {mmm}
                    if(curBibInd+13 < curBibLength
                        && curBibEntry[curBibInd+9] == '{'
                        && curBibEntry[curBibInd+13] == '}')
                    {
                        // Drop the closing brace first so that the index of
                        // the opening brace is still valid
                        eraseRange(curBibEntry, &curBibLength, curBibInd+13, curBibInd+13);
                        eraseRange(curBibEntry, &curBibLength, curBibInd+9, curBibInd+9);
                    }
                } else if(!strncmp(&curBibEntry[curBibInd+1], "title =",7))
                {   // Title is supposed to be surrounded by 1 set of braces and not 2
                    // Remove extra set of curly braces
                    indEOL = findEndOfLine(curBibEntry, curBibInd+1);
                    // Last character of the title value, ignoring a trailing comma
                    unsigned long closeInd = indEOL - 1;
                    if(curBibEntry[closeInd] == ',')
                        closeInd--;
                    // Only touch the line if it really is wrapped in "{{" ... "}}"
                    if(closeInd >= curBibInd+12
                        && curBibEntry[curBibInd+9] == '{'
                        && curBibEntry[curBibInd+10] == '{'
                        && curBibEntry[closeInd] == '}'
                        && curBibEntry[closeInd-1] == '}')
                    {
                        eraseRange(curBibEntry, &curBibLength, closeInd, closeInd);
                        eraseRange(curBibEntry, &curBibLength, curBibInd+10, curBibInd+10);
                    }
                } else if(!strncmp(&curBibEntry[curBibInd+1], "annote =",8))
                {   // Entry has an annotation. Erase the whole field
                    indEOL = findEndOfField(curBibEntry, curBibInd+1);
                    eraseRange(curBibEntry, &curBibLength, curBibInd+1, indEOL);
                    curBibInd--; // Correct index so that line after annote is read correctly
                } else if(!strncmp(&curBibEntry[curBibInd+1], "file =",6))
                {   // Entry has a filename. Erase the whole line
                    indEOL = findEndOfLine(curBibEntry, curBibInd+1);
                    eraseRange(curBibEntry, &curBibLength, curBibInd+1, indEOL);
                    curBibInd--; // Correct index so that line after filename is read correctly
                } else if(!bUrlException
                    && !strncmp(&curBibEntry[curBibInd+1], "url =",5))
                {   // Entry has a URL but it should be removed. Erase the whole line
                    indEOL = findEndOfLine(curBibEntry, curBibInd+1);
                    eraseRange(curBibEntry, &curBibLength, curBibInd+1, indEOL);
                    curBibInd--; // Correct index so that line after URL is read correctly
                } else if(!strncmp(&curBibEntry[curBibInd+1], "year =",6))
                {   // This entry defines the year
                    bHasYear = true;
                } else if(!strncmp(&curBibEntry[curBibInd+1], "issn =",6))
                {   // Record line where issn starts in case we need to rename it to the year
                    bHasISSN = true;
                    issnInd = curBibInd + 1;
                } else if(!strncmp(&curBibEntry[curBibInd+1], "author =",8))
                {   // Check whether the author list names the excluded author
                    indEOL = findEndOfLine(curBibEntry, curBibInd+1);
                    char * authors = substr(curBibEntry, curBibInd+10, indEOL);
                    // substr() returns NULL for an empty author value
                    if(authors != NULL && strstr(authors, EXCLUDED_AUTHOR) != NULL)
                    {
                        bIsMyPuplication = true;
                    }
                    free(authors);
                }
            } else if(!strncmp(&curBibEntry[curBibInd], "{\\{}",4))
            {   // We have an incorrectly formatted opening curly brace
                // Keep the leading '{' and remove the 3 characters after it
                eraseRange(curBibEntry, &curBibLength, curBibInd+1, curBibInd+3);
            } else if(!strncmp(&curBibEntry[curBibInd], "{\\}}",4))
            {   // We have an incorrectly formatted closing curly brace
                // Replace the leading '{' with '}' and remove the 3 characters after it
                curBibEntry[curBibInd] = '}';
                eraseRange(curBibEntry, &curBibLength, curBibInd+1, curBibInd+3);
            }
            curBibInd++;
        }

        if(!bHasYear && bHasISSN)
        {   // This entry does not define the year. Rename the issn to the year
            curBibEntry[issnInd] = 'y';
            curBibEntry[issnInd+1] = 'e';
            curBibEntry[issnInd+2] = 'a';
            curBibEntry[issnInd+3] = 'r';
        }

        if(!bIsMyPuplication)
        {
            // Append fixed entry to output string. The write position is
            // tracked so that the output is not rescanned for every entry
            unsigned long fixedLength = strlen(curBibEntry);
            memcpy(&outputContent[outputLength], curBibEntry, fixedLength + 1);
            outputLength += fixedLength;
        }

        // The entry copy is only needed for one iteration
        free(curBibEntry);
        curBibEntry = NULL;

        numEntry++;
    }

    endTime = clock();
    printf("Entry fixing took %f seconds\n", (double) (endTime-startTime)/CLOCKS_PER_SEC);

    // Write output string to output file
    if((outputFile = fopen(outputName, "w")) == NULL)
    {
        fprintf(stderr,"ERROR: Cannot create output file \"%s\".\n",outputName);
        exit(EXIT_FAILURE);
    }
    printf("Successfully created output file at \"%s\".\n", outputName);

    fprintf(outputFile, "%s", outputContent);
    fclose(outputFile);
    printf("Successfully wrote and closed output file with %lu entries.\n", numEntry);

    // Cleanup
    free(inputContent);
    free(outputContent);
    free(inputName);
    free(outputName);

    return 0;
}
// Reserve heap storage for a string of stringLength characters plus its
// terminating NUL. The caller owns the returned buffer and must free() it.
// Prints an error and exits with EXIT_FAILURE if the allocation fails.
char * stringAllocate(long stringLength)
{
    char * buffer = malloc(stringLength+1);

    if(buffer != NULL)
        return buffer;

    fprintf(stderr,"ERROR: Memory could not be allocated for string copy.\n");
    exit(EXIT_FAILURE);
}
// Duplicate src into freshly allocated storage obtained from stringAllocate().
// The caller owns the returned copy and must free() it.
char * stringWrite(char * src)
{
    size_t srcLength = strlen(src);
    char * copy = stringAllocate(srcLength);

    // Copy the characters together with the terminating NUL
    memcpy(copy, src, srcLength + 1);
    return copy;
}
// Find next end of line in current string
//   str      - NUL-terminated string to scan
//   startInd - index at which the scan starts (must not be past the terminator)
// Returns the index of the first '\n' at or after startInd. If the string ends
// before a newline is found, the index of the NUL terminator is returned
// instead, so the scan never leaves the string.
unsigned long findEndOfLine(char * str, unsigned long startInd)
{
    unsigned long curInd = startInd;
    while(str[curInd] != '\n' && str[curInd] != '\0')
        curInd++;
    return curInd;
}
// Find end of field in current string
//   str      - NUL-terminated string to scan
//   startInd - index of the first character of the field
// A field ends with the three characters "},\n". The search for that sequence
// begins one character after startInd. Returns the index of the '\n' that
// closes the field. If the string ends before the sequence is found, the index
// of the NUL terminator is returned instead, so the scan never leaves the string.
unsigned long findEndOfField(char * str, unsigned long startInd)
{
    unsigned long curInd = startInd;

    if(str[curInd] == '\0')
        return curInd; // Nothing to scan

    curInd++;
    while(str[curInd] != '\0')
    {
        // strncmp stops at the terminator, so this cannot read past the string
        if(!strncmp(&str[curInd], "},\n",3))
            return curInd+2;
        curInd++;
    }
    return curInd;
}
// Copy the characters text[beg] .. text[end-1] into a newly allocated string.
//   text - NUL-terminated source string (may be NULL)
//   beg  - index of the first character to copy
//   end  - index one past the last character to copy; if it lies beyond the
//          end of text, the copy simply stops at the end of text
// Returns the copy, which the caller must free(), or NULL if text is NULL,
// beg is negative or lies beyond the end of text, end is not greater than beg,
// or memory could not be allocated.
char * substr(const char * text, int beg, int end)
{
    int i;
    char *sub = NULL;
    int len = end - beg;

    // Reject missing input and ranges that are empty or start before the text
    if(text == NULL || beg < 0 || len <= 0)
        return NULL;

    // Make sure position beg exists in the text before reading from it
    for(i = 0; i < beg; i++)
    {
        if(text[i] == '\0')
            return NULL;
    }

    // If end exceeds the last position of the text, slightly more memory than
    // needed is allocated, which is harmless
    sub = malloc(1 + (size_t)len);
    if(sub == NULL)
        return NULL;

    for(i = beg; text[i] != '\0' && i < end; i++)
        sub[i - beg] = text[i];
    sub[i - beg] = '\0'; // NUL terminator at the end of the copy

    return sub;
}