Skip to content

Commit 5bd0524

Browse files
authored
Initial commit
1 parent 2cce55d commit 5bd0524

File tree

1 file changed

+361
-0
lines changed

1 file changed

+361
-0
lines changed

mendeleyBibFix.c

+361
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,361 @@
1+
/*
2+
* mendeleyBibFix - correct formatting of bib-files that are automatically
3+
* generated by Mendeley Desktop
4+
*
5+
* NOTE: Mendeley Desktop is copyright 2009-2013 by Mendeley Ltd.
6+
* This software is not provided by Mendeley and the author has no affiliation
7+
* with their company.
8+
*
9+
* Documentation:
10+
* This is a simple function intended to correct bib-files that are
11+
* automatically generated by Mendeley Desktop. I have found it to work
12+
* for bib-files generated with the IEEE citation style, but it should
13+
* work for other styles as well. It makes the following corrections:
14+
* - changes double braces around titles to single braces
15+
* - removes escaping of { and } (will only matter if you checked
16+
* "Escape LaTeX special characters" in the "Bibtex" Options tab)
17+
* - removes URL for any entry that is not specified as an exception
18+
* (read the comment block after start of main function to read
19+
* how to change the exceptions)
20+
* - removes braces around months
21+
*
22+
* It should work correctly for files generated by Mendeley Desktop v1.16.1.
23+
*
24+
* A number of fixes are hard-coded, i.e., it expects to know where the braces are.
25+
* So this code runs very fast (bib files with hundreds of entries are fixed in a
26+
* small fraction of a second) but may not be "future-proof"
27+
*
28+
* You will need to compile this code to run it. A compiled version for windows is
29+
* included on the release page of GitHub. If you are going to compile it yourself with gcc,
30+
* then you will need the -std=c99 option
31+
*
32+
* Call syntax (windows):
33+
* mendeleyBibFix.exe [OUTPUT_FILENAME] [INPUT_FILENAME]
34+
*
35+
* Both arguments are optional. If there is only one argument, then it is assumed to be
36+
* the output filename. The default input filename is "library.bib", and the default
37+
* output filename is "library_fixed.bib"
38+
*
39+
* Copyright 2016 Adam Noel. All rights reserved.
40+
* Distributed under the New BSD license. See LICENSE.txt for license details.
41+
*
42+
* Created June 15, 2016
43+
* Current version v1.0 (2016-06-15)
44+
*
45+
* Revision history:
46+
*
47+
* Revision v1.0
48+
* - File created
49+
*
50+
*
51+
*/
52+
53+
#include <stdio.h>
54+
#include <stdlib.h> // for exit(), malloc
55+
#include <string.h> // for strcpy()
56+
#include <stdbool.h> // for C++ bool naming, requires C99
57+
#include <time.h> // For time record keeping
58+
59+
#define BIB_TYPE_MAX 25
60+
61+
// Function declarations
62+
// Allocate a buffer of stringLength+1 bytes; exits the program if the
// allocation fails
char * stringAllocate(long stringLength);
63+
// Return a newly allocated copy of the NUL-terminated string src
char * stringWrite(char * src);
64+
// Return the index of the first '\n' in str at or after startInd
unsigned long findEndOfLine(char * str, unsigned long startInd);
65+
66+
//
67+
// MAIN
68+
//
69+
int main(int argc, char *argv[])
70+
{
71+
// MODIFY THIS BLOCK TO ADD/REMOVE BIB ENTRY TYPES THAT
72+
// SHOULD HAVE A URL DISPLAYED. BY DEFAULT, ALL URLS
73+
// ARE REMOVED FROM THE BIB-FILE.
74+
// TO ADD AN EXCEPTION:
75+
// 1) INCREMENT NUM_URL_EXCEPTIONS
76+
// 2) APPEND THE NEW EXCEPTION TO THE LAST INDEX OF
77+
// URL_EXCEPTION_TYPES (WRITE WITHOUT THE '@' PREFIX).
78+
// TO REMOVE AN EXCEPTION:
79+
// 1) DECREMENT NUM_URL_EXCEPTIONS
80+
// 2) REMOVE EXCEPTION STRING WRITTEN TO URL_EXCEPTION_TYPES
81+
// 3) CORRECT INDICES OF REMAINING EXCEPTIONS SO THAT THEY
82+
// GO FROM 0 TO (NUM_URL_EXCEPTIONS-1)
83+
// NOTE: MENDELEY EXPORTS A "WEB PAGE" ENTRY AS "misc"
84+
const int NUM_URL_EXCEPTIONS = 2;
85+
const char *URL_EXCEPTION_TYPES[NUM_URL_EXCEPTIONS];
86+
URL_EXCEPTION_TYPES[0] = "misc";
87+
URL_EXCEPTION_TYPES[1] = "unpublished";
88+
// END OF USER-MODIFIED BLOCK
89+
90+
int curException;
91+
bool bUrlException;
92+
char bibType[BIB_TYPE_MAX];
93+
94+
char INPUT_DEFAULT[] = "library.bib";
95+
char OUTPUT_DEFAULT[] = "library_fixed.bib";
96+
97+
char * inputName;
98+
char * outputName;
99+
100+
FILE * inputFile;
101+
FILE * outputFile;
102+
103+
unsigned long fileLength;
104+
unsigned long temp; // Garbage variable for discarded file content length
105+
char * inputContent;
106+
char * outputContent;
107+
108+
unsigned long curInputInd, curInputAnchorInd;
109+
char curInputChar;
110+
unsigned long curEntryInd;
111+
char curEntryChar;
112+
113+
unsigned long numEntry = 0;
114+
char * curBibEntry;
115+
char * curBibFixed;
116+
unsigned long curBibInd, curBibLength, indEOL;
117+
118+
119+
// Timer variables
120+
clock_t startTime, endTime;
121+
122+
// Read in output filename if defined
123+
if(argc > 2)
124+
{
125+
inputName = stringWrite(argv[2]);
126+
} else
127+
{
128+
inputName = stringWrite(INPUT_DEFAULT);
129+
}
130+
131+
// Read in input filename if defined
132+
if(argc > 1)
133+
{
134+
outputName = stringWrite(argv[1]);
135+
} else
136+
{
137+
outputName = stringWrite(OUTPUT_DEFAULT);
138+
}
139+
140+
// Open input file
141+
inputFile = fopen(inputName, "r");
142+
if(inputFile == NULL)
143+
{
144+
fprintf(stderr,"ERROR: Input file \"%s\" not found.\n",inputName);
145+
exit(EXIT_FAILURE);
146+
}
147+
printf("Successfully opened input file at \"%s\".\n", inputName);
148+
149+
// Read in contents of input file
150+
fseek(inputFile, 0, SEEK_END);
151+
fileLength = ftell(inputFile);
152+
fseek(inputFile,0,SEEK_SET);
153+
inputContent = malloc(fileLength + 1);
154+
outputContent = malloc(fileLength + 1); // Output will be no longer than input
155+
if(inputContent == NULL
156+
|| outputContent == NULL)
157+
{
158+
fprintf(stderr,"ERROR: Memory could not be allocated to store the input file contents.\n");
159+
exit(EXIT_FAILURE);
160+
}
161+
temp = fread(inputContent,1,fileLength,inputFile);
162+
fclose(inputFile);
163+
printf("Successfully read and closed input file.\n");
164+
165+
//
166+
// Scan and fix bib entries
167+
//
168+
numEntry = 0;
169+
startTime = clock();
170+
curInputInd = 0;
171+
curInputAnchorInd = 0;
172+
outputContent[0] = '\0'; // Initialize output string as empty
173+
while(true)
174+
{
175+
// Find start of next entry
176+
while(inputContent[curInputInd] != '@')
177+
{
178+
if(inputContent[curInputInd] == '\0')
179+
break; // Reached EOF. No more entries to scan
180+
else
181+
curInputInd++;
182+
}
183+
184+
if(inputContent[curInputInd] == '\0')
185+
break;
186+
187+
curInputAnchorInd = curInputInd++;
188+
189+
// Find end of entry
190+
while(true)
191+
{
192+
if((inputContent[curInputInd] == '}'
193+
&& inputContent[curInputInd-1] == '\n')
194+
|| inputContent[curInputInd] == '\0')
195+
break; // Reached end of current entry (or EOF)
196+
else
197+
curInputInd++;
198+
199+
}
200+
201+
if(inputContent[curInputInd] == '\0')
202+
break;
203+
204+
// Current entry goes from inputContent[curInputAnchorInd]
205+
// to inputContent[curInputInd]+1
206+
curBibLength = curInputInd-curInputAnchorInd+2;
207+
curBibEntry = malloc((curBibLength + 1)*sizeof(char));
208+
if(curBibEntry == NULL)
209+
{
210+
fprintf(stderr,"ERROR: Memory could not be allocated to copy bib entry %u.\n", numEntry);
211+
exit(EXIT_FAILURE);
212+
}
213+
for(curBibInd = 0; curBibInd < curBibLength; curBibInd++)
214+
{
215+
curBibEntry[curBibInd] = inputContent[curInputAnchorInd+curBibInd];
216+
}
217+
curBibEntry[curBibInd] = '\0';
218+
219+
// curBibEntry is now a valid substring of the original input file
220+
// Apply fixes as necessary
221+
curBibInd = 1; // We know first character is '@'
222+
223+
// Check URL exception types
224+
bUrlException = false;
225+
while(curBibEntry[curBibInd] != '{'
226+
&& curBibInd < BIB_TYPE_MAX)
227+
{
228+
bibType[curBibInd-1] = curBibEntry[curBibInd];
229+
curBibInd++;
230+
}
231+
bibType[curBibInd-1] = '\0';
232+
233+
for(curException = 0; curException < NUM_URL_EXCEPTIONS; curException++)
234+
{
235+
if(!strcmp(bibType,URL_EXCEPTION_TYPES[curException]))
236+
{
237+
bUrlException = true; // Current type of entry needs to keep URL
238+
break;
239+
}
240+
}
241+
242+
// Scan Remainder of entry
243+
while(curBibEntry[curBibInd] != '\0')
244+
{
245+
if(curBibEntry[curBibInd] == '\n')
246+
{
247+
// We're at the start of a line in the current bib entry
248+
// Scan ahead to see if its an entry that we need to fix
249+
if(!strncmp(&curBibEntry[curBibInd+1], "month =",7))
250+
{ // Next line lists month. Format should be mmm
251+
// and not {mmm}
252+
if(curBibEntry[curBibInd+9] == '{'
253+
&& curBibEntry[curBibInd+13] == '}')
254+
{
255+
curBibEntry[curBibInd+9] = curBibEntry[curBibInd+10];
256+
curBibEntry[curBibInd+10] = curBibEntry[curBibInd+11];
257+
curBibEntry[curBibInd+11] = curBibEntry[curBibInd+12];
258+
// Delete offsets 12 and 13
259+
memmove(&curBibEntry[curBibInd+12], &curBibEntry[curBibInd+14],
260+
curBibLength - curBibInd-13);
261+
curBibLength -= 2;
262+
}
263+
} else if(!strncmp(&curBibEntry[curBibInd+1], "title =",7))
264+
{ // Title is supposed to be surrounded by 1 set of braces and not 2
265+
// Remove extra set of curly braces
266+
indEOL = findEndOfLine(curBibEntry, curBibInd+1);
267+
// Shift title over extra opening curly brace
268+
memmove(&curBibEntry[curBibInd+10], &curBibEntry[curBibInd+11],
269+
indEOL - curBibInd-13);
270+
// Shift remaining text over extra closing curly brace
271+
memmove(&curBibEntry[indEOL-3], &curBibEntry[indEOL-1],
272+
curBibLength - indEOL + 2);
273+
curBibLength -= 2;
274+
} else if(!bUrlException
275+
&& !strncmp(&curBibEntry[curBibInd+1], "url =",5))
276+
{ // Entry has a URL but it should be removed. Erase the whole line
277+
indEOL = findEndOfLine(curBibEntry, curBibInd+1);
278+
memmove(&curBibEntry[curBibInd+1], &curBibEntry[indEOL+1],
279+
curBibLength - indEOL + 1);
280+
curBibLength -= indEOL - curBibInd;
281+
}
282+
} else if(!strncmp(&curBibEntry[curBibInd], "{\\{}",4))
283+
{ // We have an incorrectly formatted opening curly brace
284+
// Remove 3 characters of memory
285+
memmove(&curBibEntry[curBibInd+1], &curBibEntry[curBibInd+4],
286+
curBibLength - curBibInd-2);
287+
curBibLength -= 3;
288+
} else if(!strncmp(&curBibEntry[curBibInd], "{\\}}",4))
289+
{ // We have an incorrectly formatted closing curly brace
290+
// Remove 3 characters of memory
291+
curBibEntry[curBibInd] = '}';
292+
memmove(&curBibEntry[curBibInd+1], &curBibEntry[curBibInd+4],
293+
curBibLength - curBibInd-2);
294+
curBibLength -= 3;
295+
}
296+
297+
curBibInd++;
298+
}
299+
300+
// Write fixed entry to output string
301+
strcat(outputContent, curBibEntry);
302+
303+
numEntry++;
304+
}
305+
306+
endTime = clock();
307+
printf("Entry fixing took %f seconds\n", (double) (endTime-startTime)/CLOCKS_PER_SEC);
308+
309+
// Write output string to output file
310+
if((outputFile = fopen(outputName, "w")) == NULL)
311+
{
312+
fprintf(stderr,"ERROR: Cannot create output file \"%s\".\n",outputName);
313+
exit(EXIT_FAILURE);
314+
}
315+
printf("Successfully created output file at \"%s\".\n", outputName);
316+
317+
fprintf(outputFile, "%s", outputContent);
318+
fclose(outputFile);
319+
printf("Successfully wrote and closed output file with %u entries.\n", numEntry);
320+
321+
// Cleanup
322+
free(inputContent);
323+
free(inputName);
324+
free(outputName);
325+
free(curBibEntry);
326+
free(curBibFixed);
327+
328+
return 0;
329+
}
330+
331+
// Allocate memory for a string
332+
// Allocate memory for a string
//
// stringLength  number of characters the string must be able to hold,
//               not counting the terminator
//
// Returns a buffer of stringLength+1 bytes that already holds the empty
// string. The caller owns the buffer and releases it with free().
// The program is terminated with an error message if stringLength is
// negative or if the memory cannot be allocated.
char * stringAllocate(long stringLength)
{
    char * string = NULL;

    // A negative length would wrap around when converted to a size
    if(stringLength >= 0)
        string = malloc((size_t) stringLength + 1);

    if(string == NULL)
    {
        fprintf(stderr,"ERROR: Memory could not be allocated for string copy.\n");
        exit(EXIT_FAILURE);
    }

    string[0] = '\0'; // Never hand back uninitialized string contents

    return string;
}
343+
344+
// Copy string (with memory allocation)
345+
// Duplicate a NUL-terminated string into newly allocated memory.
//
// src  string to duplicate
//
// Returns the duplicate, obtained from stringAllocate(); the caller is
// responsible for releasing it with free().
char * stringWrite(char * src)
{
    size_t srcLength = strlen(src);
    char * duplicate = stringAllocate(srcLength);

    // Copy the characters together with the terminator
    memcpy(duplicate, src, srcLength + 1);

    return duplicate;
}
352+
353+
// Find next end of line in current string
354+
// Find next end of line in current string
//
// str       NUL-terminated string to search
// startInd  index at which the search begins; must not be beyond the
//           terminator of str
//
// Returns the index of the first '\n' at or after startInd. If the rest of
// the string holds no newline, the index of the terminator is returned
// instead, so the search never leaves the string.
unsigned long findEndOfLine(char * str, unsigned long startInd)
{
    return startInd + (unsigned long) strcspn(&str[startInd], "\n");
}

0 commit comments

Comments
 (0)