forked from ufrisk/MemProcFS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
charutil.h
445 lines (410 loc) · 14.3 KB
/
charutil.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
// charutil.h : definitions of various character/string utility functions.
//
// (c) Ulf Frisk, 2021-2024
// Author: Ulf Frisk, [email protected]
//
#ifndef __CHARUTIL_H__
#define __CHARUTIL_H__
#ifdef _WIN32
#include <Windows.h>
typedef unsigned __int64 QWORD, *PQWORD;
#else
#include "oscompatibility.h"
#endif /* _WIN32 */
#define CHARUTIL_FLAG_NONE 0x0000
#define CHARUTIL_FLAG_ALLOC 0x0001
#define CHARUTIL_FLAG_TRUNCATE 0x0002
#define CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR 0x0006
#define CHARUTIL_FLAG_STR_BUFONLY 0x0008
/*
* Check whether a string is an ansi-string (only codepoints between 0-127).
* -- sz
* -- return
*/
BOOL CharUtil_IsAnsiA(_In_ LPCSTR sz);
BOOL CharUtil_IsAnsiW(_In_ LPCWSTR wsz);
BOOL CharUtil_IsAnsiFsA(_In_ LPCSTR sz);
/*
* Convert Ascii (0-255) or Wide (16-bit LE) string into a UTF-8 string.
* NB! wsz must NOT equal or overlap pbBuffer!
* CALLER LOCALFREE (if *pusz != pbBuffer): *pusz
* -- sz/wsz = the string to convert.
* -- cch = -1 for null-terminated string; or max number of chars (excl. null).
* -- pbBuffer = optional buffer to place the result in.
* -- cbBuffer
* -- pusz = if set to null: function calculate length only and return TRUE.
result utf-8 string, either as (*pusz == pbBuffer) or LocalAlloc'ed
* buffer that caller is responsible for free.
* -- pcbu = byte length (including terminating null) of utf-8 string.
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_STR_BUFONLY, CHARUTIL_FLAG_ALLOC, CHARUTIL_FLAG_TRUNCATE, etc.
* -- return
*/
_Success_(return)
BOOL CharUtil_UtoU(
_In_opt_ LPCSTR usz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPSTR *pusz,
_Out_opt_ PDWORD pcbu,
_In_ DWORD flags
);
_Success_(return)
BOOL CharUtil_AtoU(
_In_opt_ LPCSTR sz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPSTR *pusz,
_Out_opt_ PDWORD pcbu,
_In_ DWORD flags
);
_Success_(return)
BOOL CharUtil_WtoU(
_In_opt_ LPCWSTR wsz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPSTR *pusz,
_Out_opt_ PDWORD pcbu,
_In_ DWORD flags
);
/*
* Convert UTF-8 string into a Windows Wide-Char string.
* Function support usz == pbBuffer - usz will then become overwritten.
* CALLER LOCALFREE (if *pusz != pbBuffer): *pusz
* -- usz/wsz = the string to convert.
* -- cch = -1 for null-terminated string; or max number of chars (excl. null).
* -- pbBuffer = optional buffer to place the result in.
* -- cbBuffer
* -- pwsz = if set to null: function calculate length only and return TRUE.
result wide-string, either as (*pwsz == pbBuffer) or LocalAlloc'ed
* buffer that caller is responsible for free.
* -- pcbw = byte length (including terminating null) of wide-char string.
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_STR_BUFONLY, CHARUTIL_FLAG_ALLOC, CHARUTIL_FLAG_TRUNCATE, etc.
* -- return
*/
_Success_(return)
BOOL CharUtil_UtoW(
_In_opt_ LPCSTR usz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPWSTR *pwsz,
_Out_opt_ PDWORD pcbw,
_In_ DWORD flags
);
_Success_(return)
BOOL CharUtil_WtoW(
_In_opt_ LPCWSTR wsz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPWSTR *pwsz,
_Out_opt_ PDWORD pcbw,
_In_ DWORD flags
);
/*
* Convert UTF-8, Ascii (0-255) or Wide (16-bit LE) string into a JSON string.
* Function support sz/usz/wsz == pbBuffer - sz/usz/wsz will then become overwritten.
* CALLER LOCALFREE (if *pjsz != pbBuffer): *pjsz
* -- sz/usz/wsz = the string to convert.
* -- cch = -1 for null-terminated string; or max number of chars (excl. null).
* -- pbBuffer = optional buffer to place the result in.
* -- cbBuffer
* -- pjsz = if set to null: function calculate length only and return TRUE.
result utf-8 string, either as (*pjsz == pbBuffer) or LocalAlloc'ed
* buffer that caller is responsible for free.
* -- pcbj = byte length (including terminating null) of utf-8 string.
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_STR_BUFONLY, CHARUTIL_FLAG_ALLOC, CHARUTIL_FLAG_TRUNCATE, etc.
* -- return
*/
_Success_(return)
BOOL CharUtil_UtoJ(
_In_opt_ LPCSTR usz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPSTR *pjsz,
_Out_opt_ PDWORD pcbj,
_In_ DWORD flags
);
_Success_(return)
BOOL CharUtil_AtoJ(
_In_opt_ LPCSTR sz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPSTR *pjsz,
_Out_opt_ PDWORD pcbj,
_In_ DWORD flags
);
_Success_(return)
BOOL CharUtil_WtoJ(
_In_opt_ LPCWSTR wsz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPSTR *pjsz,
_Out_opt_ PDWORD pcbj,
_In_ DWORD flags
);
/*
* Convert UTF-8 string into a CSV compatible string.
* If source string contain either comma(,) space( ) doublequote(") it will be
* treated as a CSV string and be put into double quotes at start/end.
* Function support usz == pbBuffer - usz will then become overwritten.
* CALLER LOCALFREE (if *pvsz != pbBuffer): *pvsz
* -- usz = the string to convert.
* -- cch = -1 for null-terminated string; or max number of chars (excl. null).
* -- pbBuffer = optional buffer to place the result in.
* -- cbBuffer
* -- pvsz = if set to null: function calculate length only and return TRUE.
result utf-8 string, either as (*pvsz == pbBuffer) or LocalAlloc'ed
* buffer that caller is responsible for free.
* -- pcbv = byte length (including terminating null) of utf-8 string.
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_STR_BUFONLY, CHARUTIL_FLAG_ALLOC, CHARUTIL_FLAG_TRUNCATE, etc.
* -- return
*/
_Success_(return)
BOOL CharUtil_UtoCSV(
_In_opt_ LPCSTR usz,
_In_ DWORD cch,
_Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer,
_In_ DWORD cbBuffer,
_Out_opt_ LPSTR *pvsz,
_Out_opt_ PDWORD pcbv,
_In_ DWORD flags);
/*
* Hash a string quickly using the ROT13 algorithm either to a 64-bit or 32-bit number.
* -- sz/usz/wsz = the string to hash
* -- fUpper
* -- return
*/
DWORD CharUtil_Hash32U(_In_opt_ LPCSTR usz, _In_ BOOL fUpper);
DWORD CharUtil_Hash32A(_In_opt_ LPCSTR sz, _In_ BOOL fUpper);
DWORD CharUtil_Hash32W(_In_opt_ LPCWSTR wsz, _In_ BOOL fUpper);
QWORD CharUtil_Hash64U(_In_opt_ LPCSTR usz, _In_ BOOL fUpper);
QWORD CharUtil_Hash64A(_In_opt_ LPCSTR sz, _In_ BOOL fUpper);
QWORD CharUtil_Hash64W(_In_opt_ LPCWSTR wsz, _In_ BOOL fUpper);
/*
* Hash a name string in a way that is supported by the file system.
* NB! this is not the same hash as the Windows registry uses.
* -- usz/sz/wsz
* -- iSuffix
* -- return
*/
DWORD CharUtil_HashNameFsU(_In_ LPCSTR usz, _In_opt_ DWORD iSuffix);
DWORD CharUtil_HashNameFsA(_In_ LPCSTR sz, _In_opt_ DWORD iSuffix);
DWORD CharUtil_HashNameFsW(_In_ LPCWSTR wsz, _In_opt_ DWORD iSuffix);
/*
* Hash a path string in a way that is supported by the file system.
* NB! this is not the same hash as the Windows registry uses.
* -- usz/sz/wsz
* -- iSuffix
* -- return
*/
QWORD CharUtil_HashPathFsU(_In_ LPCSTR usz);
QWORD CharUtil_HashPathFsA(_In_ LPCSTR sz);
QWORD CharUtil_HashPathFsW(_In_ LPCWSTR wsz);
/*
* Convert a string into a file name compatible string by replacing illegal
* characters with '_'. Also optionally add a suffix between 1-9 and fix
* upper-case letters. If insufficient space the result will be truncated.
* -- uszDst
* -- cbuDst
* -- uszSrc
* -- iSuffix
* -- fUpper
* -- return = number of bytes written (including terminating NULL).
*/
_Success_(return != 0)
DWORD CharUtil_FixFsNameU(
_Out_writes_(cbuDst) LPSTR uszDst,
_In_ DWORD cbuDst,
_In_ LPCSTR uszSrc,
_In_opt_ DWORD iSuffix,
_In_ BOOL fUpper
);
/*
* Convert a string into a file name compatible string by replacing illegal
* characters with '_'. Also optionally add a suffix between 1-9 and fix
* upper-case letters. One of [usz, sz, wsz] must be valid.
* -- uszOut
* -- cbuDst
* -- usz
* -- sz
* -- wsz
* -- cwsz
* -- cch = number of bytes/wchars in usz/sz/wsz or _TRUNCATE
* -- iSuffix
* -- fUpper
* -- return = number of bytes written (including terminating NULL).
*/
_Success_(return != 0)
DWORD CharUtil_FixFsName(
_Out_writes_(cbuDst) LPSTR uszOut,
_In_ DWORD cbuDst,
_In_opt_ LPCSTR usz,
_In_opt_ LPCSTR sz,
_In_opt_ LPCWSTR wsz,
_In_ DWORD cch,
_In_opt_ DWORD iSuffix,
_In_ BOOL fUpper
);
/*
* Replace all characters in a string.
* -- sz
* -- chOld
* -- chNew
*/
VOID CharUtil_ReplaceAllA(_Inout_ LPSTR sz, _In_ CHAR chOld, _In_ CHAR chNew);
/*
* Split a string into two at the first character.
* The 1st string is returned in the pusz1 caller-allocated buffer. The
* remainder is returned as return data (is a sub-string of usz). If no
* 2nd string is found null-terminator character is returned (NB! not as NULL).
* -- usz = utf-8/ascii string to split.
* -- ch = character to split at.
* -- usz1 = buffer to receive result.
* -- cbu1 = byte length of usz1 buffer
* -- return = remainder of split string.
*/
LPSTR CharUtil_SplitFirst(_In_ LPSTR usz, _In_ CHAR ch, _Out_writes_(cbu1) LPSTR usz1, _In_ DWORD cbu1);
/*
* Split a string into a list of strings at the delimiter characters.
* The function allocates neccessary memory for the result array and its values.
* CALLER LocalFree: *ppuszArray
* -- usz = utf-8/ascii string to split.
* -- chDelimiter = character to split at.
* -- pcArray = pointer to receive number of strings in result array.
* -- ppuszArray = pointer to receive result array.
* -- return = remainder of split string.
*/
_Success_(return)
BOOL CharUtil_SplitList(_Inout_opt_ LPSTR usz, _In_ CHAR chDelimiter, _Out_ PDWORD pcArray, _Out_ LPSTR **ppuszArray);
/*
* Split a "path" string into two at the first slash/backslash character.
* The 1st string is returned in the pusz1 caller-allocated buffer. The
* remainder is returned as return data (is a sub-string of usz). If no
* 2nd string is found null-terminator character is returned (NB! not as NULL).
* -- usz = utf-8/ascii string to split.
* -- usz1 = buffer to receive result.
* -- cbu1 = byte length of usz1 buffer
* -- return = remainder of split string.
*/
LPCSTR CharUtil_PathSplitFirst(_In_ LPCSTR usz, _Out_writes_(cbu1) LPSTR usz1, _In_ DWORD cbu1);
/*
* Return the sub-string after the first (back)slash character in usz.
* If no (back)slash is found original string is returned. The returned data
* must not be free'd and is only valid as long as the usz parameter is valid.
* -- usz = utf-8 or ascii string.
* -- return
*/
LPCSTR CharUtil_PathSplitNext(_In_ LPCSTR usz);
/*
* Return the sub-string after the last (back)slash character in usz.
* If no (back)slash is found original string is returned. The returned data
* must not be free'd and is only valid as long as the usz parameter is valid.
* -- usz = utf-8 or ascii string.
* -- return
*/
LPCSTR CharUtil_PathSplitLast(_In_ LPCSTR usz);
/*
* Split the string usz into two at the last (back)slash which is removed.
* If no slash is found, the input string is not modified and NULL is returned.
* NB! The input string is modified in place.
* Ex: usz: XXX/YYY/ZZZ/AAA -> usz: XXX/YYY/ZZZ + return: AAA
* -- usz = utf-8 or ascii string to be split/modified.
* -- return = last part (i.e. file name) of usz.
*/
LPSTR CharUtil_PathSplitLastInPlace(_Inout_ LPSTR usz);
/*
* Split the string usz into two at the last (back)slash which is removed.
* Ex: usz: XXX/YYY/ZZZ/AAA -> uszPath: XXX/YYY/ZZZ + return: AAA
* -- usz = utf-8 or ascii string.
* -- uszPath = buffer to receive result.
* -- cbuPath = byte length of uszPath buffer
* -- return = last part (i.e. file name) of usz.
*/
LPSTR CharUtil_PathSplitLastEx(_In_ LPCSTR usz, _Out_writes_(cbuPath) LPSTR uszPath, _In_ DWORD cbuPath);
/*
* Common typedef for a CharUtil_Str* comparison function.
*/
typedef BOOL(*CHARUTIL_STRCMP_PFN)(_In_opt_ LPCSTR usz1, _In_opt_ LPCSTR usz2, _In_ BOOL fCaseInsensitive);
/*
* Compare multiple strings with a CharUtil_Str* compare function.
* If at least one comparison is TRUE return TRUE - otherwise FALSE.
* -- pfnStrCmp
* -- usz1
* -- fCaseInsensitive
* -- cStr
* --
* ...
* -- return
*/
BOOL CharUtil_StrCmpAny(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPCSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, ...);
/*
* Compare multiple strings with a CharUtil_Str* compare function.
* If at least one comparison is TRUE return TRUE - otherwise FALSE.
* -- pfnStrCmp
* -- usz1
* -- fCaseInsensitive
* -- cStr
* -- pStr
* -- return
*/
BOOL CharUtil_StrCmpAnyEx(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPCSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, _In_ LPCSTR *pStr);
/*
* Compare multiple strings with a CharUtil_Str* compare function.
* If all comparisons are TRUE return TRUE - otherwise FALSE.
* -- pfnStrCmp
* -- usz1
* -- fCaseInsensitive
* -- cStr
* --
* ...
* -- return
*/
BOOL CharUtil_StrCmpAll(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPCSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, ...);
/*
* Checks if a string ends with a certain substring.
* -- usz
* -- uszEndsWith
* -- fCaseInsensitive
* -- return
*/
BOOL CharUtil_StrEndsWith(_In_opt_ LPCSTR usz, _In_opt_ LPCSTR uszEndsWith, _In_ BOOL fCaseInsensitive);
/*
* Checks if a string starts with a certain substring.
* -- usz
* -- uszStartsWith
* -- fCaseInsensitive
* -- return
*/
BOOL CharUtil_StrStartsWith(_In_opt_ LPCSTR usz, _In_opt_ LPCSTR uszStartsWith, _In_ BOOL fCaseInsensitive);
/*
* Checks if a string equals another string.
* -- usz1
* -- usz2
* -- fCaseInsensitive
* -- return
*/
BOOL CharUtil_StrEquals(_In_opt_ LPCSTR usz, _In_opt_ LPCSTR usz2, _In_ BOOL fCaseInsensitive);
/*
* Compare a wide-char string to a utf-8 string.
* NB! only the first 2*MAX_PATH characters are compared.
* -- wsz1
* -- usz2
* -- return = 0 if equals, -1/1 otherwise.
*/
int CharUtil_CmpWU(_In_opt_ LPWSTR wsz1, _In_opt_ LPSTR usz2, _In_ BOOL fCaseInsensitive);
/*
* Compare two wide-char strings.
* NB! only the first 2*MAX_PATH characters are compared.
* -- wsz1
* -- wsz2
* -- return = 0 if equals, -1/1 otherwise.
*/
int CharUtil_CmpWW(_In_opt_ LPCWSTR wsz1, _In_opt_ LPCWSTR wsz2, _In_ BOOL fCaseInsensitive);
#endif /* __CHARUTIL_H__ */