forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
h2o_util.py
664 lines (586 loc) · 26.1 KB
/
h2o_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
import subprocess
import gzip, shutil, random, time, re
import os, zipfile, simplejson as json, csv
import sys, math
from h2o_test import verboseprint
# Return file size.
def get_file_size(f):
    """Return the size, in bytes, of the file at path f."""
    size_in_bytes = os.path.getsize(f)
    return size_in_bytes
# Splits file into chunks of given size and returns an iterator over chunks.
def iter_chunked_file(file, chunk_size=2048):
    """Yield successive chunks read from an open file-like object.

    Arguments:
    file -- object with a read(size) method (text or binary mode)
    chunk_size -- maximum size passed to each read() call

    Iteration ends at the first empty chunk. Testing for emptiness rather
    than comparing against '' means a binary stream that returns b'' at EOF
    terminates too, instead of looping forever.
    """
    while True:
        chunk = file.read(chunk_size)
        if not chunk:
            return
        yield chunk
# operations to get bit patterns for fp
# Python internally uses the native endianness and 64 bits for floats
# Java floatToBits is the thing to convert fp to long bits
# if it's real, use this to convert. All reals should match
# long bits = Double.doubleToLongBits(myDouble);
# System.out.println(Long.toBinaryString(bits));
import struct
# Q is unsigned long long. 8 bytes
# d is double float
def doubleToUnsignedLongLong(d):
    """Return the 64 bits of double d as an unsigned integer (big-endian packing)."""
    (bits,) = struct.unpack('>Q', struct.pack('>d', d))
    return bits
# floatToBits(173.3125)
# 1127043072
# hex(_)
# '0x432d5000'
# You can reverse the order of operations to round-trip:
def unsignedLongLongToDouble(Q):
    """Return the double whose 64-bit pattern is the unsigned integer Q (big-endian packing)."""
    (value,) = struct.unpack('>d', struct.pack('>Q', Q))
    return value
# bitsToFloat(0x432d5000)
# 173.3125
# takes fp or list of fp and returns same with just two digits of precision
# using print rounding
def twoDecimals(l):
    """Format a number, or each number in a list, as a string with two decimals.

    A list argument yields a list of strings; anything else yields one string.
    """
    if not isinstance(l, list):
        return "%.2f" % l
    formatted = []
    for number in l:
        formatted.append("%.2f" % number)
    return formatted
# a short quick version for relative comparison. But it's probably better to use approxEqual below
# the subsequent ones might be preferred, especially assertAlmostEqual()
# http://en.wikipedia.org/wiki/Relative_difference
# http://stackoverflow.com/questions/4028889/floating-point-equality-in-python
# def fp_approxEqual(a, b, rel):
# c = abs(a-b) / max(abs(a), abs(b))
# print "actual relative diff: %s allowed relative diff: %s" % (c, rel)
# return c < rel
# Generic "approximately equal" function for any object type, with customisable error tolerance.
# When called with float arguments, approxEqual(x, y[, tol[, rel]]) compares x and y numerically,
# and returns True if y is within either absolute error tol or relative error rel of x,
# otherwise return False.
# The function defaults to sensible default values for tol and rel.
# For any other pair of objects, approxEqual() looks for a method __approxEqual__ and, if found,
# calls it with arbitrary optional arguments.
# This allows types to define their own concept of "close enough".
def _float_approxEqual(x, y, tol=1e-18, rel=1e-7, **kwargs):
if tol is rel is None:
raise TypeError('cannot specify both absolute and relative errors are None')
tests = []
if tol is not None: tests.append(abs(tol))
if rel is not None: tests.append(abs(rel*x))
assert tests
return abs(abs(x) - abs(y)) <= max(tests)
# from http://code.activestate.com/recipes/577124-approximately-equal/
def approxEqual(x, y, *args, **kwargs):
    """approxEqual(float1, float2[, tol=1e-18, rel=1e-7]) -> True|False
    approxEqual(obj1, obj2[, *args, **kwargs]) -> True|False

    Report whether x and y are approximately equal.

    Two floats are compared numerically by _float_approxEqual; tol and rel
    are forwarded to it (either may be None, but not both).

    For anything else, x and then y is asked via an __approxEqual__ method,
    which receives the other object plus any optional arguments. Its result
    is returned as a bool. A method may return NotImplemented to defer to the
    other object. If neither object has the method, or both defer, the float
    comparison is used as a fallback.

    >>> approxEqual(1.2345678, 1.2345677)
    True
    >>> approxEqual(1.234, 1.235)
    False
    """
    both_floats = type(x) is float and type(y) is float
    if not both_floats:
        # Private sentinel: distinguishes "no such attribute" from any real value.
        missing = object()
        # x gets the first say, then y with the roles swapped.
        for receiver, argument in ((x, y), (y, x)):
            hook = getattr(receiver, '__approxEqual__', missing)
            if hook is missing:
                continue
            outcome = hook(argument, *args, **kwargs)
            if outcome is not NotImplemented:
                return bool(outcome)
    # Plain floats, or nobody knew how to compare: numeric fallback.
    return _float_approxEqual(x, y, *args, **kwargs)
# note this can take 'tol' and 'rel' parms for the float case
# just wraps approxEqual in an assert with a good print message
def assertApproxEqual(x, y, msg='', **kwargs):
    """Raise Exception with a descriptive message unless approxEqual(x, y) holds.

    msg is prefixed to the failure message and is also forwarded to
    approxEqual together with kwargs ('tol' and 'rel' apply to floats).
    """
    if approxEqual(x, y, msg=msg, **kwargs):
        return
    detail = '. h2o_util.assertApproxEqual failed comparing %s and %s. %s.' % (x, y, kwargs)
    raise Exception(msg + detail)
def cleanseInfNan(value):
    """Map the strings 'NaN', 'Infinity' and '-Infinity' to IEEE float values.

    The lookup is done on str(value); any other value is returned unchanged.
    """
    ieee_values = {
        'NaN': float('NaN'),
        'Infinity': float('Inf'),
        '-Infinity': -float('Inf'),
    }
    return ieee_values.get(str(value), value)
# use a random or selected fp format from the choices
# for testing different fp representations
# 'only' can be e, f or g, to restrict the choices
# it will wrap the 0-47 until the group (modulo e)
def fp_format(val=None, sel=None, only=None):
    """Format val with one of 48 printf-style floating point formats.

    The formats are 3 groups ('e', 'f', 'g') of 16. Each group has 8
    width/sign/padding layouts, first without a precision (indexes 0-7) and
    then with '.4' (indexes 8-15).

    Arguments:
    val -- number to format; when None, the number of formats is returned
    sel -- index (0-47) of the format to use; a random one when None
    only -- 'e', 'f' or 'g' to restrict the choice to that group; the
            selected index is wrapped into the group (modulo the group size)

    Returns the formatted string, or the format count when val is None.
    Raises Exception when sel is out of range or only is not e/f/g.
    """
    layouts = ('', '20', '-20', '020', '+', '+20', '+-20', '+020')
    precisions = ('', '.4')
    conversions = ('e', 'f', 'g')
    # Order matters: conversion is the slowest-varying part, layout the fastest.
    caseList = [
        '%' + layout + precision + conversion
        for conversion in conversions
        for precision in precisions
        for layout in layouts
    ]

    # Test against None so that a value of 0 / 0.0 still gets formatted.
    if val is None:
        return len(caseList)

    # Test against None so that sel=0 selects the first format.
    if sel is not None:
        if sel < 0 or sel >= len(caseList):
            raise Exception("sel out of range in fp_format: %s" % sel)
        choice = sel
    else:
        # pick one randomly if no sel
        choice = random.randint(0, len(caseList) - 1)

    if only:
        if only not in conversions:
            raise Exception("Bad param combo of only: %s and sel: %s in h2o_util.fp_format()" % (only, sel))
        subgrp_size = len(caseList) // len(conversions)
        # Wrap into the group, then shift to that group's base offset.
        choice = conversions.index(only) * subgrp_size + choice % subgrp_size

    return caseList[choice] % val
# http://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
# given [2, 3, 5] it returns 0 (the index of the first element) with probability 0.2,
# 1 with probability 0.3 and 2 with probability 0.5.
# The weights need not sum up to anything in particular, and can actually be
# arbitrary Python floating point numbers.
# The weights need to cover the whole list? otherwise you don't get the rest of the choices
# random_data = [6,7,8]
# weights = [2,3,5]
# d = random_data[h2o_util.weighted_choice(weights)]
def weighted_choice(weights):
    """Return a random index into weights, chosen in proportion to each weight.

    Returns None when no index is selected (for example, for an empty list).
    """
    draw = random.random()
    remaining = draw * sum(weights)
    # Walk the weights, consuming the draw; the first one to exhaust it wins.
    for index, weight in enumerate(weights):
        remaining -= weight
        if remaining < 0:
            return index
    return None
# x = choice_with_probability( [('one',0.25), ('two',0.25), ('three',0.5)] )
# need to sum to 1 or less. check error case if you go negative
def choice_with_probability(tupleList):
    """Pick an item from a list of (item, probability) tuples.

    A uniform draw in [0, 1) is consumed by each probability in turn; the
    first item whose probability exceeds what is left of the draw is
    returned. If the list is exhausted first, the last item is returned.
    """
    draw = random.uniform(0, 1)
    for item, prob in tupleList:
        if draw < prob:
            return item
        draw -= prob
        if draw < 0:
            raise Exception("h2o_util.choice_with_probability() error, prob's sum > 1")
    return item
# pick a random param from a dictionary of lists of params
def pickRandParams(paramDict, params):
    """Copy a random selection of parameters from paramDict into params.

    Arguments:
    paramDict -- dict mapping a parameter name to a list of candidate values
    params -- dict updated in place with the picked name/value pairs

    Between 1 and len(paramDict) picks are made. The same key may be picked
    more than once, so the number of distinct keys set can be smaller.
    An empty paramDict leaves params untouched.
    """
    if not paramDict:
        return
    # Materialize the keys once: random.choice needs an indexable sequence.
    keys = list(paramDict)
    randomGroupSize = random.randint(1, len(keys))
    for _ in range(randomGroupSize):
        randomKey = random.choice(keys)
        params[randomKey] = random.choice(paramDict[randomKey])
# this reads a single col out a csv file into a list, without using numpy
# so we can port some jenkins tests without needing numpy
def file_read_csv_col(csvPathname, col=0, skipHeader=True, datatype='float', preview=5):
    """Read one column of a csv file into a list of numbers, without numpy.

    Arguments:
    csvPathname -- path of the csv file
    col -- zero-based index of the column to read
    skipHeader -- skip the first row (only one header line can be skipped)
    datatype -- 'float' or 'int': the conversion applied to each entry
    preview -- number of leading rows whose entry is echoed (only for rows
               with at most 10 columns)

    Returns the converted entries. Entries that are empty, or missing because
    the row is too short, are dropped.
    Raises ValueError for an unsupported datatype. Exits the process via
    sys.exit() on a csv parsing error.
    """
    if datatype not in ('float', 'int'):
        raise ValueError("datatype must be 'float' or 'int', got: %r" % (datatype,))

    # Python 2's csv module wants a binary-mode file; Python 3's wants text mode.
    mode = 'rb' if sys.version_info[0] < 3 else 'r'
    dataList = []
    lastRowLength = None
    with open(csvPathname, mode) as f:
        reader = csv.reader(f, quoting=csv.QUOTE_NONE)  # no extra handling for quotes
        print("csv read of %s column %s" % (csvPathname, col))
        try:
            for rowNum, row in enumerate(reader):
                if skipHeader and rowNum == 0:
                    print("Skipping header in this csv")
                else:
                    if lastRowLength and len(row) != lastRowLength:
                        print("Current row length: %s is different than last row length: %s" % (len(row), lastRowLength))
                    if col > len(row) - 1:
                        print("col (zero indexed): %s points past the # entries in this row %s" % (col, row))
                        colData = None
                    else:
                        colData = row[col]
                    # only echo the first few rows, and not for wide rows
                    if rowNum < preview and len(row) <= 10:
                        print(colData)
                    dataList.append(colData)
                lastRowLength = len(row)
        except csv.Error as e:
            sys.exit('file %s, line %d: %s' % (csvPathname, reader.line_num, e))

    # dataList holds strings (or None); drop the empty ones and convert the rest.
    convert = float if datatype == 'float' else int
    D1 = [convert(i) for i in dataList if i]
    print("D1 done")
    return D1
def file_line_count(fname):
    """Return the number of lines in the file at path fname."""
    # The with-block closes the handle instead of leaving it to the garbage collector.
    with open(fname) as f:
        return sum(1 for _ in f)
def file_size_formatted(fname):
    """Return the size of the file at fname as a string such as '1.5 KB'.

    Units step by 1024 from bytes up to TB; anything larger is still
    expressed in TB. The raw byte count is also printed.
    """
    size = os.path.getsize(fname)
    print("size: %s" % size)
    units = ['bytes', 'KB', 'MB', 'GB', 'TB']
    # Only divide when moving on to a larger unit. Dividing once more after
    # the last unit would report e.g. 1024 TB as "1.0 TB".
    for unit in units[:-1]:
        if size < 1024.0:
            return "%3.1f %s" % (size, unit)
        size /= 1024.0
    return "%3.1f %s" % (size, units[-1])
# the logfiles are zipped with directory structure
# unzip it to the zipdir, throwing away the directory structure.
# (so we don't have to know the names of the intermediate directories)
def flat_unzip(my_zip, my_dir):
    """Extract every file in a zip archive directly into my_dir.

    The directory structure inside the archive is discarded, so members with
    the same base name overwrite each other. Directory entries are skipped.

    Arguments:
    my_zip -- path of the zip archive
    my_dir -- existing directory that receives the files

    Returns the list of absolute pathnames written.
    """
    resultList = []
    with zipfile.ZipFile(my_zip) as zip_file:
        for member in zip_file.namelist():
            filename = os.path.basename(member)
            # skip directories
            if not filename:
                continue
            target_path = os.path.join(my_dir, filename)
            # Nested with-blocks close both handles even if the copy, or
            # opening the target, fails.
            with zip_file.open(member) as source:
                with open(target_path, "wb") as target:
                    shutil.copyfileobj(source, target)
            resultList.append(os.path.abspath(target_path))
    return resultList
# gunzip gzfile to outfile
def file_gunzip(gzfile, outfile):
    """Decompress the gzip file gzfile into outfile, printing the elapsed time."""
    print("\nGunzip-ing %s to %s" % (gzfile, outfile))
    start = time.time()
    # with-blocks close both handles on error; copyfileobj copies in
    # fixed-size chunks instead of splitting binary data on newlines.
    with gzip.open(gzfile, 'rb') as zipped_file:
        with open(outfile, 'wb') as out_file:
            shutil.copyfileobj(zipped_file, out_file)
    print("\nGunzip took %s secs" % (time.time() - start))
# gzip infile to gzfile
def file_gzip(infile, gzfile):
    """Compress infile into the gzip file gzfile, printing the elapsed time."""
    print("\nGzip-ing %s to %s" % (infile, gzfile))
    start = time.time()
    # with-blocks close both handles on error; copyfileobj copies in
    # fixed-size chunks instead of splitting binary data on newlines.
    with open(infile, 'rb') as in_file:
        with gzip.open(gzfile, 'wb') as zipped_file:
            shutil.copyfileobj(in_file, zipped_file)
    print("\nGzip took %s secs" % (time.time() - start))
# cat file1 and file2 to outfile
def file_cat(file1, file2, outfile):
    """Write the contents of file1 followed by file2 to outfile.

    outfile is overwritten if it exists. The elapsed time is printed.
    """
    print("\nCat'ing %s %s to %s" % (file1, file2, outfile))
    start = time.time()
    with open(outfile, 'wb') as destination:
        for pathname in (file1, file2):
            # Each source is closed as soon as it has been copied.
            with open(pathname, 'rb') as source:
                shutil.copyfileobj(source, destination)
    print("\nCat took %s secs" % (time.time() - start))
# used in loop, so doing always print
def file_append(infile, outfile):
    """Append the contents of infile to the end of outfile.

    outfile is created if it does not exist. Progress and timing are reported
    through verboseprint.
    """
    verboseprint("\nAppend'ing", infile, "to", outfile)
    start = time.time()
    # Both sides are opened in binary mode so the bytes are appended
    # untranslated; copyfileobj avoids reading the whole input into memory.
    with open(infile, 'rb') as in_file:
        with open(outfile, 'ab') as out_file:
            shutil.copyfileobj(in_file, out_file)
    verboseprint("\nAppend took", (time.time() - start), "secs")
def file_shuffle(infile, outfile):
    """Write the lines of infile to outfile in random order.

    The work is delegated to the external command `sort -R`, fed from infile
    and writing to outfile. The command's exit status is not checked.
    The elapsed time is printed.
    """
    print("\nShuffle'ing %s to %s" % (infile, outfile))
    start = time.time()
    # with-blocks close both handles even if the subprocess cannot be started.
    with open(infile, 'r') as fi:
        with open(outfile, 'w') as fo:
            subprocess.call(["sort", "-R"], stdin=fi, stdout=fo)
    print("\nShuffle took %s secs" % (time.time() - start))
# FIX! This is a hack to deal with parser bug
def file_strip_trailing_spaces(csvPathname1, csvPathname2):
    """Copy csvPathname1 to csvPathname2 with each line stripped.

    Spaces, '\\n' and '\\r' are removed from both ends of every line, and each
    line is written with a unix line ending. csvPathname2 is overwritten.
    """
    with open(csvPathname1, 'r') as infile:
        with open(csvPathname2, 'w') as outfile:  # existing file gets erased
            # Iterate lazily rather than loading every line with readlines().
            for line in infile:
                outfile.write(line.strip(" \n\r") + "\n")
    print("\n" + csvPathname1 + " stripped to " + csvPathname2)
# can R deal with comments in a csv?
def file_strip_comments(csvPathname1, csvPathname2):
    """Copy csvPathname1 to csvPathname2, dropping lines that start with '#'.

    Only a '#' in the first column counts. csvPathname2 is overwritten.
    """
    with open(csvPathname1, 'r') as infile:
        with open(csvPathname2, 'w') as outfile:  # existing file gets erased
            # Iterate lazily rather than loading every line with readlines().
            for line in infile:
                if not line.startswith('#'):
                    outfile.write(line)
    print("\n" + csvPathname1 + " w/o comments to " + csvPathname2)
def file_spaces_to_comma(csvPathname1, csvPathname2):
    """Copy csvPathname1 to csvPathname2, turning each run of spaces into one comma.

    csvPathname2 is overwritten.
    """
    with open(csvPathname1, 'r') as infile:
        with open(csvPathname2, 'w') as outfile:  # existing file gets erased
            # Iterate lazily rather than loading every line with readlines().
            for line in infile:
                outfile.write(re.sub(r' +', r',', line))
    print("\n" + csvPathname1 + " with space(s)->comma to " + csvPathname2)
# UPDATE: R seems to be doing some kind of expand_cat on cols with '.' in them for NA
# (the umass/princeton) data sets. Change to 0 for now so both H2O and R use them the
# same way
def file_clean_for_R(csvPathname1, csvPathname2):
    """Copy csvPathname1 to csvPathname2, normalized so R and H2O read it alike.

    Per line:
    1) lines starting with '#' or with a letter (comments, header) are dropped
    2) spaces, '\\n' and '\\r' are stripped from both ends; a unix line ending
       is written
    3) each run of spaces becomes one comma (spaces inside enums are not
       expected)
    4) every field that is exactly "." becomes "0"

    csvPathname2 is overwritten.
    """
    with open(csvPathname1, 'r') as infile:
        with open(csvPathname2, 'w') as outfile:  # existing file gets erased
            for line in infile:
                if line.startswith('#') or re.match('[A-Za-z]+', line):
                    continue
                line = line.strip(" \n\r")
                line = re.sub(r' +', r',', line)
                # Replace per field. A regex on ",.," cannot match two adjacent
                # "." fields, because the shared comma is consumed by the
                # first match.
                fields = ['0' if field == '.' else field for field in line.split(',')]
                outfile.write(','.join(fields) + "\n")
    print("\n" + csvPathname1 + " cleaned for R to " + csvPathname2)
# this might be slightly pessimistic, but should be superset
def might_h2o_think_whitespace(token):
    """Return True when token is empty or consists only of whitespace."""
    blank_pattern = re.compile(r"""
\s*$ # begin, white space or empty space, end
""", re.VERBOSE)
    return blank_pattern.match(token) is not None
# this might be slightly pessimistic, but should be superset
def might_h2o_think_number_or_whitespace(token):
    """Return True if token matches one of the number/NaN/whitespace patterns.

    The patterns are deliberately permissive (meant as a superset of what h2o
    accepts): whitespace-only and empty tokens, a lone '$', '+' or '-',
    numbers decorated with '$' / '%', malformed exponents such as '+e+++10',
    and 'na' / 'nan' in any case all match.
    """
    # whitespace, optionally around a single sign or dollar character
    specialRegex = re.compile(r"""
        \s*
        [\$+-]?             # single chars that might be considered numbers. allow spaces in between
        \s*$
        """, re.VERBOSE)

    # decimal-point focused form; every part is optional
    number1Regex = re.compile(r"""
        [\s\$\%]*           # begin, white space or empty space. any number of leading % or $ too
        [+-]?               # plus or minus. maybe h2o matches multiple?
        ([0-9]*\.[0-9]*)?   # decimal point focused. optional whole and fractional digits. h2o thinks whole thing optional?
        ([eE][-+]*[0-9]+)?  # optional exponent. A single e matches (incorrectly) apparently repeated +- after the e doesn't matter
        (\s*[\% ]*)?        # can have zero or more percent. Percent can have a space?
        [\s\$\%]*$          # white space or empty space, any number of trailing % or $ too. end
        """, re.VERBOSE)

    # apparently these get detected as number
    # +e+++10
    # +e---10

    # whole-digits focused form; every part is optional
    number2Regex = re.compile(r"""
        [\s\$\%]*           # begin, white space or empty space. any number of leading % or $ too
        [+-]?               # plus or minus. maybe h2o matches multiple?
        ([0-9]+)?           # one or more digits. h2o thinks whole thing optional
        (\.[0-9]*)?         # optional decimal point and fractional digits
        ([eE][-+]*[0-9]+)?  # optional exponent. a single e matches (incorrectly) apparently repeated +- after the e doesn't matter
        (\s*[\% ]*)?        # can have zero or more percent. Percent can have a space?
        [\s\$\%]*$          # white space or empty space, any number of trailing % or $ too. end
        """, re.VERBOSE)

    # can nans have the +-%$ decorators?. allow any case?
    nanRegex = re.compile(r"""
        [\s\$\%]*           # begin, white space or empty space. any number of leading % or $ too
        [+-]?               # plus or minus
        [Nn][Aa][Nn]?       # nan or na
        (\s*[\% ]*)?        # can have zero or more percent. Percent can have a space?
        [\s\$\%]*$          # white space or empty space, any number of trailing % or $ too. end
        """, re.VERBOSE)

    # NOTE: the percent group is a character class [\% ]. Written as \[\% ]
    # it would require a literal '[' and make tokens like "1[%" match.
    patterns = (specialRegex, number1Regex, number2Regex, nanRegex)
    return any(pattern.match(token) for pattern in patterns)
# from nmb10 at http://djangosnippets.org/snippets/2247/
# Shows difference between two json like python objects.
# Shows properties, values from first object that are not in the second.
# Examples:
# import simplejson # or other json serializer
# first = simplejson.loads('{"first_name": "Poligraph", "last_name": "Sharikov",}')
# second = simplejson.loads('{"first_name": "Poligraphovich", "pet_name": "Sharik"}')
# df = JsonDiff(first, second)
# df.difference is ["path: last_name"]
# JsonDiff(first, second, vice_versa=True) gives you difference from both objects in the one result.
# df.difference is ["path: last_name", "path: pet_name"]
# JsonDiff(first, second, with_values=True) gives you difference of the values strings.
class JsonDiff(object):
    """Collect the differences between two json-like python objects.

    After construction, self.difference is a list of message strings
    describing what is in `first` but missing from, or different in,
    `second`.

    Arguments:
    first, second -- nested dicts / lists / scalars to compare
    with_values -- also report leaf values that differ (label 'diff')
    vice_versa -- additionally compare second against first

    Every message has the form '<label>: <detail>'. Missing-path and
    type-mismatch entries carry the label None, so they read 'None: <detail>'.
    """

    def __init__(self, first, second, with_values=False, vice_versa=False):
        self.difference = []
        self.check(first, second, with_values=with_values)
        if vice_versa:
            self.check(second, first, with_values=with_values)

    def check(self, first, second, path='', with_values=False):
        """Recursively record what `first` has that `second` lacks, under `path`.

        second is None means "nothing to compare against": type and value
        checks are skipped, but the structure of first is still walked.
        """
        if second is not None:
            if not isinstance(first, type(second)):
                message = '%s- %s, %s' % (path, type(first), type(second))
                self.save_diff(message, None)

        if isinstance(first, dict):
            for key in first:
                # the first part of path must not have trailing dot.
                if len(path) == 0:
                    new_path = key
                else:
                    new_path = "%s.%s" % (path, key)

                if isinstance(second, dict):
                    if key in second:
                        sec = second[key]
                    else:
                        # key is in first but not in second
                        self.save_diff(new_path, None)
                        # prevent further values checking.
                        sec = None
                    self.check(first[key], sec, path=new_path, with_values=with_values)
                else:
                    # second is not dict. every key from first goes to the difference
                    self.save_diff(new_path, None)
                    self.check(first[key], second, path=new_path, with_values=with_values)

        # if object is list, loop over it and check.
        elif isinstance(first, list):
            for index, item in enumerate(first):
                new_path = "%s[%s]" % (path, index)
                # try to get the same index from second
                sec = None
                if second is not None:
                    try:
                        sec = second[index]
                    except (IndexError, KeyError, TypeError):
                        # TypeError: second cannot be indexed at all (e.g. a number).
                        self.save_diff('%s - %s, %s' % (new_path, type(first), type(second)), None)
                self.check(item, sec, path=new_path, with_values=with_values)

        # not list, not dict. check for equality (only if with_values is True).
        else:
            if with_values and second is not None:
                if first != second:
                    self.save_diff('%s - %s | %s' % (path, first, second), 'diff')

    def save_diff(self, diff_message, type_):
        """Append '<type_>: <diff_message>' to self.difference unless already present."""
        message = '%s: %s' % (type_, diff_message)
        # Compare the stored form (with its label), so repeats are really suppressed.
        if message not in self.difference:
            self.difference.append(message)
# per Alex Kotliarov
# http://stackoverflow.com/questions/2343535/easiest-way-to-serialize-a-simple-class-object-with-simplejson
#This function will produce JSON-formatted string for
# an instance of a custom class,
# a dictionary that have instances of custom classes as leaves,
# a list of instances of custom classes
# added depth limiting to original
def json_repr(obj, curr_depth=0, max_depth=4):
    """Reduce obj to plain data that a JSON encoder can handle.

    Arguments:
    obj -- any object
    curr_depth -- nesting depth already consumed before obj
    max_depth -- nesting depth beyond which content is replaced by None

    Return:
    The converted structure (not a string; pass it to json.dumps to encode).
    Numbers, bools and strings are kept. Dicts are copied with converted
    values, lists and tuples are converted element-wise, objects with a
    __dict__ become the conversion of that dict, and anything else becomes
    its repr() string. The input is not modified.
    """
    try:
        primitive_types = (bool, int, long, float, basestring)
    except NameError:
        # Python 3: int and str cover what long and basestring did.
        primitive_types = (bool, int, float, str)

    def serialize(obj, curr_depth):
        """Recursively walk object's hierarchy. Limit to max_depth"""
        if curr_depth > max_depth:
            return None
        if isinstance(obj, primitive_types):
            return obj
        if isinstance(obj, dict):
            # copy() keeps the caller's dict intact and preserves its type
            obj = obj.copy()
            for key in obj:
                obj[key] = serialize(obj[key], curr_depth + 1)
            return obj
        if isinstance(obj, list):
            return [serialize(item, curr_depth + 1) for item in obj]
        if isinstance(obj, tuple):
            items = serialize(list(obj), curr_depth + 1)
            # At the depth limit the list comes back as None; tuple(None)
            # would raise TypeError, so truncate the tuple to None as well.
            return None if items is None else tuple(items)
        if hasattr(obj, '__dict__'):
            return serialize(obj.__dict__, curr_depth + 1)
        return repr(obj)  # Don't know how to handle, convert to string

    return serialize(obj, curr_depth + 1)
# b = convert_json(a, 'ascii')
# a = json.dumps(serialize(obj))
# c = json.loads(a)