[doctests] Make doctests more robust

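Enable the doctest `+ELLIPSIS` option and replace the Python-2-only `<type '...'>` text in expected reprs with `<... '...'>`, so that the same doctests pass under both Python 2 and Python 3. Check that floating point results are within 1e-12 of the expected answer rather than requiring exact matches. Also drop the verbose iteration table from the ValueIteration doctest and relax the row-sum tolerance in checkSquareStochastic (now 10 * spacing(1.0), via the new rowsSumToOne helper).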
sawcordwell · Dec 10, 2014 · 3d20ccb · 3d20ccb
1 parent 0970cc4
commit 3d20ccb
Show file tree

Hide file tree

Showing 5 changed files with 42 additions and 56 deletions.
diff --git a/runtests.sh b/runtests.sh
@@ -2,4 +2,4 @@
 
 # Is there any difference to using ``python setup.py nosetests``?
 nosetests --with-coverage --cover-package=mdptoolbox --with-doctest \
-    --doctest-options='+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL'
+    --doctest-options='+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL'
diff --git a/src/mdptoolbox/example.py b/src/mdptoolbox/example.py
@@ -140,10 +140,10 @@ def forest(S=3, r1=4, r2=2, p=0.1, is_sparse=False):
     >>> len(Psp)
     2
     >>> Psp[0]
-    <3x3 sparse matrix of type '<type 'numpy.float64'>'
+    <3x3 sparse matrix of type '<... 'numpy.float64'>'
         with 6 stored elements in Compressed Sparse Row format>
     >>> Psp[1]
-    <3x3 sparse matrix of type '<type 'numpy.int64'>'
+    <3x3 sparse matrix of type '<... 'numpy.int64'>'
         with 3 stored elements in Compressed Sparse Row format>
     >>> Rsp
     array([[ 0.,  0.],
@@ -252,10 +252,10 @@ def rand(S, A, is_sparse=False, mask=None):
     >>> len(Psp), len(Rsp)
     (5, 5)
     >>> Psp[0]
-    <100x100 sparse matrix of type '<type 'numpy.float64'>'
+    <100x100 sparse matrix of type '<... 'numpy.float64'>'
         with 3296 stored elements in Compressed Sparse Row format>
     >>> Rsp[0]
-    <100x100 sparse matrix of type '<type 'numpy.float64'>'
+    <100x100 sparse matrix of type '<... 'numpy.float64'>'
         with 3296 stored elements in Compressed Sparse Row format>
     >>> # The number of non-zero elements (nnz) in P and R are equal
     >>> Psp[1].nnz == Rsp[1].nnz

diff --git a/src/mdptoolbox/mdp.py b/src/mdptoolbox/mdp.py
@@ -546,8 +546,9 @@ class for details. Default is 1000.
     >>> P, R = mdptoolbox.example.forest()
     >>> pi = mdptoolbox.mdp.PolicyIteration(P, R, 0.9)
     >>> pi.run()
-    >>> pi.V
-    (26.244000000000014, 29.484000000000016, 33.484000000000016)
+    >>> expected = (26.244000000000014, 29.484000000000016, 33.484000000000016)
+    >>> all(abs(expected[k] - pi.V[k]) < 1e-12 for k in range(len(expected)))
+    True
     >>> pi.policy
     (0, 0, 0)
     """
@@ -820,8 +821,9 @@ class for details. Default is 10.
     >>> pim.run()
     >>> pim.policy
     (0, 0, 0)
-    >>> pim.V
-    (21.81408652334702, 25.054086523347017, 29.054086523347017)
+    >>> expected = (21.81408652334702, 25.054086523347017, 29.054086523347017)
+    >>> all(abs(expected[k] - pim.V[k]) < 1e-12 for k in range(len(expected)))
+    True
 
     """
 
@@ -942,8 +944,9 @@ class for details.
     array([[ 11.198909  ,  10.34652034],
            [ 10.74229967,  11.74105792],
            [  2.86980001,  12.25973286]])
-    >>> ql.V
-    (11.198908998901134, 11.741057920409865, 12.259732864170232)
+    >>> expected = (11.198908998901134, 11.741057920409865, 12.259732864170232)
+    >>> all(abs(expected[k] - ql.V[k]) < 1e-12 for k in range(len(expected)))
+    True
     >>> ql.policy
     (0, 1, 1)
 
@@ -957,8 +960,9 @@ class for details.
     >>> ql.Q
     array([[ 33.33010866,  40.82109565],
            [ 34.37431041,  29.67236845]])
-    >>> ql.V
-    (40.82109564847122, 34.37431040682546)
+    >>> expected = (40.82109564847122, 34.37431040682546)
+    >>> all(abs(expected[k] - ql.V[k]) < 1e-12 for k in range(len(expected)))
+    True
     >>> ql.policy
     (1, 0)
 
@@ -1110,8 +1114,9 @@ class for details. Default: 1000.
     >>> R = np.array([[5, 10], [-1, 2]])
     >>> rvi = mdptoolbox.mdp.RelativeValueIteration(P, R)
     >>> rvi.run()
-    >>> rvi.V
-    (10.0, 3.885235246411831)
+    >>> expected = (10.0, 3.885235246411831)
+    >>> all(abs(expected[k] - rvi.V[k]) < 1e-12 for k in range(len(expected)))
+    True
     >>> rvi.average_reward
     3.8852352464118312
     >>> rvi.policy
@@ -1252,8 +1257,9 @@ class for details.
     >>> vi.verbose
     False
     >>> vi.run()
-    >>> vi.V
-    (5.93215488, 9.38815488, 13.38815488)
+    >>> expected = (5.93215488, 9.38815488, 13.38815488)
+    >>> all(abs(expected[k] - vi.V[k]) < 1e-12 for k in range(len(expected)))
+    True
     >>> vi.policy
     (0, 0, 0)
     >>> vi.iter
@@ -1264,38 +1270,10 @@ class for details.
     >>> P = np.array([[[0.5, 0.5],[0.8, 0.2]],[[0, 1],[0.1, 0.9]]])
     >>> R = np.array([[5, 10], [-1, 2]])
     >>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9)
-    >>> vi.setVerbose()
     >>> vi.run()
-        Iteration       V-variation
-          1               8.0
-          2               2.76
-          3               1.9872
-          4               1.430784
-          5               1.03016448
-          6               0.7417184256
-          7               0.534037266432
-          8               0.384506831831
-          9               0.276844918918
-          10              0.199328341621
-          11              0.143516405967
-          12              0.103331812296
-          13              0.0743989048534
-          14              0.0535672114945
-          15              0.038568392276
-          16              0.0277692424387
-          17              0.0199938545559
-          18              0.0143955752802
-          19              0.0103648142018
-          20              0.00746266622526
-          21              0.00537311968218
-          22              0.00386864617116
-          23              0.00278542524322
-          24              0.00200550617512
-          25              0.00144396444609
-          26              0.0010396544012
-    Iterating stopped, epsilon-optimal policy found.
-    >>> vi.V
-    (40.048625392716815, 33.65371175967546)
+    >>> expected = (40.048625392716815, 33.65371175967546)
+    >>> all(abs(expected[k] - vi.V[k]) < 1e-12 for k in range(len(expected)))
+    True
     >>> vi.policy
     (1, 0)
     >>> vi.iter
@@ -1310,8 +1288,9 @@ class for details.
     >>> R = np.array([[5, 10], [-1, 2]])
     >>> vi = mdptoolbox.mdp.ValueIteration(P, R, 0.9)
     >>> vi.run()
-    >>> vi.V
-    (40.048625392716815, 33.65371175967546)
+    >>> expected = (40.048625392716815, 33.65371175967546)
+    >>> all(abs(expected[k] - vi.V[k]) < 1e-12 for k in range(len(expected)))
+    True
     >>> vi.policy
     (1, 0)
 
@@ -1469,12 +1448,13 @@ class for details.
 
     Examples
     --------
-    >>> import mdptoolbox, mdptoolbox.example
+    >>> import mdptoolbox.example, numpy as np
     >>> P, R = mdptoolbox.example.forest()
     >>> vigs = mdptoolbox.mdp.ValueIterationGS(P, R, 0.9)
     >>> vigs.run()
-    >>> vigs.V
-    (25.5833879767579, 28.830654635546928, 32.83065463554693)
+    >>> expected = (25.5833879767579, 28.830654635546928, 32.83065463554693)
+    >>> all(abs(expected[k] - vigs.V[k]) < 1e-12 for k in range(len(expected)))
+    True
     >>> vigs.policy
     (0, 0, 0)
 

diff --git a/src/mdptoolbox/util.py b/src/mdptoolbox/util.py
@@ -273,6 +273,10 @@ def check(P, R):
     # what the reward array is reporting agree as to the number of actions
     # and states. If not then fail explaining the situation
 
+def rowsSumToOne(Z, n):  # True when every row sum of Z is within tolerance of 1
+    return((_np.abs(Z.sum(axis=1) - _np.ones(n))).max() <=  # largest deviation of a row sum from 1; n presumably the row count of Z -- confirm
+           10 * _np.spacing(_np.float64(1)))  # tolerance: 10 * spacing(float64(1)); _np is presumably numpy -- confirm
+
 def checkSquareStochastic(Z):
     """Check if Z is a square stochastic matrix.
     
@@ -298,8 +302,7 @@ def checkSquareStochastic(Z):
         raise InvalidMDPError(mdperr["mat_square"])
     # check that the matrix is square, and that each row sums to one
     assert s1 == s2, mdperr["mat_square"]
-    assert (_np.abs(Z.sum(axis=1) - _np.ones(s2))).max() <= 2*_np.spacing(1), \
-        mdperr["mat_stoch"]
+    assert rowsSumToOne(Z, s2), mdperr["mat_stoch"]
     # make sure that there are no values less than zero
     try:
         assert (Z >= 0).all(), mdperr["mat_nonneg"]

diff --git a/src/tests/test_examples.py b/src/tests/test_examples.py
@@ -42,7 +42,10 @@ def test_sparse_PR(self):
         assert_equal(len(P), len(self.P))
         for a in range(len(self.P)):
             assert_equal(P[a].shape, self.P[a].shape)
-            assert_equal((P[a] != sp.csr_matrix(self.P[a])).nnz, 0)
+            try:
+                assert_equal((P[a] != sp.csr_matrix(self.P[a])).nnz, 0)
+            except AttributeError:
+                assert_true((P[a].todense() == self.P[a]).all())
         assert_true((R == self.R).all())
         assert_equal(R.shape, self.R.shape)