
Commit 521596b

Add mdclassfy method for DecisionTree
1 parent e7f5794 commit 521596b

File tree

2 files changed

+57 -5 lines changed

python/DecisionTree.py

+55-3
@@ -2,7 +2,7 @@
 # @Author: LC
 # @Date: 2016-04-08 15:26:49
 # @Last modified by: WuLC
-# @Last Modified time: 2016-04-12 20:29:36
+# @Last Modified time: 2016-04-21 14:41:10
 
 # @Function:implementation of decision tree described in programming-collective-intelligence in chapter 7
 # @Referer: chapter 7 in book 《programming-collective-intelligence》
@@ -105,7 +105,7 @@ def entropy(rows):
     """get the entropy of rows
 
     Args:
-        rows (TYPE): rows to be caculated about their entropy
+        rows (list[list]): rows whose entropy is to be calculated
 
     Returns:
         float: entropy of the rows
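For context, entropy here appears to be the Shannon entropy (in bits) of the values in the last column of rows, as in the book's version of this function. A minimal sketch of calling it, with made-up rows, assuming the entropy defined above:

    rows = [['a', 'yes'], ['b', 'yes'], ['c', 'no'], ['d', 'no']]
    print(entropy(rows))  # two outcomes split 50/50 -> 1.0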
@@ -119,7 +119,21 @@ def entropy(rows):
         ent -= p*log(p, 2)
     return ent
 
-
+def variance(rows):
+    """get the variance of the rows when the target value is a number
+
+    Args:
+        rows (list[list]): rows whose variance is to be calculated
+
+    Returns:
+        float: variance of the last column of the rows
+    """
+    s = sum(row[-1] for row in rows)
+    mean = float(s)/len(rows)
+    pow_sum = sum(pow(row[-1] - mean, 2) for row in rows)
+    variance = pow_sum/len(rows)
+    return variance
+
 def build_tree(rows):
     """build the decision of the rows in the metric of entropy
 
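The new variance helper is presumably intended as an alternative score to entropy when the last column holds numbers rather than categories, as in chapter 7 of the book. A minimal sketch of calling it directly, with made-up rows whose last column is the numeric target:

    rows = [
        ['slashdot', 'USA', 'yes', 18],
        ['google', 'France', 'yes', 23],
        ['digg', 'USA', 'no', 21],
    ]
    print(variance(rows))  # mean is about 20.67, so this prints about 4.22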
@@ -240,8 +254,46 @@ def prune(tree, mini_entropy):
         prune(tree.fb, mini_entropy)
 
 
+def mdclassfy(tree, observation):
+    """classify an observation that may have missing data
+
+    Args:
+        tree (DecisionTreeNode): root of the decision tree
+        observation (list): new observation to be classified
+
+    Returns:
+        dict: possible results of the observation with their weighted counts
+    """
+    if tree.results != None:
+        return tree.results
+    col = tree.col
+    if observation[col] == None:  # empty field in the observation
+        results = {}
+        tb, fb = mdclassfy(tree.tb, observation), mdclassfy(tree.fb, observation)
+        tb_count = sum(tb.values())
+        fb_count = sum(fb.values())
+        tb_fraction = float(tb_count)/(tb_count + fb_count)
+        fb_fraction = float(fb_count)/(tb_count + fb_count)
+        for k, v in tb.items():
+            results[k] = results.get(k, 0) + v*tb_fraction
+        for k, v in fb.items():
+            results[k] = results.get(k, 0) + v*fb_fraction
+        return results
+    else:
+        value = tree.value
+        if isinstance(value, float) or isinstance(value, int):
+            if observation[col] >= value:
+                return mdclassfy(tree.tb, observation)
+            else:
+                return mdclassfy(tree.fb, observation)
+        else:
+            if observation[col] == value:
+                return mdclassfy(tree.tb, observation)
+            else:
+                return mdclassfy(tree.fb, observation)
+
 
 
+
+
 if __name__ == '__main__':
     rot = DecisionTreeNode()
     rot = build_tree(sample_data)
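A minimal usage sketch for the new mdclassfy, assuming the sample_data and build_tree defined earlier in this file; the observation below is made up and mirrors the book's example data, with None in the second field marking the missing value, so both branches at that split are followed and their results combined by weight (adjust the fields to match this repository's sample_data):

    tree = build_tree(sample_data)
    observation = ['google', None, 'yes', 20]  # second field unknown
    print(mdclassfy(tree, observation))  # dict mapping each result to a weighted count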

python/LinearRegression.py

+2-2
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 # @Author: WuLC
 # @Date: 2016-04-03 13:03:24
-# @Last modified by: WuLC
-# @Last Modified time: 2016-04-12 14:08:42
+# @Last modified by: LC
+# @Last Modified time: 2016-04-12 20:48:48
 
 # @Function: linear regression with gradient descent of two methods: batch gradient descent and stochastic gradient descent
 # @Referer: http://stackoverflow.com/questions/17784587/gradient-descent-using-python-and-numpy
