add dump nice to regression demo

DecPaul · Mar 26, 2014 · 2aa1031 · 2aa1031
1 parent 1440dc9
commit 2aa1031
Show file tree

Hide file tree

Showing 4 changed files with 36 additions and 11 deletions.
diff --git a/demo/binary_classification/README b/demo/binary_classification/README
@@ -4,10 +4,11 @@ Run: ./runexp.sh
 
 Format of input: LIBSVM format
 
-Format of featmap.txt:
-<featureid> <featurename> <q or i>\n
+Format of featmap.txt: ```<featureid> <featurename> <q or i or int>\n```
+  - Feature id must be from 0 to number of features, in sorted order.
+  - i means this feature is a binary indicator feature
+  - q means this feature is a quantitative value, such as age or time, and can be missing
+  - int means this feature is an integer value (when int is hinted, the decision boundary will be an integer)
 
-q means continuous quantities, i means indicator features.
-Feature id must be from 0 to num_features, in sorted order.
 
-Detailed explaination: https://github.com/tqchen/xgboost/wiki/Binary-Classification
+Explanations: https://github.com/tqchen/xgboost/wiki/Binary-Classification
diff --git a/demo/regression/README b/demo/regression/README
@@ -2,4 +2,12 @@ Demonstrating how to use XGBoost accomplish regression tasks on computer hardwar
 
 Run: ./runexp.sh
 
-Format of input: LIBSVM format
+Format of input: LIBSVM format
+
+Format of featmap.txt: ```<featureid> <featurename> <q or i or int>\n```
+  - Feature id must be from 0 to number of features, in sorted order.
+  - i means this feature is a binary indicator feature
+  - q means this feature is a quantitative value, such as age or time, and can be missing
+  - int means this feature is an integer value (when int is hinted, the decision boundary will be an integer)
+
+Explanations: https://github.com/tqchen/xgboost/wiki/Regression
diff --git a/demo/regression/mapfeat.py b/demo/regression/mapfeat.py
@@ -10,12 +10,23 @@
     for i in xrange( 0,6 ):
         fo.write( ' %d:%s' %(i,arr[i+2]) )
 
-    if arr[0] not in fmap.keys():
+    if arr[0] not in fmap:
         fmap[arr[0]] = cnt
         cnt += 1
 
-    fo.write( ' %d:1' % fmap[arr[0]] )
-
+    fo.write( ' %d:1' % fmap[arr[0]] )	
     fo.write('\n')
 
 fo.close()
+
+# create feature map for machine data
+fo = open('featmap.txt', 'w')
+# list from machine.names
+names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ]; 
+
+for i in xrange(0,6):
+    fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
+
+for v, k in sorted( fmap.iteritems(), key = lambda x:x[1] ):
+    fo.write( '%d\tvendor=%s\ti\n' % (k, v))
+fo.close()
diff --git a/demo/regression/runexp.sh b/demo/regression/runexp.sh
@@ -7,5 +7,10 @@ python mknfold.py machine.txt 1
 ../../xgboost machine.conf
 # output predictions of test data
 ../../xgboost machine.conf task=pred model_in=0002.model
-# print the boosters of 00002.model in dump.raw.txt
-../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt 
+# print the boosters of 0002.model in dump.raw.txt
+../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
+# print the boosters of 0002.model in dump.nice.txt with feature map
+../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt 
+
+# cat the result
+cat dump.nice.txt