-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Yujia.Miao
committed
Dec 7, 2016
0 parents
commit 3802d4b
Showing
133 changed files
with
5,324,158 additions
and
0 deletions.
There are no files selected for viewing
157 changes: 157 additions & 0 deletions
157
mobile_neering/.ipynb_checkpoints/product_dtrain_dtest-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"#!/usr/bin/python\n", | ||
"# -*- coding:utf8 -*-\n", | ||
"\n", | ||
"import sys\n", | ||
"import math\n", | ||
"import random\n", | ||
"import re\n", | ||
"import pymysql\n", | ||
"import pandas as pd\n", | ||
"import itertools\n", | ||
"import os" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"cateindex = {}\n", | ||
"for line in file('cateIndex'):\n", | ||
" line = line.strip().split('\\t')\n", | ||
" index, cate = line[:2]\n", | ||
" cateindex[index] = cate\n", | ||
" \n", | ||
"\n", | ||
"arr = []\n", | ||
"for k in range(len(cateindex)):\n", | ||
" for i in itertools.combinations(cateindex.keys(), k+1):\n", | ||
" arr.append(i)\n", | ||
"\n", | ||
"def gradsearch(dtype, filepath):\n", | ||
" #第一层代表 feature类别组合 \n", | ||
" for each_zuhe in arr:\n", | ||
" name = []\n", | ||
" for k in each_zuhe:\n", | ||
" name.append(cateindex[k])\n", | ||
" filename = '.'.join(name)\n", | ||
"\n", | ||
" #对每一个组合的feature生成train, test\n", | ||
" fh = file('data/'+dtype+'-'+filename,'w+')\n", | ||
" for line in file(filepath):\n", | ||
" line = line.strip().split('\\t')\n", | ||
" label, features = line[0], line[1]\n", | ||
" featurearr = features.split(' ')\n", | ||
" feature_each = []\n", | ||
" for feature in featurearr:\n", | ||
" if feature[:3] in each_zuhe:\n", | ||
" feature_each.append(feature)\n", | ||
" if len(feature_each) >0 :\n", | ||
" data = '\\t'.join([label, ' '.join(feature_each)]) + '\\n'\n", | ||
" fh.write(data)\n", | ||
"gradsearch('train', 'dtrain') \n", | ||
"gradsearch('test', 'dtest') " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"python xgboostGblinear.py data/train-geo data/test-geo result/geo.model > result/geo.result\n", | ||
"python xgboostGblinear.py data/train-brand data/test-brand result/brand.model > result/brand.result\n", | ||
"python xgboostGblinear.py data/train-os data/test-os result/os.model > result/os.result\n", | ||
"python xgboostGblinear.py data/train-session_count data/test-session_count result/session_count.model > result/session_count.result\n", | ||
"python xgboostGblinear.py data/train-appid data/test-appid result/appid.model > result/appid.result\n", | ||
"python xgboostGblinear.py data/train-geo.brand data/test-geo.brand result/geo.brand.model > result/geo.brand.result\n", | ||
"python xgboostGblinear.py data/train-geo.os data/test-geo.os result/geo.os.model > result/geo.os.result\n", | ||
"python xgboostGblinear.py data/train-geo.session_count data/test-geo.session_count result/geo.session_count.model > result/geo.session_count.result\n", | ||
"python xgboostGblinear.py data/train-geo.appid data/test-geo.appid result/geo.appid.model > result/geo.appid.result\n", | ||
"python xgboostGblinear.py data/train-brand.os data/test-brand.os result/brand.os.model > result/brand.os.result\n", | ||
"python xgboostGblinear.py data/train-brand.session_count data/test-brand.session_count result/brand.session_count.model > result/brand.session_count.result\n", | ||
"python xgboostGblinear.py data/train-brand.appid data/test-brand.appid result/brand.appid.model > result/brand.appid.result\n", | ||
"python xgboostGblinear.py data/train-os.session_count data/test-os.session_count result/os.session_count.model > result/os.session_count.result\n", | ||
"python xgboostGblinear.py data/train-os.appid data/test-os.appid result/os.appid.model > result/os.appid.result\n", | ||
"python xgboostGblinear.py data/train-session_count.appid data/test-session_count.appid result/session_count.appid.model > result/session_count.appid.result\n", | ||
"python xgboostGblinear.py data/train-geo.brand.os data/test-geo.brand.os result/geo.brand.os.model > result/geo.brand.os.result\n", | ||
"python xgboostGblinear.py data/train-geo.brand.session_count data/test-geo.brand.session_count result/geo.brand.session_count.model > result/geo.brand.session_count.result\n", | ||
"python xgboostGblinear.py data/train-geo.brand.appid data/test-geo.brand.appid result/geo.brand.appid.model > result/geo.brand.appid.result\n", | ||
"python xgboostGblinear.py data/train-geo.os.session_count data/test-geo.os.session_count result/geo.os.session_count.model > result/geo.os.session_count.result\n", | ||
"python xgboostGblinear.py data/train-geo.os.appid data/test-geo.os.appid result/geo.os.appid.model > result/geo.os.appid.result\n", | ||
"python xgboostGblinear.py data/train-geo.session_count.appid data/test-geo.session_count.appid result/geo.session_count.appid.model > result/geo.session_count.appid.result\n", | ||
"python xgboostGblinear.py data/train-brand.os.session_count data/test-brand.os.session_count result/brand.os.session_count.model > result/brand.os.session_count.result\n", | ||
"python xgboostGblinear.py data/train-brand.os.appid data/test-brand.os.appid result/brand.os.appid.model > result/brand.os.appid.result\n", | ||
"python xgboostGblinear.py data/train-brand.session_count.appid data/test-brand.session_count.appid result/brand.session_count.appid.model > result/brand.session_count.appid.result\n", | ||
"python xgboostGblinear.py data/train-os.session_count.appid data/test-os.session_count.appid result/os.session_count.appid.model > result/os.session_count.appid.result\n", | ||
"python xgboostGblinear.py data/train-geo.brand.os.session_count data/test-geo.brand.os.session_count result/geo.brand.os.session_count.model > result/geo.brand.os.session_count.result\n", | ||
"python xgboostGblinear.py data/train-geo.brand.os.appid data/test-geo.brand.os.appid result/geo.brand.os.appid.model > result/geo.brand.os.appid.result\n", | ||
"python xgboostGblinear.py data/train-geo.brand.session_count.appid data/test-geo.brand.session_count.appid result/geo.brand.session_count.appid.model > result/geo.brand.session_count.appid.result\n", | ||
"python xgboostGblinear.py data/train-geo.os.session_count.appid data/test-geo.os.session_count.appid result/geo.os.session_count.appid.model > result/geo.os.session_count.appid.result\n", | ||
"python xgboostGblinear.py data/train-brand.os.session_count.appid data/test-brand.os.session_count.appid result/brand.os.session_count.appid.model > result/brand.os.session_count.appid.result\n", | ||
"python xgboostGblinear.py data/train-geo.brand.os.session_count.appid data/test-geo.brand.os.session_count.appid result/geo.brand.os.session_count.appid.model > result/geo.brand.os.session_count.appid.result\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for each_zuhe in arr:\n", | ||
" name = []\n", | ||
" for k in each_zuhe:\n", | ||
" name.append(cateindex[k])\n", | ||
" filename = '.'.join(name)\n", | ||
" dtrain = 'data/'+ 'train' + '-' + filename\n", | ||
" dtest = 'data/'+ 'test' + '-' + filename\n", | ||
" commend = \"python xgboostGblinear.py\" +\" \" + dtrain + \" \" + dtest + \" \" +\"result/\" +filename+\".model\" +\" \"+\">\"+\" \"+ \"result/\" +filename+\".result\" \n", | ||
" print commend" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python [conda root]", | ||
"language": "python", | ||
"name": "conda-root-py" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 1 | ||
} |
Submodule Data-Intelligence
added at
81c39c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
train_neg=`cat dtrain | awk '$1=="0" '| wc -l` | ||
train_pos=`cat dtrain | awk '$1=="1" '| wc -l` | ||
x=`expr $train_neg / $train_pos` | ||
echo train neg:pos is $x, pos is: $train_pos , neg is : $train_neg | ||
|
||
test_neg=`cat dtest | awk '$1=="0" '| wc -l` | ||
test_pos=`cat dtest | awk '$1=="1" '| wc -l` | ||
y=`expr $test_neg / $test_pos` | ||
echo test neg:pos is $y, pos is:$test_pos , neg is : $test_neg | ||
|
||
train=`cat dtrain | wc -l` | ||
tet=`cat dtest | wc -l` | ||
z=`expr $train / $tet` | ||
echo tran:test is $z |
Oops, something went wrong.