Skip to content

Commit

Permalink
miao
Browse files Browse the repository at this point in the history
  • Loading branch information
Yujia.Miao committed Dec 7, 2016
0 parents commit 3802d4b
Show file tree
Hide file tree
Showing 133 changed files with 5,324,158 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#!/usr/bin/python\n",
"# -*- coding:utf8 -*-\n",
"\n",
"import sys\n",
"import math\n",
"import random\n",
"import re\n",
"import pymysql\n",
"import pandas as pd\n",
"import itertools\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cateindex = {}\n",
"for line in file('cateIndex'):\n",
" line = line.strip().split('\\t')\n",
" index, cate = line[:2]\n",
" cateindex[index] = cate\n",
" \n",
"\n",
"arr = []\n",
"for k in range(len(cateindex)):\n",
" for i in itertools.combinations(cateindex.keys(), k+1):\n",
" arr.append(i)\n",
"\n",
"def gradsearch(dtype, filepath):\n",
" #第一层代表 feature类别组合 \n",
" for each_zuhe in arr:\n",
" name = []\n",
" for k in each_zuhe:\n",
" name.append(cateindex[k])\n",
" filename = '.'.join(name)\n",
"\n",
" #对每一个组合的feature生成train, test\n",
" fh = file('data/'+dtype+'-'+filename,'w+')\n",
" for line in file(filepath):\n",
" line = line.strip().split('\\t')\n",
" label, features = line[0], line[1]\n",
" featurearr = features.split(' ')\n",
" feature_each = []\n",
" for feature in featurearr:\n",
" if feature[:3] in each_zuhe:\n",
" feature_each.append(feature)\n",
" if len(feature_each) >0 :\n",
" data = '\\t'.join([label, ' '.join(feature_each)]) + '\\n'\n",
" fh.write(data)\n",
"gradsearch('train', 'dtrain') \n",
"gradsearch('test', 'dtest') "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"python xgboostGblinear.py data/train-geo data/test-geo result/geo.model > result/geo.result\n",
"python xgboostGblinear.py data/train-brand data/test-brand result/brand.model > result/brand.result\n",
"python xgboostGblinear.py data/train-os data/test-os result/os.model > result/os.result\n",
"python xgboostGblinear.py data/train-session_count data/test-session_count result/session_count.model > result/session_count.result\n",
"python xgboostGblinear.py data/train-appid data/test-appid result/appid.model > result/appid.result\n",
"python xgboostGblinear.py data/train-geo.brand data/test-geo.brand result/geo.brand.model > result/geo.brand.result\n",
"python xgboostGblinear.py data/train-geo.os data/test-geo.os result/geo.os.model > result/geo.os.result\n",
"python xgboostGblinear.py data/train-geo.session_count data/test-geo.session_count result/geo.session_count.model > result/geo.session_count.result\n",
"python xgboostGblinear.py data/train-geo.appid data/test-geo.appid result/geo.appid.model > result/geo.appid.result\n",
"python xgboostGblinear.py data/train-brand.os data/test-brand.os result/brand.os.model > result/brand.os.result\n",
"python xgboostGblinear.py data/train-brand.session_count data/test-brand.session_count result/brand.session_count.model > result/brand.session_count.result\n",
"python xgboostGblinear.py data/train-brand.appid data/test-brand.appid result/brand.appid.model > result/brand.appid.result\n",
"python xgboostGblinear.py data/train-os.session_count data/test-os.session_count result/os.session_count.model > result/os.session_count.result\n",
"python xgboostGblinear.py data/train-os.appid data/test-os.appid result/os.appid.model > result/os.appid.result\n",
"python xgboostGblinear.py data/train-session_count.appid data/test-session_count.appid result/session_count.appid.model > result/session_count.appid.result\n",
"python xgboostGblinear.py data/train-geo.brand.os data/test-geo.brand.os result/geo.brand.os.model > result/geo.brand.os.result\n",
"python xgboostGblinear.py data/train-geo.brand.session_count data/test-geo.brand.session_count result/geo.brand.session_count.model > result/geo.brand.session_count.result\n",
"python xgboostGblinear.py data/train-geo.brand.appid data/test-geo.brand.appid result/geo.brand.appid.model > result/geo.brand.appid.result\n",
"python xgboostGblinear.py data/train-geo.os.session_count data/test-geo.os.session_count result/geo.os.session_count.model > result/geo.os.session_count.result\n",
"python xgboostGblinear.py data/train-geo.os.appid data/test-geo.os.appid result/geo.os.appid.model > result/geo.os.appid.result\n",
"python xgboostGblinear.py data/train-geo.session_count.appid data/test-geo.session_count.appid result/geo.session_count.appid.model > result/geo.session_count.appid.result\n",
"python xgboostGblinear.py data/train-brand.os.session_count data/test-brand.os.session_count result/brand.os.session_count.model > result/brand.os.session_count.result\n",
"python xgboostGblinear.py data/train-brand.os.appid data/test-brand.os.appid result/brand.os.appid.model > result/brand.os.appid.result\n",
"python xgboostGblinear.py data/train-brand.session_count.appid data/test-brand.session_count.appid result/brand.session_count.appid.model > result/brand.session_count.appid.result\n",
"python xgboostGblinear.py data/train-os.session_count.appid data/test-os.session_count.appid result/os.session_count.appid.model > result/os.session_count.appid.result\n",
"python xgboostGblinear.py data/train-geo.brand.os.session_count data/test-geo.brand.os.session_count result/geo.brand.os.session_count.model > result/geo.brand.os.session_count.result\n",
"python xgboostGblinear.py data/train-geo.brand.os.appid data/test-geo.brand.os.appid result/geo.brand.os.appid.model > result/geo.brand.os.appid.result\n",
"python xgboostGblinear.py data/train-geo.brand.session_count.appid data/test-geo.brand.session_count.appid result/geo.brand.session_count.appid.model > result/geo.brand.session_count.appid.result\n",
"python xgboostGblinear.py data/train-geo.os.session_count.appid data/test-geo.os.session_count.appid result/geo.os.session_count.appid.model > result/geo.os.session_count.appid.result\n",
"python xgboostGblinear.py data/train-brand.os.session_count.appid data/test-brand.os.session_count.appid result/brand.os.session_count.appid.model > result/brand.os.session_count.appid.result\n",
"python xgboostGblinear.py data/train-geo.brand.os.session_count.appid data/test-geo.brand.os.session_count.appid result/geo.brand.os.session_count.appid.model > result/geo.brand.os.session_count.appid.result\n"
]
}
],
"source": [
"for each_zuhe in arr:\n",
" name = []\n",
" for k in each_zuhe:\n",
" name.append(cateindex[k])\n",
" filename = '.'.join(name)\n",
" dtrain = 'data/'+ 'train' + '-' + filename\n",
" dtest = 'data/'+ 'test' + '-' + filename\n",
" commend = \"python xgboostGblinear.py\" +\" \" + dtrain + \" \" + dtest + \" \" +\"result/\" +filename+\".model\" +\" \"+\">\"+\" \"+ \"result/\" +filename+\".result\" \n",
" print commend"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
1 change: 1 addition & 0 deletions mobile_neering/Data-Intelligence
Submodule Data-Intelligence added at 81c39c
14 changes: 14 additions & 0 deletions mobile_neering/compute.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
train_neg=`cat dtrain | awk '$1=="0" '| wc -l`
train_pos=`cat dtrain | awk '$1=="1" '| wc -l`
x=`expr $train_neg / $train_pos`
echo train neg:pos is $x, pos is: $train_pos , neg is : $train_neg

test_neg=`cat dtest | awk '$1=="0" '| wc -l`
test_pos=`cat dtest | awk '$1=="1" '| wc -l`
y=`expr $test_neg / $test_pos`
echo test neg:pos is $y, pos is:$test_pos , neg is : $test_neg

train=`cat dtrain | wc -l`
tet=`cat dtest | wc -l`
z=`expr $train / $tet`
echo tran:test is $z
Loading

0 comments on commit 3802d4b

Please sign in to comment.