Skip to content

Commit

Permalink
clean the code
Browse files Browse the repository at this point in the history
  • Loading branch information
moneyDboat committed Sep 11, 2018
1 parent 5fc92e2 commit 04775e6
Show file tree
Hide file tree
Showing 13 changed files with 220 additions and 126 deletions.
289 changes: 169 additions & 120 deletions .idea/workspace.xml

Large diffs are not rendered by default.

File renamed without changes.
27 changes: 27 additions & 0 deletions emb_build/tran_emb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import word2vec
import fire

# Input text files; each one is passed to word2vec.word2vec as the training
# file in the loop at the bottom of this script.
paths = ['raw_word.txt', 'raw_article.txt']
# Values passed as the `size` argument of word2vec.word2vec for every input
# file (presumably the embedding dimensionality -- confirm against the
# word2vec package docs).
sizes = [300]


def tran(path):
    """Dump a saved word2vec model to a plain-text embedding file.

    Loads the model at ``path`` with ``word2vec.load``, prints the path and
    the shape of the model's vector matrix, then writes one line per
    vocabulary entry in the form ``<word> <v1> <v2> ...``.

    The output file name is the part of ``path`` before its first ``.``
    followed by ``_.txt`` (e.g. ``word_300.bin`` -> ``word_300_.txt``).

    Args:
        path: Path of the model file to load.

    Returns:
        None. The result is written to disk and progress is printed.
    """
    model = word2vec.load(path)
    vocab, vectors = model.vocab, model.vectors
    print(path)
    print('shape of word embeddings : ')
    print(vectors.shape)

    # NOTE: everything after the FIRST '.' is dropped, so a path with a dot in
    # a directory name (e.g. './out/word.bin') would be truncated at that dot.
    new_path = path.split('.')[0] + '_.txt'
    print('Transform start....')
    # The context manager guarantees the file is flushed and closed even if a
    # write fails part-way through; the original never closed the handle.
    with open(new_path, 'w') as f:
        for word, vector in zip(vocab, vectors):
            f.write(str(word) + ' ' + ' '.join(map(str, vector)) + '\n')
    print('Transform Complete!\n')


# Train one model per (input file, size) combination and dump each to text.
# Pairs are produced in the same order as nested loops over paths then sizes.
for path, size in ((corpus, dim) for corpus in paths for dim in sizes):
    # 'raw_word.txt' -> 'word'; with size 300 the model file is 'word_300.bin'.
    emb_path = '_'.join([path.split('.')[0].split('_')[1], str(size)]) + '.bin'
    word2vec.word2vec(path, emb_path, min_count=5, size=size, verbose=True)
    tran(emb_path)
2 changes: 1 addition & 1 deletion fine_tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import time
import torch.nn.functional as F
import models
import util
import data
import pandas as pd
import os
import fire
Expand Down
2 changes: 1 addition & 1 deletion gen_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import torch
import models
from config import DefaultConfig
import util
import data
import fire
import numpy as np
import torch.nn.functional as F
Expand Down
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import time
import torch.nn.functional as F
import models
import util
import data
from config import DefaultConfig
import pandas as pd
import os
Expand All @@ -30,7 +30,7 @@ def main(**kwargs):
args.device = None
torch.manual_seed(args.seed) # set random seed for cpu

train_iter, val_iter, test_iter, args.vocab_size, vectors = util.load_data(args)
train_iter, val_iter, test_iter, args.vocab_size, vectors = data.load_data(args)

args.print_config()

Expand Down
2 changes: 1 addition & 1 deletion main_boost.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import time
import torch.nn.functional as F
import models
import util
import data
from config import DefaultConfig
import pandas as pd
import os
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion test_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import torch
import models
import util
import data
import numpy as np
import torch.nn.functional as F
import pandas as pd
Expand Down
18 changes: 18 additions & 0 deletions util/split_val.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pandas as pd
import numpy as np
import random

# Load the three source CSVs (train, validation, test) in that order.
train_data, val_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/data/yujun/captain/datasets1/train_set.csv',
        '/data/yujun/captain/datasets1/val_set.csv',
        '/data/yujun/datasets/daguanbei_data/test_set.csv',
    )
)
print('raw data loaded!')

# For each text column, write a train/val/test CSV containing only that column
# plus 'class' (train, val) or 'id' (test). to_csv is called with its default
# arguments, exactly as before. Each row of the table below is:
# (text column, train output, val output, test output, completion message).
for text_column, train_out, val_out, test_out, done_message in (
    ('word_seg', 'word/train_set.csv', 'word/val_set.csv',
     'word/test_set.csv', 'word data made!'),
    ('article', 'article/train_set.csv', 'article/val_set.csv',
     'article/test_set.csv', 'article data made!'),
):
    train_data[[text_column, 'class']].to_csv(train_out)
    val_data[[text_column, 'class']].to_csv(val_out)
    test_data[['id', text_column]].to_csv(test_out)
    print(done_message)

0 comments on commit 04775e6

Please sign in to comment.