-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpushTheTempo.py
executable file
·97 lines (88 loc) · 3.02 KB
/
pushTheTempo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
from __future__ import print_function
import csv
import elasticsearch
from elasticsearch import Elasticsearch
# Host handed to the Elasticsearch client constructor.
ELASTIC_HOST = 'localhost'
# Name of the index that rebuild_index() deletes, recreates and fills.
ELASTIC_INDEX = 'barbora'
# Passed as doc_type when individual documents are indexed.
ELASTIC_DOC_TYPE = 'product'
def rebuild_index(filename):
    """Drop and recreate the product index, then load it from a CSV file.

    The index named by ELASTIC_INDEX is deleted if it exists, recreated with
    a custom analyzer (standard tokenizer, lowercase, minimum token length
    of 3, Lithuanian stemmer) applied to the searchable text fields, and
    then filled with one document per CSV row. Document ids are the
    zero-based row positions.

    Documents that Elasticsearch does not report as newly created are
    printed to stdout.

    Args:
        filename: Path to a CSV file whose header row contains every column
            listed in PRODUCT_ATTRIBUTES below.

    Raises:
        RuntimeError: If deleting the existing index is not acknowledged.
        KeyError: If the CSV header lacks one of the expected columns.
    """
    es = Elasticsearch([ELASTIC_HOST])

    # Drop existing index; a missing index simply means there is nothing
    # to drop.
    try:
        response = es.indices.delete(index=ELASTIC_INDEX)
    except elasticsearch.exceptions.NotFoundError:
        pass
    else:
        if response['acknowledged'] is not True:
            raise RuntimeError(
                'Deletion of index %r was not acknowledged' % ELASTIC_INDEX
            )

    # Every searchable text field shares the same analyzer configuration.
    analyzed_fields = ('name', 'category3', 'category1', 'category2',
                       'item_brand')
    text_properties = {
        field: {"type": "text", "analyzer": "my_analyzer"}
        for field in analyzed_fields
    }

    # Create index
    es.indices.create(
        index=ELASTIC_INDEX,
        body={
            'settings': {
                "analysis": {
                    "analyzer": {
                        "my_analyzer": {
                            "tokenizer": "standard",
                            "filter": [
                                "standard",
                                "lowercase",
                                "my_length",
                                "my_stemmer",
                            ],
                        },
                    },
                    "filter": {
                        "my_stemmer": {
                            "type": "stemmer",
                            "name": "lithuanian",
                        },
                        "my_length": {
                            "type": "length",
                            "min": 3,
                        },
                    },
                },
            },
            'mappings': {
                # Keyed by the same type name that is used when indexing.
                ELASTIC_DOC_TYPE: {
                    "properties": text_properties,
                },
            },
        },
    )

    PRODUCT_ATTRIBUTES = ['id', 'name', 'price', 'unit', 'price_per_unit',
                          'item_netto_weight', 'category3', 'url',
                          'category1', 'category2', 'item_brand']

    with open(filename, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        for doc_id, row in enumerate(reader):
            # Copy only the known columns; extra CSV columns are ignored.
            doc = {attr_name: row[attr_name]
                   for attr_name in PRODUCT_ATTRIBUTES}
            res = es.index(index=ELASTIC_INDEX, doc_type=ELASTIC_DOC_TYPE,
                           id=doc_id, body=doc)

            # Some Elasticsearch versions return a boolean 'created' flag;
            # others only return result == 'created'. Accept either so a
            # missing 'created' key does not abort the import.
            created = res.get('created')
            if created is None:
                created = res.get('result') == 'created'
            if not created:
                print(doc)
if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the default CSV
    # path; with no argument the script behaves as before.
    rebuild_index(sys.argv[1] if len(sys.argv) > 1 else 'barbora.csv')