Skip to content

Commit

Permalink
#PRS-11 [Finishes #112811591 #organizations/ohrana_gov_by]
Browse files: browse the repository at this point in the history
  • Loading branch information
muhtar05 committed Mar 15, 2016
1 parent 0a487d6 commit d164b22
Show file tree
Hide file tree
Showing 2 changed files with 1,029 additions and 896 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -54,10 +54,11 @@ def parse_page(self, response):
href = response.xpath(pattern)
if href:
url = response.urljoin(href.extract()[0])
yield scrapy.Request(url, callback=self.parse_page_struct, meta={'region': region})
yield scrapy.Request(url, callback=self.parse_page_struct, meta={'region': region, 'region_url':response.url})

def parse_page_struct(self, response):
region = response.meta['region']
region_url = response.meta['region_url']

if region == "brest":
items = response.xpath("//div[@class='entry-content']/blockquote/p")
Expand Down Expand Up @@ -92,7 +93,7 @@ def parse_page_struct(self, response):
phones.append(p)

otdel['phone'] = phones
otdel['url'] = response.url
otdel['url'] = region_url
yield otdel
elif region == "vitebsk":
items_content = response.xpath("//div[@class='entry-content']")
Expand Down Expand Up @@ -128,7 +129,7 @@ def parse_page_struct(self, response):
otdel['name'] = name
otdel['address'] = address
otdel['phone'] = phones
otdel['url'] = response.url
otdel['url'] = region_url
otdel['phone_code'] = u""
yield otdel

Expand All @@ -137,7 +138,7 @@ def parse_page_struct(self, response):
otdel2['name'] = name2
otdel2['address'] = address2
otdel2['phone'] = phones2
otdel2['url'] = response.url
otdel2['url'] = region_url
yield otdel2

elif region == "gomel":
Expand All @@ -151,7 +152,7 @@ def parse_page_struct(self, response):
otdel['name'] = raw_list[0]
otdel['address'] = addr_an_ph[:pos_ph]
otdel['phone'] = [addr_an_ph[pos_ph:]]
otdel['url'] = response.url
otdel['url'] = region_url
otdel['phone_code'] = u""
yield otdel
elif region == "grodno":
Expand All @@ -164,9 +165,31 @@ def parse_page_struct(self, response):
otdel['name'] = name
otdel['address'] = address
otdel['phone'] = phones
otdel['url'] = response.url
otdel['url'] = region_url
otdel['phone_code'] = u""
yield otdel
pat = u"td[1]/p[re:test(text(),'%s')]" % u'Отдел'
name_add = item.xpath(pat)
if len(name_add)>0:
otdel_add = OhranagovbyItem()
otdel_add['name'] = name_add.xpath("text()").extract()
#following-sibling::p[1]/text()
address_add = name_add.xpath("following-sibling::p[1]/text()").extract()
phones_add = name_add.xpath("following-sibling::p[2]/text()").extract()
if not address_add:
address_add = address

if len(phones_add) == 0 or phones_add[0] == u'\xa0':
phones_add = phones

otdel_add['address'] = address_add
otdel_add['phone'] = phones_add
otdel_add['url'] = region_url
otdel_add['phone_code'] = u""
yield otdel_add




elif region == "minsk":
links = response.xpath("//div[@class='entry-content']/ul/li/strong/a/@href")
Expand All @@ -186,7 +209,7 @@ def parse_page_from_mou(self, response):
item['name'] = response.xpath("//header/h1/text()").extract()[0]
item['address'] = response.xpath("//div[@class='entry-content']/div[1]").extract()
item['phone'] = response.xpath("//div[@class='entry-content']/p[1]").extract()
item['url'] = response.url
item['url'] = u"http://mou.ohrana.gov.by/"
item['phone_code'] = u""
yield item

Expand All @@ -207,8 +230,7 @@ def parse_mogilev(self, response):
item['address'] = re.sub(pat, '', info[1].extract())
item['phone'] = [re.sub(pat, '', info[2].extract())]
item['phone_code'] = u""
# item['phone'] = info[2].extract()
item['url'] = response.url
item['url'] = u"http://mogilev.ohrana.gov.by/"
yield item

def parse_minsk(self, response):
Expand All @@ -225,6 +247,6 @@ def parse_minsk(self, response):
item['name'] = response.xpath("//header/h1/text()").extract()[0]
item['address'] = address
item['phone'] = response.xpath(ptn_ph).extract()
item['url'] = response.url
item['url'] = u"http://minsk.ohrana.gov.by/"
item['phone_code'] = u""
yield item
Loading

0 comments on commit d164b22

Please sign in to comment.