forked from hhursev/recipe-scrapers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhellofresh.py
64 lines (49 loc) · 1.75 KB
/
hellofresh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import re
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
class HelloFresh(AbstractScraper):
    """Scraper that extracts recipe fields from a HelloFresh recipe page.

    Every accessor reads from ``self.soup``. That attribute is not set in this
    class; presumably ``AbstractScraper`` provides the parsed page -- confirm
    against the base class.
    """

    @classmethod
    def host(cls, domain='com'):
        """Return the site host name for the given top-level domain.

        :param domain: top-level domain suffix, ``'com'`` by default.
        :return: the string ``'hellofresh.<domain>'``.
        """
        return 'hellofresh.%s' % domain

    def title(self):
        """Return the text of the first ``<h1>`` element on the page."""
        return self.soup.find('h1').get_text()

    def total_time(self):
        """Return the preparation time as computed by ``get_minutes``.

        The ``<span>`` tagged ``recipe-detail.preparation-time`` is located
        and its grandparent element is handed to ``get_minutes``.
        """
        time_label = self.soup.find(
            'span',
            {'data-translation-id': 'recipe-detail.preparation-time'}
        )
        # The element two levels above the span is what gets parsed;
        # presumably the span is only the caption and the value sits next
        # to it -- verify against the page markup.
        return get_minutes(time_label.parent.parent)

    def yields(self):
        """Return an empty string; no serving count is extracted."""
        return ""

    def ingredients(self):
        """Return the ingredient lines found in the ingredients container.

        The ``<p>`` elements inside the container are consumed in consecutive
        pairs; each pair is normalized and joined with a single space. A
        trailing unpaired ``<p>`` is dropped.

        :return: list of strings, one per pair of paragraphs.
        """
        container = self.soup.find(
            'div',
            {'data-test-id': 'recipeDetailFragment.ingredients'}
        )
        paragraphs = container.find_all('p')
        return [
            ' '.join([
                normalize_string(first_part.get_text()),
                normalize_string(second_part.get_text())
            ])
            for first_part, second_part
            in zip(paragraphs[0::2], paragraphs[1::2])
        ]

    def instructions(self):
        """Return the instruction paragraphs as one numbered, newline-joined string.

        Every ``<p>`` inside a ``<div>`` whose ``data-test-id`` matches the
        step pattern becomes one line of the form ``'<n>) <text>'``, with
        ``n`` counting paragraphs from 1 in document order.

        :return: a single string with one numbered paragraph per line.
        """
        # Raw string so that \d is the regex digit class rather than an
        # invalid string escape; the dots are escaped to match literally.
        step_pattern = re.compile(
            r'recipeDetailFragment\.instructions\.step-\d'
        )
        step_containers = self.soup.find_all(
            'div',
            {'data-test-id': step_pattern}
        )
        # Flatten the paragraphs of every step container into a single list.
        paragraphs = [
            paragraph
            for step in step_containers
            for paragraph in step.find_all('p')
        ]
        return '\n'.join(
            '%d) %s' % (order, normalize_string(paragraph.get_text()))
            for order, paragraph in enumerate(paragraphs, 1)
        )