Skip to content

Commit

Permalink
Added another Holger Berndt's patch. It adds: 1) filetype search. 2) …
Browse files Browse the repository at this point in the history
…last_search_url property. 3) date indexed search.
  • Loading branch information
pkrumins committed Feb 7, 2010
1 parent d224d94 commit c46cc6a
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 4 deletions.
6 changes: 5 additions & 1 deletion contributors.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
This file contains people who have helped xgoogle project:

* Holger Berndt
Thanks for adding 'lang' and 'tld' arguments to Google Search
Thanks for adding:
* 'lang' and 'tld' arguments to Google Search
* 'filetype' search
* 'last_search_url' property
* 'date indexed' search


PS. If I missed you, please email me at [email protected], and I'll add you here!
Expand Down
52 changes: 49 additions & 3 deletions xgoogle/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ def __init__(self, query, random_agent=False, debug=False, lang="en", tld="com",
self.results_info = None
self.eor = False # end of results
self._page = 0
self._first_indexed_in_previous = None
self._filetype = None
self._last_search_url = None
self._results_per_page = 10
self._last_from = 0
self._lang = lang
Expand All @@ -87,6 +90,10 @@ def num_results(self):
self.eor = True
return self.results_info['total']

@property
def last_search_url(self):
return self._last_search_url

def _get_page(self):
return self._page

Expand All @@ -95,6 +102,36 @@ def _set_page(self, page):

page = property(_get_page, _set_page)

def _get_first_indexed_in_previous(self):
return self._first_indexed_in_previous

def _set_first_indexed_in_previous(self, interval):
if interval == "day":
self._first_indexed_in_previous = 'd'
elif interval == "week":
self._first_indexed_in_previous = 'w'
elif interval == "month":
self._first_indexed_in_previous = 'm'
elif interval == "year":
self._first_indexed_in_previous = 'y'
else:
# a floating point value is a number of months
try:
num = float(interval)
except ValueError:
raise SearchError, "Wrong parameter to first_indexed_in_previous: %s" % (str(interval))
self._first_indexed_in_previous = 'm' + str(interval)

first_indexed_in_previous = property(_get_first_indexed_in_previous, _set_first_indexed_in_previous, doc="possible values: day, week, month, year, or a float value of months")

def _get_filetype(self):
return self._filetype

def _set_filetype(self, filetype):
self._filetype = filetype

filetype = property(_get_filetype, _set_filetype, doc="file extension to search for")

def _get_results_per_page(self):
return self._results_per_page

Expand Down Expand Up @@ -144,12 +181,21 @@ def _get_results_page(self):
else:
url = GoogleSearch.NEXT_PAGE_1

safe_url = url % { 'query': urllib.quote_plus(self.query),
safe_url = [url % { 'query': urllib.quote_plus(self.query),
'start': self._page * self._results_per_page,
'num': self._results_per_page,
'tld' : self._tld,
'lang' : self._lang }

'lang' : self._lang }]

# possibly extend url with optional properties
if self._first_indexed_in_previous:
safe_url.extend(["&as_qdr=", self._first_indexed_in_previous])
if self._filetype:
safe_url.extend(["&as_filetype=", self._filetype])

safe_url = "".join(safe_url)
self._last_search_url = safe_url

try:
page = self.browser.get_page(safe_url)
except BrowserError, e:
Expand Down

0 comments on commit c46cc6a

Please sign in to comment.