Skip to content

Commit

Permalink
update it to write packages to a text file
Browse files Browse the repository at this point in the history
The codecs module is needed for reading/writing Unicode characters (code points 128 and above) too.
  • Loading branch information
bohemia420 committed Feb 7, 2015
1 parent 473df23 commit 2689ecb
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions scripts/pull_R_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@

from pyquery import PyQuery as pq
import urllib
import codecs


# Scrape the CRAN "Machine Learning" task view and write one Markdown-style
# line per linked package ("[name](link) - description") to Packages.txt.
#
# NOTE(review): urllib.urlopen only exists on Python 2; this script is kept
# Python 2 compatible because nothing else in the file targets Python 3.

_CRAN_WEB_ROOT = 'http://cran.r-project.org/web'
_TASK_VIEW_URL = _CRAN_WEB_ROOT + '/views/MachineLearning.html'


def _fetch(url, **kw):
    """Return the raw body of *url*; passed to PyQuery as its ``opener``.

    Extra keyword arguments supplied by PyQuery are accepted and ignored,
    matching the lambda this helper replaces.
    """
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even if read() fails.
        response.close()


# Incremented once per package line written; not read anywhere in this
# script. NOTE(review): presumably a leftover counter -- confirm before
# removing.
index = 0

# codecs.open gives a UTF-8 encoding writer so non-ASCII package names and
# descriptions can be written; the ``with`` block guarantees the file is
# flushed and closed even if a request fails part-way through.
with codecs.open("Packages.txt", encoding='utf-8', mode="w") as text_file:
    d = pq(url=_TASK_VIEW_URL, opener=_fetch)

    for e in d("li").items():
        anchors = e("a")
        if not anchors:
            # List item without a link: nothing to report.
            continue

        package_name = anchors.html()
        package_link = anchors[0].attrib.get('href', '')

        # Only relative links (containing "..") are followed; they are made
        # absolute by substituting the CRAN web root for "..".
        if '..' in package_link:
            package_link = package_link.replace("..", _CRAN_WEB_ROOT)
            dd = pq(url=package_link, opener=_fetch)
            # The first <h2> of the package page is used as its description.
            package_description = dd("h2").html()
            text_file.write(" [%s](%s) - %s \n" % (package_name, package_link, package_description))

            # NOTE(review): indentation was reconstructed; the counter is
            # assumed to belong to this branch -- confirm.
            index += 1

0 comments on commit 2689ecb

Please sign in to comment.