-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathget_views.rb
53 lines (47 loc) · 2.05 KB
/
get_views.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/ruby
# Base URL that each task view's page file name is appended to.
base_url = 'http://cran.r-project.org/web/views/'

# [page file name, view name] pairs, in the order they are processed.
# Each page file name is simply the view name with ".html" appended, so the
# pairs are derived from a single list of names.
task_views = %w[
  Bayesian
  ChemPhys
  ClinicalTrials
  Cluster
  Distributions
  Econometrics
  Environmetrics
  ExperimentalDesign
  Finance
  Genetics
  Graphics
  gR
  HighPerformanceComputing
  MachineLearning
  MedicalImaging
  Multivariate
  NaturalLanguageProcessing
  OfficialStatistics
  Optimization
  Pharmacokinetics
  Phylogenetics
  Psychometrics
  ReproducibleResearch
  Robust
  SocialSciences
  Spatial
  Survival
  TimeSeries
].map { |name| ["#{name}.html", name] }
# Fetch every task view page and record which packages it links to.
#
# Writes data/views.csv: a header row, then one "View","LinkedPackage" row
# for each package link found on each view's page. Relies on `base_url` and
# `task_views` ([page file name, view name] pairs) defined earlier in the file.
#
# Raises Errno::ENOENT if the curl executable cannot be found. A view whose
# page cannot be fetched is reported on stderr and skipped.

# Matches a list item that links to a package index page; captures the link text.
package_link = %r{<li><a href="\.\./packages/[^/]+/index\.html">([^<]+)</a></li>}

# Block form guarantees the CSV is closed even if a later step raises.
File.open('data/views.csv', 'w') do |data_file|
  data_file.puts('"View","LinkedPackage"')

  task_views.each do |page, view|
    # Pause before every request so successive fetches are spaced out.
    sleep(10)
    url = base_url + page

    # The argument-array form runs curl directly (no shell, so the URL is never
    # shell-interpreted) and the page is read from a pipe, so no temporary file
    # is needed. Flags: -f exit non-zero on HTTP errors, -sS hide the progress
    # meter but keep error messages, -L follow redirects.
    html = IO.popen(['curl', '-fsSL', url], &:read)

    # $? holds curl's exit status once the pipe has been closed.
    unless $?.success?
      warn "Skipping #{view}: could not fetch #{url}"
      next
    end

    html.scan(package_link).flatten.each do |package|
      data_file.puts "\"#{view}\",\"#{package}\""
    end
  end
end