-
Notifications
You must be signed in to change notification settings - Fork 0
/
beaconDetectorThreads.rb
146 lines (142 loc) · 4.29 KB
/
beaconDetectorThreads.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
require 'sqlite3'
require 'fastimage'
require 'thread'
load 'define.rb'
load 'database.rb'
# Probes the image behind +url+ and reports the size FastImage finds for it.
#
# The URL is expected without a scheme: "http://" is prepended before the
# request, and the probe is given a 2 second timeout. A probe that raises is
# treated as "size unknown" (nil) so one bad host does not stop a batch.
#
# @param url [String] scheme-less URL of the candidate image
# @return [Array] two elements: the value returned by FastImage.size (nil when
#   the probe raised) and +url+ unchanged
# Aborts the process with "NULL URL" when +url+ is nil.
def is_1pixel_image?(url)
  abort "NULL URL" if url.nil?
  pixels = nil
  begin
    pixels = FastImage.size("http://" + url, :timeout => 2)
  rescue StandardError => e
    # Only StandardError is swallowed: SystemExit, Interrupt, NoMemoryError
    # and similar must still be able to stop the script.
    # Unreachable-network failures are expected noise; report everything else.
    unless e.message.include?("Network is unreachable")
      puts "is_1pixel_image: " + e.message + "\n" + url
    end
  end
  [pixels, url]
end
# Appends one tab-separated record to the run's summary file.
#
# The output path is +filename+ with "beaconDetectorTHREADS.csv" appended; the
# record written is +filename+, a tab, then +line.to_s+.
#
# @param line [#to_s] value to record
# @param filename [String] used both as the output path prefix and as the
#   first column of the record
# @return [nil]
def store(line, filename)
  # Block form guarantees the handle is closed even if the write raises.
  File.open(filename + "beaconDetectorTHREADS.csv", "a") do |fw|
    fw.puts filename + "\t" + line.to_s
  end
end
# Drains the @results queue into the database and the in-memory snapshot.
#
# Every queued entry is a [pixels, url, res] triple, where +res+ is the Hash of
# the remaining fields parsed for that url. For each entry this method:
#   * increments @count when the probed size prints as "[1, 1]",
#   * substitutes "-1" when no size was obtained,
#   * defaults a missing 'type' field to -1,
#   * inserts one row into the "beacons" table through @db.insertBEACON,
#   * records url => pixels in @dictionary.
#
# Reads and writes the script-level state @results, @count, @db, @dictionary.
# Aborts the process when an entry has no url or no 'IPport' field.
def offload
  columns = %w[IPport uIP host httpRef status dataSz dur ua tmstp type mob dev browser]
  puts @results.size.to_s + " results"
  until @results.empty?
    # Dequeue the next entry; without this the loop can never make progress.
    pixels, url, res = @results.pop

    # NOTE(review): the tail of the two abort messages was unreadable in the
    # reviewed copy; the remaining queue depth is used as context instead.
    abort "NULL url " + @results.size.to_s if url.nil?
    if pixels.nil?
      pixels = "-1"
    elsif pixels.to_s == "[1, 1]"
      # to_s comparison matches an Array size as well as its stored string form.
      @count += 1
    end
    abort "NULL user " + @results.size.to_s if res['IPport'].nil?
    if res['type'].nil?
      puts "NULL type" + url
      res['type'] = -1 # assignment, so the default actually reaches the row
    end

    @db.insertBEACON("beacons", [pixels, url] + res.values_at(*columns))
    @dictionary[url] = pixels
  end
end
# ---- Script setup: arguments, file-descriptor limit, DB snapshot, shared state ----
startScript = Time.now
filename = ARGV[0]
abort "Usage: ruby #{$0} <BEACONS_file>" if filename.nil?

stripped = filename.gsub("BEACONS_", "")
@defines = Defines.new(stripped)
# File.dirname yields "." for a bare file name, so the DB is created next to
# the input rather than at the filesystem root.
dbname = File.join(File.dirname(filename), "beaconsChecked_" + File.basename(stripped) + ".db")

# Raise the soft open-file limit to the hard limit so many probes can hold
# sockets at once.
min, max = Process.getrlimit(Process::RLIMIT_NOFILE)
puts "Initial open sockets boundaries: [" + min.to_s + "," + max.to_s + "]"
puts "Stretching the number of simultaneously open sockets"
Process.setrlimit(Process::RLIMIT_NOFILE, max, max)
puts "Now I can open no more than " + (max / 1000 * 1000).to_s + " sockets simultaneously"

@db = Database.new(@defines, dbname)
@db.create("beacons", 'imageSize VARCHAR, url VARCHAR PRIMARY KEY,user VARCHAR, userIP VARCHAR, host VARCHAR, httpRef VARCHAR, status VARCHAR, dataSz VARCHAR, dur VARCHAR, ua VARCHAR, tmstp VARCHAR, type VARCHAR, mob VARCHAR, dev VARCHAR, browser VARCHAR')

# NOTE(review): the receiver and method name on this line were unreadable in
# the reviewed copy; only the argument list survived. `@db.getAll` is a best
# guess for a "fetch every row as a Hash" query -- confirm against database.rb.
results = @db.getAll("beacons", nil, nil, nil, true)

# url => imageSize for everything examined by earlier runs.
@dictionary = {}
(results || []).each { |row| @dictionary[row["url"]] = row["imageSize"] }
puts "Loaded previous snapshot of " + @dictionary.size.to_s + " elements"

#system("rm -f "+filename+"beaconDetectorTHREADS.csv")
# Kept as a String because it is only concatenated into progress messages.
totalLines = File.foreach(filename).count.to_s
@count = 0
h = {}
threads = Queue.new
@results = Queue.new
startRead = Time.now
# Read the input file line by line and build h: url => Hash of the other fields.
# Each line is split on ', "' into key=>value fragments; braces, newlines and
# quotes are stripped from every fragment before it is split on "=>".
File.foreach(filename) do |line|
  # h is keyed by url (see the assignment at the end of this block), so this
  # only skips a line whose whole text equals an already stored key.
  next unless h[line.chop].nil?

  fields = {}
  url = nil
  line.split(', "').each do |part|
    key_value = part.gsub("}", "").gsub("\n", "").gsub("{", "").gsub('" ', "").gsub("\"", "").split("=>")
    if key_value.first == "url"
      url = key_value.last
    else
      fields[key_value.first] = key_value.last
    end
  end

  # The nil check must come before url.size, otherwise a line without a url
  # dies with NoMethodError instead of the intended message.
  abort "WRONG! NULL URL" if url.nil?
  next if url.size > 300 # overly long URLs are not examined
  h[url] = fields
end
# ---- Probe every not-yet-known url in batches of threads, then report ----
total = 0
found = 0
puts "File was read in " + (Time.now - startRead).to_s + " seconds"
puts "Will examine " + h.keys.size.to_s + " urls"
start = Time.now

# Threads per batch: the fd limit rounded down to a multiple of 1000. When the
# limit is below 1000 that rounds to 0, which would never trigger a flush, so
# fall back to the limit itself (and to 1 as a last resort).
batch_size = max / 1000 * 1000
batch_size = [max, 1].max if batch_size.zero?

# Starts one probe thread; the url is passed as a thread argument so the
# thread works on its own reference.
spawn_probe = lambda do |target|
  Thread.new(target) do |candidate|
    pixel, urlChecked = is_1pixel_image?(candidate)
    @results.push([pixel, urlChecked, h[candidate]])
  end
end

# Waits for every thread currently held in the threads queue.
join_all = lambda do
  threads.pop.join until threads.empty?
end

h.each_key do |url|
  # Skip urls already present in the snapshot loaded from the database.
  unless @dictionary.nil? || @dictionary[url].nil?
    found += 1
    next
  end

  begin
    threads.push(spawn_probe.call(url))
  rescue ThreadError => e
    puts "ThreadError " + e.to_s + "\n" + total.to_s + " lines"
    join_all.call
    # Try again now that the running threads have finished.
    threads.push(spawn_probe.call(url))
  end
  total += 1

  next unless threads.size >= batch_size

  puts "\t" + total.to_s + "/" + totalLines.to_s + " lines so far... " + (Time.now - start).to_s + " seconds"
  print threads.size.to_s + " threads"
  join_all.call
  offload()
  start = Time.now
end

# Flush the final, partially filled batch.
join_all.call
offload()

puts "\n-------------RESULTS-------------\n I found " + @count.to_s + " Beacons from " + total.to_s + " examined URLs from " + totalLines.to_s + " lines of " + filename
puts "Already found: " + found.to_s
store(@count, filename)
puts "Finished in " + (Time.now - startScript).to_s + " seconds"