-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNetHandler.java
238 lines (205 loc) · 8.31 KB
/
NetHandler.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import javax.imageio.ImageIO;
import java.awt.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by Adam Najman on 7/27/2014.
*
* NetHandler:
* This class is used to do the heavy lifting for our program.
* Once supplied with inputs, the user can call fetch() to download
* the source code from the appropriate web server, parse the HTML,
* find the image, and save it in memory. This, in turn, will be supplied
* to the GUI to display for the user.
*
* VARIABLES:
*
* String comicName = Name of the comic, used for reference and debugging.
*
* URL comicURL = Where the web comic is hosted. ex www.dilbert.com
*
* Image comicImage = What we want to extract.
* NOT DEFINED IN CONSTRUCTOR
* EXPECT NULL UNTIL AFTER fetch() IS CALLED.
*
* Pattern comicPattern = The RegEx pattern used to find our image location.
*
* int regExGroupLocation = We need to supply the group location of the image after parsing.
* This might be able to be deduced with some ML or AI algorithm,
* but I'm not that smart. This is provided a priori. (Found by experimentation)
*
* String prependURLString = Some img links are stored with relative referencing. This means we have to
* prepend the url with the host name. This will be provided on an as-need
* basis.
*
*/
public class NetHandler {
    /*
     * Pattern used to split the matched HTML line into runs of characters
     * that contain neither a single nor a double quote.
     *
     * NOTE: this pattern can match the empty string, so the split result
     * also contains empty entries. regExGroupLocation indexes into that
     * exact list, so the pattern must not be "tidied up" without
     * re-deriving every configured position.
     */
    private static final Pattern QUOTE_FREE_RUN = Pattern.compile("([^\"\']*)");

    private final String comicName;
    private final URL comicURL;
    private Image comicImage = null;      // stays null until fetch() succeeds
    private final Pattern comicPattern;
    private final int regExGroupLocation;
    private final String prependURLString; // null when no prefix is needed

    /*
     * NetHandler Constructor
     *
     * INPUT:
     * String str = Our comic's name (Dilbert, Garfield, etc)
     *
     * URL url = Hosting location of the Web comic. NOT IMAGE LOCATION (www.xkcd.com)
     *
     * String pattern = The RegEx to find the image in the HTML soup.
     *
     * int pos = The position, within the quote-split pieces of the matched
     *           line, where we anticipate finding the image URL.
     */
    public NetHandler(String str, URL url, String pattern, int pos) {
        this(str, url, pattern, pos, null);
    }

    /*
     * NetHandler Constructor with prependURLString
     *
     * INPUT:
     * String str = Our comic's name (Dilbert, Garfield, etc)
     *
     * URL url = Hosting location of the Web comic. NOT IMAGE LOCATION (www.xkcd.com)
     *
     * String pattern = The RegEx to find the image in the HTML soup.
     *
     * int pos = The position, within the quote-split pieces of the matched
     *           line, where we anticipate finding the image URL.
     *
     * String pre = The url header we need for image prepending
     *              (may be null when the image link is already absolute).
     */
    public NetHandler(String str, URL url, String pattern, int pos, String pre) {
        comicName = str;
        comicURL = url;
        comicPattern = Pattern.compile(pattern);
        regExGroupLocation = pos;
        prependURLString = pre;
    }

    /*
     * NetHandler Constructor with LinkPod
     *
     * The LinkPod class is generally a wrapper for the types
     * NetHandler needs. This was employed to abstract out a nasty
     * code pattern present by hard-coding the RegEx and URL strings
     * into the program. Now maintenance can be performed without
     * re-compiling.
     *
     * Reads comicName, url, patternString, group and prepend from the pod.
     */
    public NetHandler(LinkPod linkPod) {
        this(linkPod.comicName, linkPod.url, linkPod.patternString,
                linkPod.group, linkPod.prepend);
        if (linkPod.prepend != null) {
            System.out.println(linkPod.comicName + " has prepend");
        }
    }

    /*
     * Code from Prof Goldberg
     * Used to fetch and interpret Web Server response
     * This should (hopefully) be HTML
     *
     * Object composition of:
     * BufferedReader -> InputStreamReader -> URL().openStream()
     *
     * Useful for while(!null) stream loops.
     *
     * INPUT:
     * String url = The url you want to query
     *
     * OUTPUT:
     * (Object) BufferedReader = Interface for web server response.
     *                           The caller owns it and must close it.
     *
     * NOTE: no charset is passed to InputStreamReader, so the response is
     * decoded with the platform default charset.
     */
    public static BufferedReader read(String url) throws Exception {
        return new BufferedReader(
                new InputStreamReader(
                        new URL(url).openStream()));
    } // read

    /*
     * Fetch acts as the "magic" in the NetHandler
     *
     * Once built, the NetHandler connects to the internet and makes a
     * request for its URL. It then scans the response line by line for the
     * first line matching the supplied RegEx, splits that line on quote
     * characters, and takes the piece at the pre-defined position as the
     * image location (prefixed with prependURLString when one was given).
     * The image is then downloaded and stored in comicImage.
     *
     * Failures are reported on stdout / stderr and leave comicImage
     * untouched; nothing is thrown to the caller.
     */
    public void fetch() {
        System.out.println("fetching info for: " + comicName);
        try {
            ArrayList<String> comicArray;
            BufferedReader reader = read(comicURL.toString());
            try {
                comicArray = splitFirstMatchingLine(reader);
            } finally {
                reader.close(); // always release the connection, even on error
            }

            // No matching line (empty list) or a mis-configured position:
            // report it by name instead of dying on an index error.
            if (regExGroupLocation < 0 || regExGroupLocation >= comicArray.size()) {
                System.out.println("ERROR FETCHING " + comicName
                        + ": no image candidate at position " + regExGroupLocation
                        + " (" + comicArray.size() + " candidates found)");
                return;
            }

            String imageLocation = comicArray.get(regExGroupLocation);
            System.out.println(comicName + " MATCHED TO: " + imageLocation);

            // Relative image links need the host prefix supplied at construction.
            URL comicImageUrl = (prependURLString == null)
                    ? new URL(imageLocation)
                    : new URL(prependURLString + imageLocation);

            comicImage = ImageIO.read(comicImageUrl); // download and decode image
        } catch (IOException e) {
            System.out.println("ERROR FETCHING " + comicName);
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } // end try{}
    } // end fetch()

    /*
     * Reads lines until the first one that matches comicPattern, then returns
     * that line split into its quote-free pieces (in order, empty pieces
     * included). Returns an empty list when no line matches.
     */
    private ArrayList<String> splitFirstMatchingLine(BufferedReader reader) throws IOException {
        ArrayList<String> pieces = new ArrayList<String>();
        String line;
        while ((line = reader.readLine()) != null) {
            if (comicPattern.matcher(line).find()) {
                Matcher pieceMatcher = QUOTE_FREE_RUN.matcher(line);
                while (pieceMatcher.find()) {
                    pieces.add(pieceMatcher.group());
                }
                break; // only the first matching line is considered
            }
        }
        return pieces;
    }

    // Accessor section for class Variables...

    public String getComicName() {
        return comicName;
    }

    public URL getComicURL() {
        return comicURL;
    }

    // Returns null until a fetch() has completed successfully.
    public Image getComicImage() {
        return comicImage;
    }

    // REMINDER: The NetHandler is passed a RegEx string during construction,
    // but this method will return a compiled Pattern.
    public Pattern getComicPattern() {
        return comicPattern;
    }

    public int getRegExGroupLocation() {
        return regExGroupLocation;
    }

    // Returns the configured prefix, or "" when none was supplied.
    // (Must compare with == null: calling equals() on a null reference throws.)
    public String getPrependURLString() {
        if (prependURLString == null) return "";
        return prependURLString;
    }
}