forked from mthli/Java
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHrefMatch.java
More file actions
56 lines (50 loc) · 1.54 KB
/
HrefMatch.java
File metadata and controls
56 lines (50 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package match;
import java.io.*;
import java.net.*;
import java.util.regex.*;
/**
* This program displays all URLs in a web page by matching a regular expression that describes the
* <a href=...> HTML tag. Start the program as <br>
* java match.HrefMatch URL
* @version 1.01 2004-06-04
* @author Cay Horstmann
*/
public class HrefMatch
{
public static void main(String[] args)
{
try
{
// get URL string from command line or use default
String urlString;
if (args.length > 0) urlString = args[0];
else urlString = "http://java.sun.com";
// open reader for URL
InputStreamReader in = new InputStreamReader(new URL(urlString).openStream());
// read contents into string builder
StringBuilder input = new StringBuilder();
int ch;
while ((ch = in.read()) != -1)
input.append((char) ch);
// search for all occurrences of pattern
String patternString = "<a\\s+href\\s*=\\s*(\"[^\"]*\"|[^\\s>]*)\\s*>";
Pattern pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(input);
while (matcher.find())
{
int start = matcher.start();
int end = matcher.end();
String match = input.substring(start, end);
System.out.println(match);
}
}
catch (IOException e)
{
e.printStackTrace();
}
catch (PatternSyntaxException e)
{
e.printStackTrace();
}
}
}