/*
* HttpRequest.java
*
* Brazil project web application toolkit,
* export version: 2.3
* Copyright (c) 1999-2009 Sun Microsystems, Inc.
*
* Sun Public License Notice
*
* The contents of this file are subject to the Sun Public License Version
* 1.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is included as the file "license.terms",
* and also available at http://www.sun.com/
*
* The Original Code is from:
* Brazil project web application toolkit release 2.3.
* The Initial Developer of the Original Code is: cstevens.
* Portions created by cstevens are Copyright (C) Sun Microsystems, Inc.
* All Rights Reserved.
*
* Contributor(s): cstevens, drach, suhler.
*
* Version: 2.16
* Created by cstevens on 99/09/15
* Last modified by suhler on 09/06/03 10:51:18
*
* Version Histories:
*
* 2.16 09/06/03-10:51:18 (suhler)
* added timeouts for the remote server.
*
* 2.15 09/01/30-16:13:43 (suhler)
* Redo: remove socket Factory, replace with socket pools
*
* 2.14 09/01/29-11:41:39 (suhler)
* redo checkpoint, about to refactor socketpool stuff for https
*
* 2.13 08/12/24-08:14:52 (suhler)
* Rearrange to make subclassing cleaner:
* - pass "factory" to socket pool constructor
* - add getPool() method to get our socket pool
* - tweak main() for better diagnostics
*
* 2.12 08/03/17-09:45:45 (suhler)
* use our own factory method
*
* 2.11 08/03/17-09:35:39 (suhler)
* - add static instance creation factory.
* - add socket pool diagnostics
*
* 2.10 08/02/04-13:50:51 (suhler)
* Don't import the Server call. We don't need it, and it prevents this
* class from being used in other contexts
* .
*
* 2.9 07/06/25-11:24:21 (suhler)
* make default charset public
*
* 2.8 07/06/22-13:55:49 (suhler)
* change semantics of getEncoding:
* - returns HTTP default encoding if none can be found
* - supplied encoding always wins
* make public static getEncoding() method
*
* 2.7 07/03/26-13:53:18 (suhler)
* doc updates
*
* 2.6 07/03/26-13:44:17 (suhler)
* add sample main() to act as a simple "wget"
*
* 2.5 04/11/30-15:19:40 (suhler)
* fixed sccs version string
*
* 2.4 03/08/01-16:18:01 (suhler)
* fixes for javadoc
*
* 2.3 03/05/12-16:26:13 (suhler)
* Merged changes between child workspace "/home/suhler/brazil/naws" and
* parent workspace "/net/mack.eng/export/ws/brazil/naws".
*
* 1.20.1.1 03/04/17-10:03:47 (suhler)
* no changes made
*
* 2.2 03/04/15-17:29:08 (drach)
* Add protected modifier to variable connected so subclasses outside
* package can access it.
*
* 2.1 02/10/01-16:36:54 (suhler)
* version change
*
* 1.20 02/07/23-08:31:15 (suhler)
* check for no content type
*
* 1.19 02/07/11-15:37:33 (suhler)
* add encoding diagnostics
*
* 1.18 02/07/11-15:03:40 (suhler)
* add getContent() and getEncoding() convenience methods for dealing
* with charset encoding
*
* 1.17 02/04/29-17:04:41 (suhler)
* added public static boolean displayAllHeaders to turn on
* http header debugging during development.
*
* 1.16 02/04/24-13:36:21 (suhler)
* doc lint
*
* 1.15 02/02/26-14:42:14 (suhler)
* doc lint
*
* 1.14 02/02/26-14:32:38 (suhler)
* typo
*
* 1.13 02/02/26-14:25:52 (suhler)
* added "addHeaders" convenience method for adding http headers from
* properties objects
*
* 1.12 00/07/11-11:23:47 (cstevens)
* Some servers send "HTTP/1.0 100 Continue" in response to an HTTP/1.1 POST!
*
* 1.11 00/07/06-15:03:10 (cstevens)
* Although HTTP/1.1 chunking spec says that there is one "\r\n" between
* chunks, some servers (for example, maps.yahoo.com) send more than one blank
* line between chunks. So, read and skip all the blank lines seen between
* chunks.
*
* 1.10 99/11/30-09:48:14 (suhler)
* remove diagnostics
*
* 1.9 99/11/09-20:23:23 (cstevens)
* bugs revealed by writing tests.
*
* 1.8 99/10/26-18:56:38 (cstevens)
* Change MimeHeaders so it uses "put" instead of "set", to be compatible with
* names chosen by Hashtable and StringMap.
*
* 1.7 99/10/14-14:16:31 (cstevens)
* merge issues.
*
* 1.6 99/10/14-13:19:18 (cstevens)
* Merged changes between child workspace "/home/cstevens/ws/brazil/naws" and
* parent workspace "/export/ws/brazil/naws".
*
* 1.4.1.2 99/10/14-13:01:06 (cstevens)
* Documentation.
* Fold TimedThread and the default HttpSocketPool into this file, since they are
* not used outside of this file (at this time).
*
* 1.5 99/10/11-12:38:38 (suhler)
* Merged changes between child workspace "/home/suhler/brazil/naws" and
* parent workspace "/net/mack.eng/export/ws/brazil/naws".
*
* 1.4.1.1 99/10/08-16:54:45 (cstevens)
* documentation
* Move logic for removing point-to-point headers into the HttpRequest as a
* static method.
*
* 1.4 99/10/07-13:17:55 (cstevens)
* Documentation for HttpRequest (in progress).
*
* 1.3.1.1 99/10/06-12:31:57 (suhler)
* comment out debugging
*
* 1.3 99/09/15-15:57:16 (cstevens)
* debugging
*
* 1.2 99/09/15-14:52:02 (cstevens)
* import *;
*
* 1.2 99/09/15-14:39:36 (Codemgr)
* SunPro Code Manager data about conflicts, renames, etc...
* Name history : 2 1 request/HttpRequest.java
* Name history : 1 0 util/http/HttpRequest.java
*
* 1.1 99/09/15-14:39:35 (cstevens)
* date and time created 99/09/15 14:39:35 by cstevens
*
*/
package sunlabs.brazil.util.http;
import sunlabs.brazil.util.regexp.Regexp;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.io.BufferedOutputStream;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PushbackInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.net.Socket;
import java.util.Properties;
import java.util.Vector;
import java.util.StringTokenizer;
/**
* Sends an HTTP request to some target host and gets the answer back.
* Similar to the <code>URLConnection</code> class.
* <p>
* Caches connections to hosts, and reuses them if possible. Talks
* HTTP/1.1 to the hosts, in order to keep alive connections as much
* as possible.
* <p>
* The sequence of events for using an HttpRequest
is similar
* to how URLConnection
is used:
*
HttpRequest
object is constructed.
*
* In the common case, all the setup parameters are initialized to sensible
* values and won't need to be modified. Most users will only need to
* construct a new HttpRequest
object and then call
* getInputStream
to read the contents. The rest of the
* member variables and methods are only needed for advanced behavior.
*
* The HttpRequest
class is intended to be a replacement for the
* URLConnection
class. It operates at a lower level and makes
* fewer decisions on behavior. Some differences between the
* HttpRequest
class and the URLConnection
class
* follow:
System.getProperties
) that modify the behavior of
* HttpRequest
.
* HttpRequest
does not automatically follow redirects.
* HttpRequest
does not turn HTTP responses with a status
* code other than "200 OK" into IOExceptions
. Sometimes
* it may be necessary and even quite useful to examine the results of
* an "unsuccessful" HTTP request.
* HttpRequest
issues HTTP/1.1 requests and handles
* HTTP/0.9, HTTP/1.0, and HTTP/1.1 responses.
* URLConnection
class leaks open sockets if there is
* an error reading the response or if the target does not use
* Keep-Alive, and depends upon the garbage collector to close and
* release the open socket in these cases, which is unreliable because
* it may lead to intermittently running out of sockets if the garbage
* collector doesn't run often enough.
* URLConnection
, there are bugs in its implementation
* (as of JDK1.2) that may cause the program to block forever and/or
* read an insufficient amount of data before trying to reuse the
* underlying socket.
*
* A number of the fields in the HttpRequest
object are public,
* by design. Most of the methods mentioned above are convenience methods;
* the underlying data fields are meant to be accessed for more complicated
* operations, such as changing the socket factory or accessing the raw HTTP
* response line. Note however, that the order of the methods described
* above is important. For instance, the user cannot examine the response
* headers (by calling getResponseHeader
or by examining the
* variable responseHeaders
) without first having connected to
* the host.
*
* However, if the user wants to modify the default behavior, the
* HttpRequest
uses the value of a number of variables and
* automatically sets some HTTP headers when sending the request. The user
* can change these settings up until the time connect
is
* called, as follows:
HttpRequest
issues HTTP/1.1
* requests. The user can set version
to change this to
* HTTP/1.0.
* method
is null
(the default),
* the HttpRequest
decides what the HTTP request method
* should be as follows: If the user has called
* getOutputStream
, then the method will be "POST",
* otherwise the method will be "GET".
* connect
opens a connection to the proxy.
* connect
opens a connection to the remote host.
* HttpRequest
.
* HttpRequest
sets the appropriate connection
* header to "Keep-Alive" to keep alive the connection to the host or
* proxy (respectively). By setting the appropriate connection header,
* the user can control whether the HttpRequest
tries to
* use Keep-Alives.
* HttpRequest
,
* and is set automatically if the user does not set it.
* getOutputStream
and writes some
* data to it, the "Content-Length" header will be set to the amount of
* data that has been written at the time that connect
* is called.
* close
to release
* the socket. Although it happens under the covers, the user should be
* aware that if an IOException occurs or once data has been read normally
* from the remote host, close
is called automatically. This
* is to ensure that the minimal number of sockets are left open at any time.
*
* The input stream that getInputStream
provides automatically
* hides whether the remote host is providing HTTP/1.1 "chunked" encoding or
* regular streaming data. The user can simply read until reaching the
* end of the input stream, which signifies that all the available data from
* this request has been read. If reading from a "chunked" source, the
* data is automatically de-chunked as it is presented to the user. Currently,
* no access is provided to the underlying raw input stream.
* @version %I%
*/
public class HttpRequest {
/**
* Timeout (in msec) to drain an input stream that has been closed before
* the entire HTTP response has been read.
* <p>
* If the user closes the <code>HttpRequest</code> before reading all of
* the data, but the remote host has agreed to keep this socket alive, we
* need to read and discard the rest of the response before issuing a new
* request. If it takes longer than <code>DRAIN_TIMEOUT</code> to read
* and discard the data, we will just forcefully close the connection to
* the remote host rather than waiting to read any more.
* <p>
* Default value is 10000.
*/
public static int DRAIN_TIMEOUT = 10000;
/**
* Maximum length of a line in the HTTP response headers (sanity check).
* <p>
* If an HTTP response line is longer than this, the response is
* considered to be malformed.
* <p>
* Default value is 2000.
*/
public static int LINE_LIMIT = 2000;
/**
* The default HTTP version string to send to the remote host when
* issuing requests.
* <p>
* The default value can be overridden on a per-request basis by
* setting the <code>version</code> instance variable.
* <p>
* Default value is "HTTP/1.1".
*
* @see #version
*/
public static String defaultHTTPVersion = "HTTP/1.1";
/**
* A list of socket pools. Each socket pool is responsible for
* managing connections for a particular type of transport. The
* default pool handles standard TCP sockets. Additional transport
* providers may be added (see {@link #appendPool} and {@link #prependPool}
* below).
*/
public static Vector socketPools = new Vector();
// Register the default pool when the class is loaded, so the list is
// never empty.
static {
socketPools.addElement(new SimpleHttpSocketPool());
}
/**
* The cache of idle sockets to use for this request.
* Once a request has been handled, the
* now-idle socket can be remembered and reused later if another HTTP
* request is made to the same remote host.
*/
HttpSocketPool pool = null; // the socket pool to use for this target
/**
* The URL used to construct this <code>HttpRequest</code>.
*/
public URL url;
/**
* The host extracted from the URL used to construct this
* <code>HttpRequest</code>.
*
* @see #url
*/
public String host;
/**
* The port extracted from the URL used to construct this
* <code>HttpRequest</code>.
*
* @see #url
*/
public int port;
/**
* If non-<code>null</code>, sends this HTTP request via the specified
* proxy host and port.
* <p>
* May be changed
* by the user at any time up until the HTTP request is actually sent.
*
* @see #proxyPort
* @see #setProxy
* @see #connect
*/
public String proxyHost = null;
/**
* The proxy port. Only consulted when <code>proxyHost</code> is
* non-<code>null</code>.
*
* @see #proxyHost
*/
public int proxyPort;
/*
* Set by connect() before any I/O is attempted; once set, further calls
* to connect() return immediately. Protected so that subclasses outside
* this package can access it.
*/
protected boolean connected;
/*
* Soft end-of-input flag. closeSocket() consults it: a keep-alive socket
* whose response has not been read to the end is drained in the
* background before being handed back to the pool.
*/
boolean eof;
/*
* The socket obtained from the pool by openSocket(); reset to null by
* closeSocket().
*/
HttpSocket hs;
/**
* The HTTP method, such as "GET", "POST", or "HEAD".
* <p>
* May be set by the user at any time up until the HTTP request is
* actually sent.
*/
public String method;
/**
* The HTTP version string.
* <p>
* Initialized from <code>defaultHTTPVersion</code>, but may be changed
* by the user at any time up until the HTTP request is actually sent.
*/
public String version;
/**
* The headers for the HTTP request. All of these headers will be sent
* when the connection is actually made.
*/
public MimeHeaders requestHeaders;
/**
* Setting this to "true" causes all http headers to be printed
* on the standard error stream; useful for debugging client/server
* interactions.
*/
public static boolean displayAllHeaders = false;
/*
* When true, the request line and request headers (in sendRequest) and
* the status line and response headers (in connect) are echoed to
* System.err.
*/
public boolean displayHeaders = false; // set on a per-request basis
/*
* Buffer handed out by getOutputStream(); null until that method is
* called. Its size becomes the "Content-Length" request header.
*/
ByteArrayOutputStream postData;
/*
* The request target written on the request line: the URL's file part
* when talking to the host directly, the complete URL when using a proxy.
*/
String uri;
/*
* Name of the connection-control header in use: "Connection" for a
* direct request, "Proxy-Connection" for a proxied one.
*/
String connectionHeader;
/*
* The stream returned to the user by getInputStream().
*/
HttpInputStream in;
/*
* The stream that "in" reads from.
*/
InputStream under;
/**
* The status line from the HTTP response. This field is not valid until
* after <code>connect</code> has been called and the HTTP response has
* been read.
*/
public String status;
/**
* The headers that were present in the HTTP response. This field is
* not valid until after <code>connect</code> has been called and the
* HTTP response has been read.
*/
public MimeHeaders responseHeaders;
/*
* Cached value of keep-alive from the response headers.
*/
boolean keepAlive;
/**
* An artifact of HTTP/1.1 chunked encoding. At the end of an HTTP/1.1
* chunked response, there may be more MimeHeaders. It is only possible
* to access these MimeHeaders after all the data from the input stream
* returned by <code>getInputStream</code> has been read. At that point,
* this field will automatically be initialized to the set of any headers
* that were found. If not reading from an HTTP/1.1 chunked source, then
* this field is irrelevant and will remain <code>null</code>.
*/
public MimeHeaders responseTrailers;
/**
* How many seconds to wait for a target response.
* 0 (the default) waits forever.
*
* @see #setTimeout
*/
int requestTimeout=0;
/**
* Creates a new HttpRequest
object that will send an
* HTTP request to fetch the resource represented by the URL.
*
* The host specified by the URL is not contacted at this time.
*
* @param url
* A fully qualified "http:" URL.
*
* @throws IllegalArgumentException
* if
* The host specified by the URL is not contacted at this time.
*
* @param url
* A string representing a fully qualified "http:" URL.
*
* @throws IllegalArgumentException
* if
* If this method is called, it must be called before
* If this method is called, it must be called before
* If this method is called, it must be called before
* If this method is called, it must be called before
* Currently the implementation is not as good as it could be. The
* user should avoid uploading huge amounts of data, for some definition
* of huge.
*/
/**
 * Gets an output stream that can be used for uploading data to the
 * host. The data is buffered in memory; the same buffer is returned on
 * every call until the request has been sent.
 * <p>
 * If this method is called, it must be called before
 * <code>connect</code> is called. Otherwise it will have no effect.
 * <p>
 * Because everything written is held in memory, the user should avoid
 * uploading huge amounts of data.
 *
 * @return  The stream that collects the request body.
 *
 * @throws  IOException
 *          declared for the benefit of callers; this implementation
 *          does not throw it.
 */
public OutputStream
getOutputStream() throws IOException {
    ByteArrayOutputStream buffer = postData;
    if (buffer == null) {
        // First request for the stream: create the body buffer lazily.
        buffer = new ByteArrayOutputStream();
        postData = buffer;
    }
    return buffer;
}
/**
* Connect to the target host (or proxy), send the request, and read the
* response headers. Any setup routines must be called before the call
* to this method, and routines to examine the result must be called after
* this method.
* <p>
* Only the first call does any work: the <code>connected</code> flag is
* set before any I/O is attempted, so later calls return immediately,
* even if the first attempt failed.
* <p>
* If sending the request or reading the status line fails on a socket
* that is not marked <code>firstTime</code>, that socket is discarded and
* the request is sent once more on a socket obtained from the pool with
* <code>reuse == false</code>.
*
* @throws UnknownHostException
* if the target host (or proxy) could not be contacted.
*
* @throws IOException
* if there is a problem writing the HTTP request or reading
* the HTTP response headers. If a request timeout was set and
* the watchdog reports that it fired, a
* <code>TimeoutException</code> is thrown instead of the
* original exception.
*/
public void
connect() throws UnknownHostException, IOException {
Timer watchdog=null;
if (connected) {
return;
}
// Mark the request as used up-front; a failed attempt is not retried by
// a later call.
connected = true;
prepareHeaders();
openSocket(true);
if (requestTimeout > 0) {
// NOTE(review): the watchdog is handed the stream created by the first
// openSocket(); the retry below replaces "in" with a new stream --
// confirm the Timer still covers the retried attempt.
watchdog = new Timer(requestTimeout, in);
watchdog.start();
}
try {
try {
sendRequest();
readStatusLine();
} catch (IOException e) {
// firstTime presumably marks a socket that was not recycled from the
// pool (TODO confirm); a failure there is reported as-is.
if (hs.firstTime) {
throw e;
}
// Otherwise assume the recycled socket went stale: drop it and try
// exactly once more on a socket requested with reuse == false.
closeSocket(false);
openSocket(false);
sendRequest();
readStatusLine();
}
responseHeaders.read(in);
// Status line and headers have arrived: stop the watchdog.
if (watchdog != null) {
watchdog.cancel();
watchdog.interrupt();
}
if (displayHeaders) {
System.err.println(status);
responseHeaders.print(System.err);
System.err.println();
}
} catch (IOException e) {
// Never return a socket to the pool after a failed exchange.
closeSocket(false);
// NOTE(review): the watchdog is not cancelled on this path when the
// failure was not a timeout -- confirm that is harmless.
if (watchdog!= null && watchdog.timedOut()) {
throw new TimeoutException("waiting for server response");
} else {
throw e;
}
}
// Decide on keep-alive and choose the body stream (chunked, fixed
// length, empty, or read-until-close).
parseResponse();
}
/**
* Set the timeout for getting a remote response.
* If the origin server hasn't responded with at least the
* response headers in this time, terminate the request.
* The timeout may be set up until a call to connect().
* A value of '0' turns off the timeout; since connect() only arms the
* timer for values greater than zero, negative values turn it off too.
* <p>
* If a timeout occurs, a TimeoutException is thrown.
*
* @param sec timeout, in seconds.
*/
public void setTimeout(int sec) {
requestTimeout=sec;
}
/**
 * Fills in the request method, the request target and the automatic
 * request headers just before the request is sent.
 * <ul>
 * <li> If a request body was written, the method defaults to "POST" and
 *      "Content-Length" is set to the number of bytes buffered so far;
 *      otherwise the method defaults to "GET".
 * <li> Without a proxy the request target is the file part of the URL
 *      and the connection header is "Connection"; with a proxy it is
 *      the complete URL and the header is "Proxy-Connection".
 * <li> "Keep-Alive" and "Host" are supplied unless the user has already
 *      set them.
 * </ul>
 */
void
prepareHeaders() {
    if (postData != null) {
        if (method == null) {
            method = "POST";
        }
        setRequestHeader("Content-Length",
                Integer.toString(postData.size()));
    }
    if (method == null) {
        method = "GET";
    }

    if (proxyHost == null) {
        uri = url.getFile();
        /*
         * URL.getFile() is "" for "http://host" and "?q=1" for
         * "http://host?q=1".  The request line needs an absolute path,
         * so make sure the target always starts with "/".
         */
        if (!uri.startsWith("/")) {
            uri = "/" + uri;
        }
        connectionHeader = "Connection";
    } else {
        uri = url.toString();
        connectionHeader = "Proxy-Connection";
    }

    // Only supply defaults; values chosen by the user always win.
    requestHeaders.putIfNotPresent(connectionHeader, "Keep-Alive");
    requestHeaders.putIfNotPresent("Host", host + ":" + port);
}
/**
 * Obtains a socket from the pool and wraps its input side for reading
 * the response. The socket goes to the proxy when one is configured,
 * otherwise to the host named in the URL.
 *
 * @param   reuse
 *          passed through to the pool's <code>get</code> call.
 *
 * @throws  IOException
 *          if the pool cannot provide a socket.
 */
void
openSocket(boolean reuse) throws IOException {
    boolean viaProxy = (proxyHost != null);
    String targetHost = viaProxy ? proxyHost : host;
    int targetPort = viaProxy ? proxyPort : port;

    hs = pool.get(targetHost, targetPort, reuse);
    under = hs.in;
    in = new HttpInputStream(under);
}
/**
 * Gives up the socket used by this request, if there is one. Calling
 * it again once the socket is gone does nothing.
 *
 * @param   reuse
 *          <code>false</code> forces the connection to be closed;
 *          <code>true</code> lets it be kept alive, provided the
 *          response allowed keep-alive as well.
 */
void
closeSocket(boolean reuse) {
    if (hs == null) {
        return;
    }
    HttpSocket socket = hs;
    hs = null;
    keepAlive = keepAlive & reuse;

    if (!keepAlive || eof) {
        /*
         * Either the connection is not going to be kept, or the soft
         * 'eof' flag says the whole response has been consumed; in
         * both cases the pool can take the socket right away.
         */
        pool.close(socket, keepAlive);
        return;
    }
    /*
     * A keep-alive socket with unread response data must be drained
     * before it can carry another request. Do that off-thread, bounded
     * by DRAIN_TIMEOUT.
     */
    new BackgroundCloser(socket, under, DRAIN_TIMEOUT).start();
}
/**
 * Thread that reads and discards what is left of a response so that the
 * socket can be recycled, then hands the socket to the pool. A
 * companion <code>Killer</code> thread interrupts this one when the
 * drain takes longer than <code>timeout</code> milliseconds.
 * <p>
 * NOTE(review): this relies on <code>interrupt()</code> aborting the
 * blocked read -- confirm for the stream types in use.
 */
class BackgroundCloser extends Thread {
    HttpSocket hs;
    InputStream in;
    int timeout;
    Killer killer;

    BackgroundCloser(HttpSocket hs, InputStream in, int timeout) {
        this.hs = hs;
        this.in = in;
        this.timeout = timeout;
    }

    /**
     * Starts the watchdog first, then the drain itself.
     */
    public void start() {
        killer = new Killer(this);
        killer.start();
        super.start();
    }

    /**
     * Reads until end of stream; a read error means the connection can
     * no longer be kept alive.
     */
    public void run() {
        byte[] scratch = new byte[4096];
        try {
            while (in.read(scratch, 0, scratch.length) >= 0) {
                // discard
            }
        } catch (IOException e) {
            keepAlive = false;
        }
        pool.close(hs, keepAlive);
        // Drain finished; the watchdog is no longer needed.
        killer.interrupt();
    }
}
/**
 * Watchdog for a <code>BackgroundCloser</code>: sleeps for the closer's
 * timeout and then interrupts it. The closer interrupts this thread
 * when it finishes first, which ends the sleep early.
 */
static class Killer extends Thread {
    BackgroundCloser b;
    int timeout;

    Killer(BackgroundCloser b) {
        this.b = b;
    }

    public void run() {
        try {
            BackgroundCloser target = b;
            sleep(target.timeout);
            target.interrupt();
        } catch (Exception ignored) {
            // Being interrupted is the normal "no longer needed" signal;
            // there is nothing to clean up on any other failure either.
        }
    }
}
/**
 * Writes the request line, the request headers and, if present, the
 * buffered request body to the socket.
 * <p>
 * The body buffer is deliberately kept after sending:
 * <code>connect</code> calls this method a second time when a recycled
 * socket turns out to be stale, and the retried request must carry the
 * same body that its "Content-Length" header announces.
 *
 * @throws  IOException
 *          if the request could not be written. A
 *          <code>PrintStream</code> records write failures instead of
 *          throwing them, so the error flag is checked explicitly.
 */
void
sendRequest() throws IOException {
    String requestLine = method + " " + uri + " " + version + "\r\n";

    if (displayHeaders) {
        System.err.print(requestLine);
        requestHeaders.print(System.err);
        System.err.print("\r\n");
    }

    PrintStream p = new PrintStream(hs.out);
    p.print(requestLine);
    requestHeaders.print(p);
    p.print("\r\n");
    if (postData != null) {
        postData.writeTo(p);
    }
    p.flush();

    // Surface write failures that PrintStream swallowed.
    if (p.checkError()) {
        throw new IOException("error writing HTTP request");
    }
}
/*
* Reads lines from the response until a usable status line is found and
* leaves it in "status".
*
* - "100 Continue" interim responses are skipped together with their
* header lines.
* - Blank lines in front of the status line are ignored.
* - A line that does not start with "HTTP/1." on a socket marked
* firstTime is treated as the start of a header-less (HTTP/0.9 style)
* body: it is pushed back and a synthetic "HTTP/1.0 200 OK" status is
* used.
*
* Throws EOFException if the stream ends before a status line is seen,
* and IOException("malformed server response") for an over-long line or
* for an unrecognized line on a socket that is not marked firstTime.
*/
void
readStatusLine() throws IOException {
while (true) {
status = in.readLine(LINE_LIMIT);
if (status == null) {
throw new EOFException();
}
if (status.startsWith("HTTP/1.1 100")
|| status.startsWith("HTTP/1.0 100")) {
/*
* Ignore the "100 Continue" response that some HTTP/1.1
* servers send. We can't depend upon it being sent, because
* we might be talking to an HTTP/1.0 server or an HTTP/1.1
* server that doesn't send the "100 Continue" response, so
* we can't use the response for any decision making, such as
* not sending the post data.
*
* www.u-net.com sends "HTTP/1.0 100 Continue"!
*/
// Skip the interim response's header lines up to and including the
// blank line (or end of stream), then loop for the real status line.
while (true) {
status = in.readLine();
if ((status == null) || (status.length() == 0)) {
break;
}
}
} else if (status.startsWith("HTTP/1.")) {
return;
} else if (status.length() == 0) {
// Blank line before the status line: ignore it and keep reading.
} else if (status.length() == LINE_LIMIT) {
// The line filled the whole limit: treat the response as malformed.
throw new IOException("malformed server response");
} else if (hs.firstTime) {
/*
* Some servers don't send back any headers, even if they
* accept a HTTP/1.0 or greater request! We have to push
* back this line, so it can be re-read as the body.
* Since this is coming back with no headers, the content
* length will be unknown and so the socket will be closed.
*/
// Room for the line plus two CR LF pairs. Bytes are pushed back in
// reverse, so the stream will yield: CR LF, the line, CR LF.
PushbackInputStream pin = new PushbackInputStream(hs.in,
status.length() + 4);
pin.unread('\n');
pin.unread('\r');
for (int i = status.length(); --i >= 0; ) {
pin.unread(status.charAt(i));
}
/*
* And push back a blank line, so the user thinks it got to
* the end of the headers
*/
pin.unread('\n');
pin.unread('\r');
status = "HTTP/1.0 200 OK";
// Continue reading from the push-back stream from here on.
hs.in = pin;
under = pin;
in = new HttpInputStream(under);
break;
} else {
/*
* If we see funny responses (missing headers, etc.) from a
* socket that we've reused, then we probably got out of sync
* with the remote host (e.g., didn't read enough from the
* last response), and should abort this request.
*/
throw new IOException("malformed server response");
}
}
}
/**
 * Interprets the response headers: decides whether the connection may
 * be kept alive and installs the input stream that matches the way the
 * body is delimited.
 * <ul>
 * <li> "Transfer-Encoding: chunked" (matched case-insensitively, as
 *      transfer-coding names are case-insensitive): de-chunking stream.
 * <li> No body (content length 0, or a HEAD request): empty stream, and
 *      the socket is released immediately.
 * <li> Positive content length: stream limited to that many bytes.
 * <li> Unknown length: read until the host closes the connection, so
 *      the connection cannot be kept alive.
 * </ul>
 */
void
parseResponse() {
    String connection = getResponseHeader(connectionHeader);
    if (connection != null) {
        keepAlive = connection.equalsIgnoreCase("Keep-Alive");
    } else {
        // No explicit header: keep-alive only when the status line
        // says HTTP/1.1.
        keepAlive = status.startsWith("HTTP/1.1");
    }

    String transferEncoding = getResponseHeader("Transfer-Encoding");
    if ((transferEncoding != null)
            && transferEncoding.trim().equalsIgnoreCase("chunked")) {
        under = new UnchunkingInputStream(this);
        in = new RecycleInputStream(this, under);
        return;
    }

    int contentLength = getContentLength();
    if (contentLength < 0) {
        /*
         * Some servers leave off the content length for return codes
         * known to require no content.
         */
        if (status.indexOf("304") > 0 || status.indexOf("204") > 0) {
            responseHeaders.put("Content-Length", "0");
            contentLength = 0;
        }
    }

    if ((contentLength == 0) || method.equals("HEAD")) {
        // Nothing to read: hand out an empty stream and give the
        // socket back right away.
        under = new NullInputStream();
        in = new HttpInputStream(under);
        closeSocket(keepAlive);
    } else if (contentLength > 0) {
        under = new LimitInputStream(this, contentLength);
        in = new RecycleInputStream(this, under);
    } else {
        // Length unknown: the end of the body is the end of the
        // connection.
        keepAlive = false;
        in = new RecycleInputStream(this, under);
    }
}
/**
* Gets an input stream that can be used to read the body of the
* HTTP response. Unlike the other convenience methods for accessing
* the HTTP response, this one automatically connects to the
* target host if not already connected.
*
* The input stream that
* The user can either call this method or
* When all the response data is read from the input stream, the
* input stream is automatically closed (recycled). If the user is
* not going to read all the response data from input stream, the user
* must call
* Note that the input stream is automatically closed if the input
* stream throws an exception while reading.
*
* In order to interrupt a pending I/O operation in another thread
* (for example, to stop a request that is taking too long), the user
* should call
* Closing the request multiple times is allowed.
*
* In order to make sure that open sockets are not left lying around
* the user should use code similar to the following:
*
* If this method is called, it must be called after
* For advanced features, the user can directly access the
*
* If this method is called, it must be called after
* For advanced features, such as enumerating over all response headers,
* the user should directly access the
* If this method is called, it must be called after
* There are many limitations: only HTTP GET requests are supported, the
* output filename is derived automatically from the URL and can't be
* overridden, if a destination file already exists, it is overwritten.
*/
public static void
main(String[] args) throws Exception {
String proxyHost = null;
int proxyPort = 0;
boolean isVerbose = false;
if (args.length == 0) {
System.err.println("Usage: [-v(erbose) -h(headers) -p
* It is this method's responsibility to fill in all the public
* member variables of the
* For each call to this method, there should eventually be a call to
*
* It is this method's responsibility to release resources used
* by the
* After calling this method, the user should not refer to the specified
* url
is not an "http:" URL.
*/
/**
* Creates a new <code>HttpRequest</code> object that will send an
* HTTP request to fetch the resource represented by the URL.
* <p>
* The host specified by the URL is not contacted at this time.
*
* @param url
* A fully qualified "http:" URL.
*
* @throws IllegalArgumentException
* if <code>url</code> is not an "http:" URL.
*/
public HttpRequest(URL url) {
init(url);
}
void init(URL url) {
pool = null;
for(int i=0;iurl
is not a well-formed "http:" URL.
* @deprecated Use the static getRequest method instead
*/
/**
* Creates a new <code>HttpRequest</code> object from the string form of
* a URL.
* <p>
* The host specified by the URL is not contacted at this time.
*
* @param url
* A string representing a fully qualified "http:" URL.
*
* @throws IllegalArgumentException
* if <code>url</code> is not a well-formed "http:" URL.
*
* @deprecated Use the static getRequest method instead
*/
public
HttpRequest(String url) {
this(toURL(url));
}
/*
 * Artifact of Java: cannot implement HttpRequest(String) as follows
 * because this(new URL(url)) must be the first statement in the
 * constructor; it can't be inside of a try statement:
 *
 *     public HttpRequest(String url) {
 *         try {
 *             this(new URL(url));
 *         } catch (MalformedURLException e) {
 *             throw new IllegalArgumentException(url);
 *         }
 *     }
 */
/**
 * Converts a string to a URL, turning the checked
 * <code>MalformedURLException</code> into an unchecked
 * <code>IllegalArgumentException</code>.
 *
 * @param   url
 *          The string form of the URL.
 *
 * @return  The parsed URL.
 *
 * @throws  IllegalArgumentException
 *          if <code>url</code> cannot be parsed. The message is the
 *          offending string and the cause is the original
 *          <code>MalformedURLException</code>, so the reason for the
 *          failure is not lost.
 */
static URL
toURL(String url) {
    try {
        return new URL(url);
    } catch (MalformedURLException e) {
        throw new IllegalArgumentException(url, e);
    }
}
/**
* Sets the HTTP method to the specified value. Some of the normal
* HTTP methods are "GET", "POST", "HEAD", "PUT", "DELETE", but the
* user can set the method to any value desired.
* <p>
* If this method is called, it must be called before <code>connect</code>
* is called. Otherwise it will have no effect.
*
* @param method
* The string for the HTTP method, or <code>null</code> to
* allow this <code>HttpRequest</code> to pick the method for
* itself.
*/
public void
setMethod(String method)
{
this.method = method;
}
/**
 * Sets the proxy for this request. The HTTP proxy request will be sent
 * to the specified proxy host.
 * <p>
 * If this method is called, it must be called before
 * <code>connect</code> is called. Otherwise it will have no effect.
 *
 * @param   proxyHost
 *          The proxy that will handle the request, or <code>null</code>
 *          to not use a proxy.
 *
 * @param   proxyPort
 *          The port on the proxy, for the proxy request. A value that
 *          is not positive selects the socket pool's default port.
 *          Ignored if <code>proxyHost</code> is <code>null</code>.
 */
public void
setProxy(String proxyHost, int proxyPort) {
    this.proxyHost = proxyHost;
    this.proxyPort = (proxyPort > 0) ? proxyPort : pool.getDefaultPort();
}
/**
* Sets a request header in the HTTP request that will be issued. In
* order to do fancier things like appending a value to an existing
* request header, the user may directly access the
* <code>requestHeaders</code> variable.
* <p>
* If this method is called, it must be called before <code>connect</code>
* is called. Otherwise it will have no effect.
*
* @param key
* The header name.
*
* @param value
* The value for the request header.
*
* @see #requestHeaders
*/
public void
setRequestHeader(String key, String value) {
requestHeaders.put(key, value);
}
/**
* Gets an output stream that can be used for uploading data to the
* host.
* connect
* is called. Otherwise it will have no effect.
* getInputStream
provides
* automatically hides the differences between "Content-Length", no
* "Content-Length", and "chunked" for HTTP/1.0 and HTTP/1.1 responses.
* In all cases, the user can simply read until reaching the end of the
* input stream, which signifies that all the available data from this
* request has been read. (If reading from a "chunked" source, the data
* is automatically de-chunked as it is presented to the user. There is
* no way to access the raw underlying stream that contains the HTTP/1.1
* chunking packets.)
*
* @throws IOException
* if there is problem connecting to the target.
*
* @see #connect
*/
/**
* Gets an input stream that can be used to read the body of the
* HTTP response. Unlike the other convenience methods for accessing
* the HTTP response, this one automatically connects to the
* target host if not already connected.
* <p>
* The input stream that <code>getInputStream</code> provides
* automatically hides the differences between "Content-Length", no
* "Content-Length", and "chunked" for HTTP/1.0 and HTTP/1.1 responses.
* In all cases, the user can simply read until reaching the end of the
* input stream, which signifies that all the available data from this
* request has been read.
*
* @return The stream from which the response body can be read.
*
* @throws IOException
* if there is problem connecting to the target.
*
* @see #connect
*/
public HttpInputStream
getInputStream() throws IOException {
connect();
return in;
}
/**
* Gracefully closes this HTTP request when user is done with it.
* <p>
* The user can either call this method or <code>close</code> on the
* input stream obtained from the <code>getInputStream</code>
* method -- the results are the same.
* <p>
* When all the response data is read from the input stream, the
* input stream is automatically closed (recycled). If the user is
* not going to read all the response data from input stream, the user
* must call <code>close</code> to
* release the resources associated with the open request. Otherwise
* the program may consume all available sockets, waiting forever for
* the user to finish reading.
* <p>
* Note that the input stream is automatically closed if the input
* stream throws an exception while reading.
* <p>
* In order to interrupt a pending I/O operation in another thread
* (for example, to stop a request that is taking too long), the user
* should call <code>disconnect</code> or interrupt the blocked thread.
* The user should not call <code>close</code> in this case because
* <code>close</code> will not interrupt the pending I/O operation.
* <p>
* Closing the request multiple times is allowed.
* <p>
* In order to make sure that open sockets are not left lying around
* the user should use code similar to the following:
* <pre>
* OutputStream out = ...
* HttpRequest http = HttpRequest.getRequest("http://bob.com/index.html");
* try {
* HttpInputStream in = http.getInputStream();
* in.copyTo(out);
* } finally {
* // Copying to "out" could have failed. Close "http" in case
* // not all the data has been read from it yet.
* http.close();
* }
* </pre>
*
* @see #disconnect
*/
public void
close() {
closeSocket(true);
}
/**
* Interrupts this HTTP request. Can be used to halt an in-progress
* HTTP request from another thread, by causing it to
* throw an <code>InterruptedIOException</code> during the
* <code>connect</code>
* or while reading from the input stream, depending upon what state
* this HTTP request is in when it is disconnected.
* <p>
* Unlike <code>close</code>, the socket is released without being
* offered for reuse.
*
* @see #close
*/
public void
disconnect() {
closeSocket(false);
}
/**
 * Gets the HTTP response status code. From responses like:
 * <pre>
 * HTTP/1.0 200 OK
 * HTTP/1.0 401 Unauthorized
 * </pre>
 * this method extracts the integers <code>200</code> and
 * <code>401</code> respectively. Returns <code>-1</code> if the
 * response status code was malformed.
 * <p>
 * If this method is called, it must be called after
 * <code>connect</code> has been called. Otherwise the information is
 * not yet available and this method will return <code>-1</code>.
 * <p>
 * For advanced features, the user can directly access the
 * <code>status</code> variable.
 *
 * @return  The integer status code from the HTTP response.
 *
 * @see     #connect
 * @see     #status
 */
public int
getResponseCode() {
    if (status == null) {
        // No response has been read yet.
        return -1;
    }

    int codeStart = status.indexOf(' ') + 1;
    int codeEnd = status.indexOf(' ', codeStart + 1);

    // Sometimes the status line has the status code but no status
    // phrase; then the code runs to the end of the line.
    String code = (codeEnd < 0)
            ? status.substring(codeStart)
            : status.substring(codeStart, codeEnd);

    try {
        return Integer.parseInt(code);
    } catch (NumberFormatException e) {
        return -1;
    }
}
/**
* Gets the value associated with the given case-insensitive header name
* from the HTTP response.
* <p>
* If this method is called, it must be called after <code>connect</code>
* has been called. Otherwise the information is not available and
* this method will return <code>null</code>.
* <p>
* For advanced features, such as enumerating over all response headers,
* the user should directly access the <code>responseHeaders</code>
* variable.
*
* @param key
* The case-insensitive name of the response header.
*
* @return The value associated with the given name, or <code>null</code>
* if there is no such header in the response.
*
* @see #connect
* @see #responseHeaders
*/
public String
getResponseHeader(String key) {
return responseHeaders.get(key);
}
/**
 * Convenience method to get the "Content-Length" header from the
 * HTTP response.
 * <p>
 * If this method is called, it must be called after
 * <code>connect</code> has been called. Otherwise the information is
 * not available and this method will return <code>-1</code>.
 *
 * @return  The content length specified in the response headers, or
 *          <code>-1</code> if the length was not specified or malformed
 *          (not a number).
 *
 * @see     #connect
 * @see     #getResponseHeader
 */
public int
getContentLength() {
    int length = -1;
    try {
        String declared = responseHeaders.get("Content-Length");
        length = Integer.parseInt(declared);
    } catch (Exception e) {
        // Header absent or not a number: report "unknown".
    }
    return length;
}
/**
 * Removes all the point-to-point (hop-by-hop) headers from
 * the given mime headers.
 *
 * @param   headers
 *          The mime headers to be modified.
 *
 * @param   response
 *          <code>true</code> to remove the point-to-point response
 *          headers, <code>false</code> to remove the point-to-point
 *          request headers.
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2068.txt">RFC 2068</a>
 */
public static void
removePointToPointHeaders(MimeHeaders headers, boolean response) {
    // Headers that are hop-by-hop in both directions.
    String[] common = {
        "Connection", "Proxy-Connection", "Keep-Alive", "Upgrade"
    };
    for (int i = 0; i < common.length; i++) {
        headers.remove(common[i]);
    }

    if (response) {
        headers.remove("Proxy-Authenticate");
        headers.remove("Public");
        headers.remove("Transfer-Encoding");
    } else {
        headers.remove("Proxy-Authorization");
    }
}
/**
 * Convenience method for adding request headers by looking them
 * up in a properties object.
 *
 * @param tokens  a white space delimited set of tokens that refer
 *                to headers that will be added to the HTTP request.
 * @param props   Keys of the form <code>[token].name</code> and
 *                <code>[token].value</code> are used to lookup
 *                additional HTTP headers to be added to the request.
 *                A token is skipped unless both keys are present.
 * @return        The number of headers added to the request
 * @see #setRequestHeader
 */
public int
addHeaders(String tokens, Properties props) {
    int added = 0;
    for (StringTokenizer st = new StringTokenizer(tokens);
            st.hasMoreTokens(); ) {
        String prefix = st.nextToken();
        String name = props.getProperty(prefix + ".name");
        String value = props.getProperty(prefix + ".value");
        if (name == null || value == null) {
            continue;
        }
        setRequestHeader(name, value);
        added++;
    }
    return added;
}
/**
 * Get the content as a string.  Uses the character
 * encoding specified in "encoding", if specified, or the
 * encoding implied by the http headers.
 *
 * @param   encoding
 *          The ISO character encoding to use, or <code>null</code>
 *          to use the result of {@link #getEncoding()}.
 *
 * @return  The content as a string.
 */
public String getContent(String encoding)
        throws IOException, UnsupportedEncodingException {
    // Drain the whole response body into memory first.
    HttpInputStream body = getInputStream();
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    body.copyTo(buffer);
    body.close();

    // A caller-supplied encoding always wins over the header-derived one.
    String charset = encoding;
    if (charset == null) {
        charset = getEncoding();
    }
    String text = buffer.toString(charset);
    buffer.close();
    return text;
}
/**
 * Return the content as a string, decoded with the character
 * encoding implied by the http headers.
 *
 * @return  The content as a string.
 *
 * @see     #getContent(String)
 */
public String getContent()
        throws IOException, UnsupportedEncodingException {
    // No explicit encoding: let getContent(String) work it out.
    String noExplicitEncoding = null;
    return getContent(noExplicitEncoding);
}
/**
 * Get the ISO character encoding (if any) associated with this
 * text stream, or the default http charset if none is found.
 * Response headers must be available.
 *
 * @return  The character encoding derived from the response headers.
 *
 * @see     #getEncoding(MimeHeaders)
 */
public String getEncoding() {
    // Same lookup as the static variant, applied to our own response.
    return HttpRequest.getEncoding(responseHeaders);
}
/**
* The character set reported by <code>getEncoding</code> when the
* headers do not supply one.
*/
public static final String DEFAULT_CHARSET="ISO-8859-1";
/*
* Pattern applied by getEncoding(MimeHeaders) to a content-type value:
* the value must start with "text/", and the run of characters after
* "charset=" (up to a blank, tab or ';') is captured as the first
* sub-expression.
* NOTE(review): the second constructor argument presumably asks for
* case-insensitive matching - confirm against the Regexp class.
*/
static final Regexp encExp =
new Regexp("^text/.*;[ \t]*charset=([^ \t;]*)",true);
/**
 * Get the character encoding named by the "content-type" entry of the
 * supplied headers.
 * <p>
 * Only "text/..." content types carrying a "charset" parameter are
 * recognized.  The charset value may be written as a quoted string
 * (<code>charset="utf-8"</code>); the surrounding quotes are removed so
 * the result can be used directly as an encoding name.
 *
 * @param   headers
 *          The headers to examine.
 *
 * @return  The charset named in the content type, or
 *          {@link #DEFAULT_CHARSET} if there is no content type, it
 *          does not match, or the charset value is empty.
 */
public static String getEncoding(MimeHeaders headers) {
    String type = headers.get("content-type");
    if (type == null) {
        return DEFAULT_CHARSET;
    }

    String[] subs = new String[2];
    if (!encExp.match(type.trim(), subs) || subs[1] == null) {
        return DEFAULT_CHARSET;
    }

    // Parameter values may legally be quoted; the quotes are not part
    // of the charset name.
    String charset = subs[1];
    if (charset.startsWith("\"")) {
        charset = charset.substring(1);
    }
    if (charset.endsWith("\"")) {
        charset = charset.substring(0, charset.length() - 1);
    }

    // "charset=" with nothing after it names no usable encoding.
    return (charset.length() == 0) ? DEFAULT_CHARSET : charset;
}
/*
* NOTE(review): the text below describes a command-line url fetcher, not
* the interface that follows; it looks like documentation left over from
* a "main" method and a class-level comment.  It is kept verbatim until
* its proper home is confirmed.
*
* Grab http document(s) and save them in the filesystem.
* This is a simple batch HTTP url fetcher. Usage:
*
* java ... sunlabs.brazil.request.HttpRequest [-v(erbose)] [-h(headers)] [-p
*
*
* The options and url's may be given in any order. Use "-p" by itself
* to disable the proxy for all following requests.
* HttpRequest
* class.
*
* @author Colin Stevens (colin.stevens@sun.com)
* @version %I%
*/
/**
* A pool of <code>HttpSocket</code> connections.  A pool says which
* url's it can handle, hands out an <code>HttpSocket</code> for a host
* and port on request, and takes the <code>HttpSocket</code> back when
* the caller is finished with it.
*/
public interface HttpSocketPool {
/**
* Return true if this is one of the url's my pool can handle
*
* @param url
* The url to examine.
*
* @return <code>true</code> if this pool can handle the url.
*/
public boolean isMine(URL url);
/**
* Return the default port (this doesn't belong here)
*
* @return The default port number.
*/
public int getDefaultPort();
/**
* Returns an <code>HttpSocket</code> that can be used to communicate
* with the specified port on the named host.
* <p>
* The caller should call <code>close</code> when the
* <code>HttpSocket</code> isn't needed anymore.
* <p>
* NOTE(review): one sentence of the original comment, ending in
* "<code>HttpSocket</code> before returning", was damaged and could
* not be restored.
*
* @param host
* The host name.
* @param port
* The port number.
* @param reuse
* <code>true</code> to request that this pool attempt to
* find and reuse an existing idle connection,
* <code>false</code>
* to request that this pool establish a new connection to
* the named host.
*
* @return The <code>HttpSocket</code>.
*
* @throws IOException
* if there is a problem connecting the specified port on
* the named host. The <code>IOException</code>s (and
* subclasses) that might be thrown depend upon how the
* socket connection is established. See the socket
* documentation for further details. Some subclasses that
* might be thrown are as follows:
*
* @throws java.net.UnknownHostException
* if the host name cannot be resolved.
*
* @throws java.net.ConnectException
* if the named host is not listening on "port".
*
* @throws java.io.InterruptedIOException
* if the connection times out or this thread is
* interrupted by <code>Thread.interrupt</code>.
*/
public HttpSocket
get(String host, int port, boolean reuse)
throws IOException;
/**
* Releases an <code>HttpSocket</code> to this pool when it is not
* in use any more.
* <p>
* The pool decides what to do with the <code>HttpSocket</code>,
* e.g. closing the underlying socket.  The caller should not use the
* <code>HttpSocket</code> any more.
* NOTE(review): the two sentences above were rebuilt from damaged
* fragments of the original comment - confirm the wording.
*
* @param hs
* The <code>HttpSocket</code> to release.
*
* @param reuse
* <code>true</code>: the specified <code>HttpSocket</code>
* should be put back into the idle pool,
* <code>false</code>
* if it should be released immediately.
*/
public void close(HttpSocket hs, boolean reuse);
}
/**
 * A socket pool for plain "http" url's.  Idle connections are kept in
 * a list ordered from least to most recently released; a daemon thread
 * periodically closes the ones that have been idle for too long.
 */
public static class
SimpleHttpSocketPool implements Runnable, HttpRequest.HttpSocketPool {
    public int maxIdle;         // size of the socket pool
    public int maxAge;          // max age of idle socket (mseconds)
    public int reapInterval;    // interval (in msec) to run reaper thread

    // Pool of idle connections.  Entries are appended on release, so the
    // front of the list always holds the connection idle the longest.
    Vector idle = new Vector();

    /**
     * The background thread that removes old connections
     */
    Thread reaper;

    /**
     * Create a socket pool.  A socket pool is responsible for
     * providing the underlying "socket" like transport for the
     * http protocol, and managing the lifetimes of the connections.
     * The reaper thread is started as a daemon by this constructor.
     *
     * @param maxIdle       Max number of idle connections; zero or less
     *                      disables pooling (released sockets are closed)
     * @param maxAge        (ms) Max idle time for a connection
     * @param reapInterval  (ms) How often to check for expired sockets
     */
    public
    SimpleHttpSocketPool(int maxIdle, int maxAge, int reapInterval) {
        this.maxIdle = maxIdle;
        this.maxAge = maxAge;
        this.reapInterval = reapInterval;
        reaper = new Thread(this);
        reaper.setDaemon(true);
        reaper.start();
    }

    /**
     * Use the default pool settings: 20 idle connections, 30 seconds
     * max idle time, reaped every 10 seconds.
     */
    public
    SimpleHttpSocketPool() {
        this(20, 30000, 10000);
    }

    /**
     * This pool handles url's whose protocol is "http".
     */
    public boolean isMine(URL url) {
        return url.getProtocol().equals("http");
    }

    /**
     * The default port for this pool is 80.
     */
    public int getDefaultPort() {
        return 80;
    }

    /**
     * Get a potentially "pooled" target object.
     * Call this instead of the constructor to use the pool.
     *
     * @param host   the target content server (or web proxy)
     * @param port   target web server port
     * @param reuse  <code>true</code> to look for an idle connection to
     *               the same host and port before opening a new one
     *
     * @return an idle connection from the pool, or a newly opened one
     *
     * @throws IOException if a new connection cannot be established
     */
    public HttpSocket
    get(String host, int port, boolean reuse)
            throws IOException, UnknownHostException {
        host = host.toLowerCase();
        if (reuse) {
            synchronized (idle) {
                /*
                 * Start at end to reuse the most recent socket, which is
                 * hopefully the most likely to still be alive.
                 */
                for (int i = idle.size() - 1; i >= 0; i--) {
                    HttpSocket hs = (HttpSocket) idle.elementAt(i);
                    if (hs.host.equals(host) && (hs.port == port)) {
                        idle.removeElementAt(i);
                        hs.timesUsed++;
                        return hs;
                    }
                }
            }
        }

        Socket s = getSocket(host, port);
        try {
            return new HttpSocket(host, port, s);
        } catch (IOException e) {
            // The wrapper could not be set up, so nobody else will ever
            // see this socket: close it here instead of leaking it.
            try {
                s.close();
            } catch (IOException ignored) {
                // best effort; the original failure is what matters
            }
            throw e;
        }
    }

    /**
     * Open the underlying connection.  Subclasses can override this.
     *
     * @param host  the host to connect to
     * @param port  the port to connect to
     *
     * @return a new socket connected to the host and port
     */
    protected Socket
    getSocket(String host, int port) throws IOException {
        return new Socket(host, port);
    }

    /**
     * Take back a connection.
     *
     * @param hs     the connection being released
     * @param reuse  <code>true</code> to keep the connection in the idle
     *               pool (bumping the oldest idle entry if the pool is
     *               full), <code>false</code> to close it right away
     */
    public void
    close(HttpSocket hs, boolean reuse) {
        // A pool with no capacity cannot keep anything; without this
        // check the "bump the oldest" step below would be attempted on
        // an empty list.
        if (!reuse || maxIdle <= 0) {
            hs.close();
            return;
        }
        synchronized (idle) {
            if (idle.size() >= maxIdle) {
                HttpSocket bump = (HttpSocket) idle.firstElement();
                idle.removeElementAt(0);
                bump.close();
            }
            hs.firstTime = false;
            hs.lastUsed = System.currentTimeMillis();
            idle.addElement(hs);
        }
    }

    int lastSize = -1;

    /**
     * Body of the reaper thread: every <code>reapInterval</code>
     * milliseconds, close the connections that have been idle for more
     * than <code>maxAge</code> milliseconds.  Never returns.
     */
    public void
    run() {
        while (true) {
            try {
                Thread.sleep(reapInterval);
            } catch (InterruptedException e) {
                // An interrupt only restarts the wait.
                continue;
            }

            /*
             * Start looking at the front - the "oldest" sockets first -
             * and stop at the first one that is still young enough.
             */
            long expired = System.currentTimeMillis() - maxAge;
            synchronized (idle) {
                while (idle.size() > 0) {
                    HttpSocket hs = (HttpSocket) idle.firstElement();
                    if (hs.lastUsed >= expired) {
                        break;
                    }
                    idle.removeElementAt(0);
                    hs.close();
                }
            }
        }
    }

    /**
     * List the idle connections, for diagnostics.
     */
    public String
    toString() {
        StringBuffer sb = new StringBuffer("SimpleHttpSocketPool ");
        synchronized (idle) {
            for (int i = 0; i < idle.size(); i++) {
                HttpSocket hs = (HttpSocket) idle.elementAt(i);
                sb.append(hs.toString()).append(", ");
            }
        }
        return sb.toString();
    }
}
/**
 * This class is used as the bag of information kept about an open,
 * idle socket.  It is used by the SimpleHttpSocketPool.
 * NOTE(review): the original comment ended with the fragment
 * "a better place for it is found" - presumably the class lives here
 * only until then.
 */
public static class HttpSocket {
    String host;
    int port;
    Socket sock;
    boolean firstTime = true;
    long lastUsed;
    int timesUsed = 1;
    InputStream in;
    OutputStream out;

    // Source of the per-instance serial numbers shown by toString().
    private static int count = 0;
    private int serial;

    /**
     * Wrap a connected socket, putting buffered streams around its
     * input and output.
     *
     * @param host  the name of the host the socket is connected to
     * @param port  the port the socket is connected to
     * @param sock  the connected socket
     *
     * @throws IOException if the socket's streams cannot be obtained
     */
    public
    HttpSocket(String host, int port, Socket sock)
            throws IOException, UnknownHostException {
        this.host = host;
        this.port = port;
        this.sock = sock;
        this.in = new BufferedInputStream(sock.getInputStream());
        this.out = new BufferedOutputStream(sock.getOutputStream());
        this.serial = count++;
    }

    /**
     * Drop the streams and close the socket.  An I/O error while
     * closing is ignored; closing again has no further effect.
     */
    public void
    close() {
        in = null;
        out = null;
        try {
            if (sock != null) {
                sock.close();
            }
        } catch (IOException e) {
            // nothing useful can be done about a failed close
        }
        sock = null;
    }

    /**
     * Describe this connection, for diagnostics.
     */
    public String
    toString() {
        StringBuffer text = new StringBuffer();
        text.append(host).append(":").append(port);
        text.append(" serial: ").append(serial);
        text.append(" used: ").append(timesUsed);
        text.append(" (").append(sock).append(")");
        return text.toString();
    }
}
/**
 * Signals that a timeout occurred while waiting for a socket response.
 */
public static class TimeoutException extends IOException {
    /**
     * Create the exception.
     *
     * @param msg  the detail message
     */
    public TimeoutException(String msg) {
        super(msg);
    }
}
}
/**
 * Input stream that releases the request's socket when the stream is
 * done with: the socket is released without reuse on end of file or on
 * a read error, and released for reuse when the stream is closed
 * explicitly.  The socket is released at most once.
 */
class RecycleInputStream extends HttpInputStream {
    HttpRequest target;
    boolean closed;

    public
    RecycleInputStream(HttpRequest target, InputStream in) {
        super(in);
        this.target = target;
    }

    /**
     * Reads from the underlying input stream, which might be a raw
     * input stream, a limit input stream, or an unchunking input stream.
     * If we get EOF or there is an error reading, close the socket.
     */
    public int
    read() throws IOException {
        if (closed) {
            return -1;
        }
        int ch;
        try {
            ch = in.read();
        } catch (IOException e) {
            close(false);
            throw e;
        }
        if (ch < 0) {
            close(false);
        }
        return ch;
    }

    /**
     * Bulk variant of {@link #read()}, with the same handling of EOF
     * and read errors.
     */
    public int
    read(byte[] buf, int off, int len) throws IOException {
        if (closed) {
            return -1;
        }
        int count;
        try {
            count = in.read(buf, off, len);
        } catch (IOException e) {
            close(false);
            throw e;
        }
        if (count < 0) {
            close(false);
        }
        return count;
    }

    /**
     * Release the socket through the request, the first time only.
     */
    private void
    close(boolean reuse) {
        if (closed) {
            return;
        }
        closed = true;
        target.closeSocket(reuse);
    }

    /**
     * Close the stream, offering the socket for reuse.
     */
    public void
    close() {
        close(true);
    }
}
/**
 * An input stream that is always at end of file.
 */
class NullInputStream extends InputStream {
    /**
     * @return always -1 (end of file)
     */
    public int
    read() {
        return -1;
    }

    /**
     * Bulk read.  The buffer is a <code>byte[]</code> so that this
     * method really overrides <code>InputStream.read(byte[], int, int)</code>.
     *
     * @return 0 when no bytes are requested, otherwise -1 (end of file)
     */
    public int
    read(byte[] buf, int off, int len) {
        return (len == 0) ? 0 : -1;
    }
}
/**
 * Input stream that delivers at most <code>limit</code> bytes from the
 * request's socket input.  When the last permitted byte has been
 * delivered, the request is marked as being at end of file and its
 * socket is released for reuse.
 */
class LimitInputStream extends HttpInputStream {
    HttpRequest target;
    int limit;      // number of bytes that may still be delivered

    public
    LimitInputStream(HttpRequest target, int limit) {
        super(target.hs.in);
        this.target = target;
        this.limit = limit;
    }

    public int
    read() throws IOException {
        if (limit <= 0) {
            return -1;
        }
        int ch = in.read();
        if (ch < 0) {
            return ch;
        }
        limit--;
        if (limit <= 0) {
            finish();
        }
        return ch;
    }

    public int
    read(byte[] buf, int off, int len) throws IOException {
        if (limit <= 0) {
            return -1;
        }
        int wanted = Math.min(len, limit);
        int got = in.read(buf, off, wanted);
        if (got < 0) {
            // The underlying stream ended early; stay at EOF from now on.
            limit = 0;
            return -1;
        }
        limit -= got;
        if (limit <= 0) {
            finish();
        }
        return got;
    }

    /**
     * Called once the limit has been used up: flag EOF on the request
     * and release its socket for reuse.
     */
    private void
    finish() {
        target.eof = true;
        target.closeSocket(true);
    }
}
/**
 * Input stream that decodes a "chunked" HTTP/1.1 body: each chunk is
 * introduced by a line holding its size in hexadecimal, and a chunk of
 * size zero ends the body.  When the final chunk is seen, the trailers
 * are read into the request's <code>responseTrailers</code>, the
 * request is marked as being at end of file, and its socket is released
 * for reuse.
 */
class UnchunkingInputStream extends HttpInputStream {
    HttpRequest target;
    boolean eof;        // the zero-length final chunk has been seen
    int bytesLeft;      // unread bytes remaining in the current chunk

    public
    UnchunkingInputStream(HttpRequest target) {
        super(target.in);
        this.target = target;
    }

    public int
    read() throws IOException {
        if ((bytesLeft <= 0) && (getChunkSize() == false)) {
            return -1;
        }
        bytesLeft--;
        return in.read();
    }

    /**
     * Read up to <code>len</code> bytes of body data, crossing chunk
     * boundaries as long as more data is available without waiting.
     *
     * @return the number of bytes stored, 0 if <code>len</code> is 0,
     *         or -1 if no data could be read because the body has ended
     */
    public int
    read(byte[] buf, int off, int len) throws IOException {
        if (len == 0) {
            // Nothing requested: report 0, not end of file.
            return 0;
        }
        int total = 0;
        while (true) {
            if ((bytesLeft <= 0) && (getChunkSize() == false)) {
                break;
            }
            int count = super.read(buf, off, Math.min(bytesLeft, len));
            if (count < 0) {
                // The underlying stream ended inside a chunk.  Do not
                // fold the -1 into the running totals; just hand back
                // what was read so far.
                break;
            }
            total += count;
            off += count;
            bytesLeft -= count;
            len -= count;
            if ((len <= 0) || (available() == 0)) {
                break;
            }
        }
        return (total == 0) ? -1 : total;
    }

    /**
     * Read the size line of the next chunk into <code>bytesLeft</code>.
     *
     * @return <code>true</code> if a chunk with data follows,
     *         <code>false</code> if the body has ended
     *
     * @throws IOException if the size line is missing or is not a
     *         non-negative hexadecimal number
     */
    private boolean
    getChunkSize() throws IOException {
        if (eof) {
            return false;
        }

        /*
         * Although HTTP/1.1 chunking spec says that there is one "\r\n"
         * between chunks, some servers (for example, maps.yahoo.com)
         * send more than one blank line between chunks.  So, read and skip
         * all the blank lines seen between chunks.
         */
        String line;
        do {
            // Sanity check: limit chars when expecting a chunk size.
            line = ((HttpInputStream) in).readLine(HttpRequest.LINE_LIMIT);
        } while ((line != null) && (line.length() == 0));

        if (line == null) {
            // No size line at all (presumably the stream ended).
            throw new IOException("malformed chunk");
        }

        // The size may be followed by ";extension" text, which is not
        // part of the number.
        int ext = line.indexOf(';');
        String size = ((ext >= 0) ? line.substring(0, ext) : line).trim();
        int parsed;
        try {
            parsed = Integer.parseInt(size, 16);
        } catch (NumberFormatException e) {
            throw new IOException("malformed chunk");
        }
        if (parsed < 0) {
            throw new IOException("malformed chunk");
        }
        bytesLeft = parsed;

        if (bytesLeft == 0) {
            eof = true;
            target.responseTrailers = new MimeHeaders((HttpInputStream) in);
            target.eof = true;
            target.closeSocket(true);
            return false;
        }
        return true;
    }
}
/**
 * Watchdog thread: after sleeping for the given number of seconds it
 * closes an input stream, unless {@link #cancel} was called first.
 * An interrupt cuts the sleep short; the stream is then closed right
 * away if the watchdog has not been cancelled.
 */
class Timer extends Thread {
    int timeout;                    // how long to wait, in seconds
    // Written by cancel() and read by run() on different threads.
    volatile InputStream close;     // the stream to close; null once cancelled
    volatile boolean timedOut = false;

    /**
     * @param seconds  how long to wait before closing the stream
     * @param close    the stream to close after the timeout
     */
    public Timer(int seconds, InputStream close) {
        this.close = close;
        this.timeout = seconds;
        // System.out.println("Watchdog: " + seconds);
    }

    public void run() {
        try {
            // Multiply as long: an int product overflows for large timeouts.
            sleep(timeout * 1000L);
        } catch (InterruptedException e) {
            // Woken early; fall through and act on the current state.
        }
        // Read the field once so a concurrent cancel() cannot null it
        // between the check and the close.
        InputStream victim = close;
        if (victim != null) {
            timedOut = true;
            try {
                // System.out.println("Watchdog: kill");
                victim.close();
            } catch (IOException e) {
                // best effort: nothing more can be done from here
            }
        }
    }

    /**
     * Disarm the watchdog: the stream will not be closed when the
     * timeout expires.
     */
    public void cancel() {
        close = null;
        // System.out.println("Watchdog: cancel");
    }

    /**
     * @return <code>true</code> if the watchdog fired and went on to
     *         close the stream
     */
    public boolean timedOut() {
        return timedOut;
    }
}