/* * HttpRequest.java * * Brazil project web application toolkit, * export version: 2.3 * Copyright (c) 1999-2009 Sun Microsystems, Inc. * * Sun Public License Notice * * The contents of this file are subject to the Sun Public License Version * 1.0 (the "License"). You may not use this file except in compliance with * the License. A copy of the License is included as the file "license.terms", * and also available at http://www.sun.com/ * * The Original Code is from: * Brazil project web application toolkit release 2.3. * The Initial Developer of the Original Code is: cstevens. * Portions created by cstevens are Copyright (C) Sun Microsystems, Inc. * All Rights Reserved. * * Contributor(s): cstevens, drach, suhler. * * Version: 2.16 * Created by cstevens on 99/09/15 * Last modified by suhler on 09/06/03 10:51:18 * * Version Histories: * * 2.16 09/06/03-10:51:18 (suhler) * added timeouts for the remote server. * * 2.15 09/01/30-16:13:43 (suhler) * Redo: remove socket Factory, replace with socket pools * * 2.14 09/01/29-11:41:39 (suhler) * redo checkpoint, about to refactor socketpool stuff for https * * 2.13 08/12/24-08:14:52 (suhler) * Rearrange to make subclassing cleaner: * - pass "factory" to socket pool constructor * - add getPool() method to get our socket pool * - tweak main() for better diagnostics * * 2.12 08/03/17-09:45:45 (suhler) * use our own factory method * * 2.11 08/03/17-09:35:39 (suhler) * - add static instance creation factory. * - add socket pool diagnostics * * 2.10 08/02/04-13:50:51 (suhler) * Don't import the Server call. We don't need it, and it prevents this * class from being used in other contexts * . 
* * 2.9 07/06/25-11:24:21 (suhler) * make default charset public * * 2.8 07/06/22-13:55:49 (suhler) * change semantics of getEncoding: * - returns HTTP defualt encoding if noe can be found * - supplied encoding always wins * make public static getEncoding() method * * 2.7 07/03/26-13:53:18 (suhler) * doc updates * * 2.6 07/03/26-13:44:17 (suhler) * add sample main() to act as a simple "wget" * * 2.5 04/11/30-15:19:40 (suhler) * fixed sccs version string * * 2.4 03/08/01-16:18:01 (suhler) * fixes for javadoc * * 2.3 03/05/12-16:26:13 (suhler) * Merged changes between child workspace "/home/suhler/brazil/naws" and * parent workspace "/net/mack.eng/export/ws/brazil/naws". * * 1.20.1.1 03/04/17-10:03:47 (suhler) * no changes made * * 2.2 03/04/15-17:29:08 (drach) * Add protected modifier to variable connected so subclasses outside * package can access it. * * 2.1 02/10/01-16:36:54 (suhler) * version change * * 1.20 02/07/23-08:31:15 (suhler) * check for no content type * * 1.19 02/07/11-15:37:33 (suhler) * add encoding diagnostics * * 1.18 02/07/11-15:03:40 (suhler) * add getContent() and getEncoding() convenience methods for dealing * with charset encoding * * 1.17 02/04/29-17:04:41 (suhler) * added public static boolean displayAllHeaders to turn on * http header debugging during development. * * 1.16 02/04/24-13:36:21 (suhler) * doc lint * * 1.15 02/02/26-14:42:14 (suhler) * doc lint * * 1.14 02/02/26-14:32:38 (suhler) * typo * * 1.13 02/02/26-14:25:52 (suhler) * added "addHeaders" convenience method for adding http headers from * properties objects * * 1.12 00/07/11-11:23:47 (cstevens) * Some servers send "HTTP/1.0 100 Continue" in response to an HTTP/1.1 POST! * * 1.11 00/07/06-15:03:10 (cstevens) * Although HTTP/1.1 chunking spec says that there is one "\r\n" between * chunks, some servers (for example, maps.yahoo.com) send more than one blank * line between chunks. So, read and skip all the blank lines seen between * chunks. 
* * 1.10 99/11/30-09:48:14 (suhler) * remove diagnostics * * 1.9 99/11/09-20:23:23 (cstevens) * bugs revealed by writing tests. * * 1.8 99/10/26-18:56:38 (cstevens) * Change MimeHeaders so it uses "put" instead of "set", to be compatible with * names chosen by Hashtable and StringMap. * * 1.7 99/10/14-14:16:31 (cstevens) * merge issues. * * 1.6 99/10/14-13:19:18 (cstevens) * Merged changes between child workspace "/home/cstevens/ws/brazil/naws" and * parent workspace "/export/ws/brazil/naws". * * 1.4.1.2 99/10/14-13:01:06 (cstevens) * Documentation. * Fold TimedThread and the default HttpSocketPool into this file, since they are * not used outside of this file (at this time). * * 1.5 99/10/11-12:38:38 (suhler) * Merged changes between child workspace "/home/suhler/brazil/naws" and * parent workspace "/net/mack.eng/export/ws/brazil/naws". * * 1.4.1.1 99/10/08-16:54:45 (cstevens) * documentation * Move logic for removing point-to-point headers into the HttpRequest as a * static method. * * 1.4 99/10/07-13:17:55 (cstevens) * Documentation for HttpRequest (in progress). * * 1.3.1.1 99/10/06-12:31:57 (suhler) * comment out debugging * * 1.3 99/09/15-15:57:16 (cstevens) * debugging * * 1.2 99/09/15-14:52:02 (cstevens) * import *; * * 1.2 99/09/15-14:39:36 (Codemgr) * SunPro Code Manager data about conflicts, renames, etc... 
* Name history : 2 1 request/HttpRequest.java * Name history : 1 0 util/http/HttpRequest.java * * 1.1 99/09/15-14:39:35 (cstevens) * date and time created 99/09/15 14:39:35 by cstevens * */ package sunlabs.brazil.util.http; import sunlabs.brazil.util.regexp.Regexp; import java.io.ByteArrayOutputStream; import java.io.EOFException; import java.io.UnsupportedEncodingException; import java.io.IOException; import java.io.BufferedOutputStream; import java.io.BufferedInputStream; import java.io.InputStream; import java.io.FileOutputStream; import java.io.OutputStream; import java.io.PrintStream; import java.io.PushbackInputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.UnknownHostException; import java.net.Socket; import java.util.Properties; import java.util.Vector; import java.util.StringTokenizer; /** * Sends an HTTP request to some target host and gets the answer back. * Similar to the URLConnection class. *

* Caches connections to hosts, and reuses them if possible. Talks * HTTP/1.1 to the hosts, in order to keep alive connections as much * as possible. *

* The sequence of events for using an HttpRequest is similar * to how URLConnection is used: *

    *
  1. A new HttpRequest object is constructed. *
  2. The setup parameters are modified: * *
  3. The host (or proxy) is contacted and the HTTP request is issued: * *
  4. The response headers and body are examined: * *
  5. The connection is closed: * *
*

* In the common case, all the setup parameters are initialized to sensible * values and won't need to be modified. Most users will only need to * construct a new HttpRequest object and then call * getInputStream to read the contents. The rest of the * member variables and methods are only needed for advanced behavior. *

* The HttpRequest class is intended to be a replacement for the * URLConnection class. It operates at a lower level and makes * fewer decisions on behavior. Some differences between the * HttpRequest class and the URLConnection class * follow:

*

* A number of the fields in the HttpRequest object are public, * by design. Most of the methods mentioned above are convenience methods; * the underlying data fields are meant to be accessed for more complicated * operations, such as changing the socket factory or accessing the raw HTTP * response line. Note however, that the order of the methods described * above is important. For instance, the user cannot examine the response * headers (by calling getResponseHeader or by examining the * variable responseHeaders) without first having connected to * the host. *

* However, if the user wants to modify the default behavior, the * HttpRequest uses the value of a number of variables and * automatically sets some HTTP headers when sending the request. The user * can change these settings up until the time connect is * called, as follows:

*
variable {@link #version} *
By default, the HttpRequest issues HTTP/1.1 * requests. The user can set version to change this to * HTTP/1.0. *
variable {@link #method} *
If method is null (the default), * the HttpRequest decides what the HTTP request method * should be as follows: If the user has called * getOutputStream, then the method will be "POST", * otherwise the method will be "GET". *
variable {@link #proxyHost} *
If the proxy host is specified, the HTTP request will be * sent via the specified proxy: * Otherwise, the HTTP request will go directly to the host: *
header "Connection" or "Proxy-Connection" *
The HttpRequest sets the appropriate connection * header to "Keep-Alive" to keep alive the connection to the host or * proxy (respectively). By setting the appropriate connection header, * the user can control whether the HttpRequest tries to * use Keep-Alives. *
header "Host" *
The HTTP/1.1 protocol requires that the "Host" header be set * to the name of the machine being contacted. By default, this is * derived from the URL used to construct the HttpRequest, * and is set automatically if the user does not set it. *
header "Content-Length" *
If the user calls getOutputStream and writes some * data to it, the "Content-Length" header will be set to the amount of * data that has been written at the time that connect * is called. *
*
* Once all data has been read from the remote host, the underlying socket * may be automatically recycled and used again for subsequent requests to * the same remote host. If the user is not planning on reading all the data * from the remote host, the user should call close to release * the socket. Although it happens under the covers, the user should be * aware that if an IOException occurs or once data has been read normally * from the remote host, close is called automatically. This * is to ensure that the minimal number of sockets are left open at any time. *

* The input stream that getInputStream provides automatically * hides whether the remote host is providing HTTP/1.1 "chunked" encoding or * regular streaming data. The user can simply read until reaching the * end of the input stream, which signifies that all the available data from * this request has been read. If reading from a "chunked" source, the * data is automatically de-chunked as it is presented to the user. Currently, * no access is provided to the underlying raw input stream. * @version %I% */ public class HttpRequest { /** * Timeout (in msec) to drain an input stream that has been closed before * the entire HTTP response has been read. *

* If the user closes the HttpRequest before reading all of * the data, but the remote host has agreed to keep this socket alive, we * need to read and discard the rest of the response before issuing a new * request. If it takes longer than DRAIN_TIMEOUT to read * and discard the data, we will just forcefully close the connection to * the remote host rather than waiting to read any more. *

* Default value is 10000. */ public static int DRAIN_TIMEOUT = 10000; /** * Maximum length of a line in the HTTP response headers (sanity check). *

* If an HTTP response line is longer than this, the response is * considered to be malformed. *

* Default value is 2000. */ public static int LINE_LIMIT = 2000; /** * The default HTTP version string to send to the remote host when * issuing requests. *

* The default value can be overridden on a per-request basis by * setting the version instance variable. *

* Default value is "HTTP/1.1".
     *
     * @see #version
     */
    public static String defaultHTTPVersion = "HTTP/1.1";

    /**
     * A list of socket pools.  Each socket pool is responsible for
     * managing connections for a particular type of transport.  The
     * default pool handles standard TCP sockets.  Additional transport
     * providers may be added (see {@link #appendPool} and
     * {@link #prependPool} below).
     */
    public static Vector socketPools = new Vector();

    // Seed the list with a SimpleHttpSocketPool when the class is initialized.
    static {
        socketPools.addElement(new SimpleHttpSocketPool());
    }

    /**
     * The cache of idle sockets to use for this request.
     * Once a request has been handled, the
     * now-idle socket can be remembered and reused later if another HTTP
     * request is made to the same remote host.
     */
    HttpSocketPool pool = null;	// the socket pool to use for this target

    /**
     * The URL used to construct this HttpRequest.
     */
    public URL url;

    /**
     * The host extracted from the URL used to construct this
     * HttpRequest.
     *
     * @see #url
     */
    public String host;

    /**
     * The port extracted from the URL used to construct this
     * HttpRequest.
     *
     * @see #url
     */
    public int port;

    /**
     * If non-null, sends this HTTP request via the specified
     * proxy host and port.
     * May be changed
     * by the user at any time up until the HTTP request is actually sent.
     *
     * @see #proxyPort
     * @see #setProxy
     * @see #connect
     */
    public String proxyHost = null;

    /**
     * The proxy port.
     *
     * @see #proxyHost
     */
    public int proxyPort;

    /*
     * Set to true by connect() so that repeated calls return immediately.
     */
    protected boolean connected;

    /*
     * Soft end-of-data flag consulted by closeSocket(): while it is false,
     * a kept-alive socket is drained in the background before being
     * handed back to the pool.
     */
    boolean eof;

    /*
     * The socket obtained from the pool by openSocket(); reset to null
     * once closeSocket() has released it.
     */
    HttpSocket hs;

    /**
     * The HTTP method, such as "GET", "POST", or "HEAD".
     *

* May be set by the user at any time up until the HTTP request is * actually sent. */ public String method; /** * The HTTP version string. *

* Initialized from {@link #defaultHTTPVersion}, but may be changed
     * by the user at any time up until the HTTP request is actually sent.
     */
    public String version;

    /**
     * The headers for the HTTP request.  All of these headers will be sent
     * when the connection is actually made.
     */
    public MimeHeaders requestHeaders;

    /**
     * Setting this to "true" causes all http headers to be printed
     * on the standard error stream; useful for debugging client/server
     * interactions.
     */
    public static boolean displayAllHeaders = false;
    public boolean displayHeaders = false;	// set on a per-request basis

    /*
     * Buffer handed out by getOutputStream().  prepareHeaders() uses its
     * size for the "Content-Length" header and sendRequest() writes it
     * after the request headers.
     */
    ByteArrayOutputStream postData;

    /*
     * Request target for the request line; computed in prepareHeaders()
     * (the URL's file part for direct requests, the full URL via a proxy).
     */
    String uri;

    /*
     * "Connection" or "Proxy-Connection", chosen in prepareHeaders()
     * depending on whether a proxy is in use.
     */
    String connectionHeader;

    /*
     * The stream returned by getInputStream().
     */
    HttpInputStream in;

    /*
     * The stream that "in" is layered on; this is what closeSocket()
     * drains before a kept-alive socket is recycled.
     */
    InputStream under;

    /**
     * The status line from the HTTP response.  This field is not valid until
     * after {@code connect} has been called and the HTTP response has
     * been read.
     */
    public String status;

    /**
     * The headers that were present in the HTTP response.  This field is
     * not valid until after {@code connect} has been called and the
     * HTTP response has been read.
     */
    public MimeHeaders responseHeaders;

    /*
     * Cached value of keep-alive from the response headers.
     */
    boolean keepAlive;

    /**
     * An artifact of HTTP/1.1 chunked encoding.  At the end of an HTTP/1.1
     * chunked response, there may be more MimeHeaders.  It is only possible
     * to access these MimeHeaders after all the data from the input stream
     * returned by {@code getInputStream} has been read.  At that point,
     * this field will automatically be initialized to the set of any headers
     * that were found.  If not reading from an HTTP/1.1 chunked source, then
     * this field is irrelevant and will remain {@code null}.
     */
    public MimeHeaders responseTrailers;

    /**
     * How many seconds to wait for a target response.
     * 0 (the default) waits forever.
     */
    int requestTimeout=0;

    /**
     * Creates a new {@code HttpRequest} object that will send an
     * HTTP request to fetch the resource represented by the URL.
     *

* The host specified by the URL is not contacted at this time. * * @param url * A fully qualified "http:" URL. * * @throws IllegalArgumentException * if url is not an "http:" URL. */ public HttpRequest(URL url) { init(url); } void init(URL url) { pool = null; for(int i=0;iHttpRequest object that will send an * HTTP request to fetch the resource represented by the URL. *

* The host specified by the URL is not contacted at this time. * * @param url * A string representing a fully qualified "http:" URL. * * @throws IllegalArgumentException * if url is not a well-formed "http:" URL. * @deprecated Use the static getRequest method instead */ public HttpRequest(String url) { this(toURL(url)); } /* * Artifact of Java: cannot implement HttpRequest(String) as follows * because this(new URL(url)) must be first line in * constructor; it can't be inside of try statement: * * public HttpRequest(String url) { * try { * this(new URL(url)); * } catch (MalformedURLException e) { * throw new IllegalArgumentException(url); * } * } */ static URL toURL(String url) { try { return new URL(url); } catch (MalformedURLException e) { throw new IllegalArgumentException(url); } } /** * Sets the HTTP method to the specified value. Some of the normal * HTTP methods are "GET", "POST", "HEAD", "PUT", "DELETE", but the * user can set the method to any value desired. *

* If this method is called, it must be called before connect * is called. Otherwise it will have no effect. * * @param method * The string for the HTTP method, or null to * allow this HttpRequest to pick the method for * itself. */ public void setMethod(String method) { this.method = method; } /** * Sets the proxy for this request. The HTTP proxy request will be sent * to the specified proxy host. *

* If this method is called, it must be called before connect * is called. Otherwise it will have no effect. * * @param proxyHost * The proxy that will handle the request, or null * to not use a proxy. * * @param proxyPort * The port on the proxy, for the proxy request. Ignored if * proxyHost is null. */ public void setProxy(String proxyHost, int proxyPort) { this.proxyHost = proxyHost; if (proxyPort > 0) { this.proxyPort = proxyPort; } else { this.proxyPort = pool.getDefaultPort(); } } /** * Sets a request header in the HTTP request that will be issued. In * order to do fancier things like appending a value to an existing * request header, the user may directly access the * requestHeaders variable. *

* If this method is called, it must be called before connect * is called. Otherwise it will have no effect. * * @param key * The header name. * * @param value * The value for the request header. * * @see #requestHeaders */ public void setRequestHeader(String key, String value) { requestHeaders.put(key, value); } /** * Gets an output stream that can be used for uploading data to the * host. *

* If this method is called, it must be called before connect * is called. Otherwise it will have no effect. *

* Currently the implementation is not as good as it could be.  The
     * user should avoid uploading huge amounts of data, for some definition
     * of huge.
     *
     * @return An in-memory buffer that collects the data to upload.  The
     *         same buffer is returned on repeated calls until the request
     *         has been sent.
     *
     * @throws IOException
     *         declared by the signature; the code in this method does not
     *         throw it.
     */
    public OutputStream getOutputStream() throws IOException {
        // Created lazily so that requests without a body never allocate one.
        if (postData == null) {
            postData = new ByteArrayOutputStream();
        }
        return postData;
    }

    /**
     * Connect to the target host (or proxy), send the request, and read the
     * response headers.  Any setup routines must be called before the call
     * to this method, and routines to examine the result must be called after
     * this method.
     *

* * @throws UnknownHostException * if the target host (or proxy) could not be contacted. * * @throws IOException * if there is a problem writing the HTTP request or reading * the HTTP response headers. */ public void connect() throws UnknownHostException, IOException { Timer watchdog=null; if (connected) { return; } connected = true; prepareHeaders(); openSocket(true); if (requestTimeout > 0) { watchdog = new Timer(requestTimeout, in); watchdog.start(); } try { try { sendRequest(); readStatusLine(); } catch (IOException e) { if (hs.firstTime) { throw e; } closeSocket(false); openSocket(false); sendRequest(); readStatusLine(); } responseHeaders.read(in); if (watchdog != null) { watchdog.cancel(); watchdog.interrupt(); } if (displayHeaders) { System.err.println(status); responseHeaders.print(System.err); System.err.println(); } } catch (IOException e) { closeSocket(false); if (watchdog!= null && watchdog.timedOut()) { throw new TimeoutException("waiting for server response"); } else { throw e; } } parseResponse(); } /** * Set the timeout for getting a remote response. * If the origin server hasn't responded with at least the * response headers in this time, terminate the request. * The timeout may be up until a call to connect(). * A value of '0' turns off the timeout. *

* If a timeout occurs, a TimeoutException is thrown. * * @param sec timeout, in seconds. */ public void setTimeout(int sec) { requestTimeout=sec; } void prepareHeaders() { if (postData != null) { if (method == null) { method = "POST"; } setRequestHeader("Content-Length", Integer.toString(postData.size())); } if (method == null) { method = "GET"; } if (proxyHost == null) { uri = url.getFile(); connectionHeader = "Connection"; } else { uri = url.toString(); connectionHeader = "Proxy-Connection"; } requestHeaders.putIfNotPresent(connectionHeader, "Keep-Alive"); requestHeaders.putIfNotPresent("Host", host + ":" + port); } void openSocket(boolean reuse) throws IOException { String targetHost; int targetPort; if (proxyHost != null) { targetHost = proxyHost; targetPort = proxyPort; } else { targetHost = host; targetPort = port; } hs = pool.get(targetHost, targetPort, reuse); under = hs.in; in = new HttpInputStream(under); } void closeSocket(boolean reuse) { if (hs != null) { HttpSocket tmp = hs; hs = null; keepAlive &= reuse; /* * Before we can reuse a keep-alive socket, we must first drain * the input stream if there is any data left in it. The soft * 'eof' flag will have been set if we have already read all the * data that we're supposed to read and the socket is ready to be * recycled now. 
*/ if (keepAlive && !eof) { new BackgroundCloser(tmp, under, DRAIN_TIMEOUT).start(); } else { pool.close(tmp, keepAlive); } } } class BackgroundCloser extends Thread { HttpSocket hs; InputStream in; int timeout; Killer killer; BackgroundCloser(HttpSocket hs, InputStream in, int timeout) { this.hs = hs; this.in = in; this.timeout = timeout; } public void start() { killer = new Killer(this); killer.start(); super.start(); } public void run() { try { byte[] buf = new byte[4096]; while (true) { if (in.read(buf, 0, buf.length) < 0) { break; } } } catch (IOException e) { keepAlive = false; } pool.close(hs, keepAlive); killer.interrupt(); } } static class Killer extends Thread { BackgroundCloser b; int timeout; Killer(BackgroundCloser b) { this.b = b; } public void run() { try { Thread.sleep(b.timeout); b.interrupt(); } catch (Exception e) {} } } void sendRequest() throws IOException { if (displayHeaders) { System.err.print(method + " " + uri + " " + version + "\r\n"); requestHeaders.print(System.err); System.err.print("\r\n"); } PrintStream p = new PrintStream(hs.out); p.print(method + " " + uri + " " + version + "\r\n"); requestHeaders.print(p); p.print("\r\n"); if (postData != null) { postData.writeTo(p); postData = null; // Release memory. } p.flush(); } void readStatusLine() throws IOException { while (true) { status = in.readLine(LINE_LIMIT); if (status == null) { throw new EOFException(); } if (status.startsWith("HTTP/1.1 100") || status.startsWith("HTTP/1.0 100")) { /* * Ignore the "100 Continue" response that some HTTP/1.1 * servers send. We can't depend upon it being sent, because * we might be talking to an HTTP/1.0 server or an HTTP/1.1 * server that doesn't send the "100 Continue" response, so * we can't use the response for any decision making, such as * not sending the post data. * * www.u-net.com sends "HTTP/1.0 100 Continue"! 
*/ while (true) { status = in.readLine(); if ((status == null) || (status.length() == 0)) { break; } } } else if (status.startsWith("HTTP/1.")) { return; } else if (status.length() == 0) { // System.out.println(this + ": got a blank line"); } else if (status.length() == LINE_LIMIT) { throw new IOException("malformed server response"); } else if (hs.firstTime) { /* * Some servers don't send back any headers, even if they * accept a HTTP/1.0 or greater request! We have to push * back this line, so it can be re-read as the body. * Since this is coming back with no headers, the content * length will be unknown and so the socket will be closed. */ // System.out.println("receiving HTTP/0.9 response"); PushbackInputStream pin = new PushbackInputStream(hs.in, status.length() + 4); pin.unread('\n'); pin.unread('\r'); for (int i = status.length(); --i >= 0; ) { pin.unread(status.charAt(i)); } /* * And push back a blank line, so the user thinks it got to * the end of the headers */ pin.unread('\n'); pin.unread('\r'); status = "HTTP/1.0 200 OK"; hs.in = pin; under = pin; in = new HttpInputStream(under); break; } else { /* * If we see funny responses (missing headers, etc.) from a * socket that we've reused, then we probably got out of sync * with the remote host (e.g., didn't read enough from the * last response), and should abort this request. */ throw new IOException("malformed server response"); } } } void parseResponse() { String str; str = getResponseHeader(connectionHeader); if (str != null) { keepAlive = str.equalsIgnoreCase("Keep-Alive"); } else if (status.startsWith("HTTP/1.1")) { keepAlive = true; } else { keepAlive = false; } str = getResponseHeader("Transfer-Encoding"); if ((str != null) && str.equals("chunked")) { under = new UnchunkingInputStream(this); in = new RecycleInputStream(this, under); return; } int contentLength = getContentLength(); if (contentLength < 0) { /* * Some servers leave off the content length for return codes * known to require no content. 
*/ if (status.indexOf("304") > 0 || status.indexOf("204") > 0) { responseHeaders.put("Content-Length", "0"); contentLength = 0; } } if ((contentLength == 0) || method.equals("HEAD")) { under = new NullInputStream(); in = new HttpInputStream(under); closeSocket(keepAlive); } else if (contentLength > 0) { under = new LimitInputStream(this, contentLength); in = new RecycleInputStream(this, under); } else { keepAlive = false; in = new RecycleInputStream(this, under); } } /** * Gets an input stream that can be used to read the body of the * HTTP response. Unlike the other convenience methods for accessing * the HTTP response, this one automatically connects to the * target host if not already connected. *

* The input stream that {@code getInputStream} provides
     * automatically hides the differences between "Content-Length", no
     * "Content-Length", and "chunked" for HTTP/1.0 and HTTP/1.1 responses.
     * In all cases, the user can simply read until reaching the end of the
     * input stream, which signifies that all the available data from this
     * request has been read.  (If reading from a "chunked" source, the data
     * is automatically de-chunked as it is presented to the user.  There is
     * no way to access the raw underlying stream that contains the HTTP/1.1
     * chunking packets.)
     *
     * @return The stream from which the response body can be read.
     *
     * @throws IOException
     *         if there is problem connecting to the target.
     *
     * @see #connect
     */
    public HttpInputStream getInputStream() throws IOException {
        // connect() returns immediately if the request was already sent.
        connect();
        return in;
    }

    /**
     * Gracefully closes this HTTP request when user is done with it.
     *

* The user can either call this method or close on the * input stream obtained from the getInputStream * method -- the results are the same. *

* When all the response data is read from the input stream, the * input stream is automatically closed (recycled). If the user is * not going to read all the response data from input stream, the user * must call close to * release the resources associated with the open request. Otherwise * the program may consume all available sockets, waiting forever for * the user to finish reading. *

* Note that the input stream is automatically closed if the input * stream throws an exception while reading. *

* In order to interrupt a pending I/O operation in another thread * (for example, to stop a request that is taking too long), the user * should call disconnect or interrupt the blocked thread. * The user should not call close in this case because * close will not interrupt the pending I/O operation. *

* Closing the request multiple times is allowed. *

* In order to make sure that open sockets are not left lying around * the user should use code similar to the following: *

     * OutputStream out = ...
     * HttpRequest http = HttpRequest.getRequest("http://bob.com/index.html");
     * try {
     *     HttpInputStream in = http.getInputStream();
     *     in.copyTo(out);
     * } finally {
     *     // Copying to "out" could have failed.  Close "http" in case
     *     // not all the data has been read from it yet.
     *     http.close();
     * }
     * 
*/ public void close() { closeSocket(true); } /** * Interrupts this HTTP request. Can be used to halt an in-progress * HTTP request from another thread, by causing it to * throw an InterruptedIOException during the connect * or while reading from the input stream, depending upon what state * this HTTP request is in when it is disconnected. * * @see #close */ public void disconnect() { closeSocket(false); } /** * Gets the HTTP response status code. From responses like: *
     * HTTP/1.0 200 OK
     * HTTP/1.0 401 Unauthorized
     * 
* this method extracts the integers 200 and 401 * respectively. Returns -1 if the response status code * was malformed. *

* If this method is called, it must be called after connect * has been called. Otherwise the information is not yet available and * this method will return -1. *

* For advanced features, the user can directly access the
     * {@code status} variable.
     *
     * @return The integer status code from the HTTP response, or
     *         {@code -1} if no status line is available or its second
     *         field is not a number.
     *
     * @see #connect
     * @see #status
     */
    public int getResponseCode() {
        if (status == null) {
            // Not connected yet.
            return -1;
        }

        /*
         * The code is the second blank-separated field.  Tokenizing (rather
         * than searching for single spaces) tolerates repeated or leading
         * blanks, and a status line with no reason phrase.
         */
        StringTokenizer fields = new StringTokenizer(status, " \t");
        if (!fields.hasMoreTokens()) {
            return -1;
        }
        fields.nextToken();			// protocol version
        if (!fields.hasMoreTokens()) {
            return -1;
        }
        try {
            return Integer.parseInt(fields.nextToken());
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Gets the value associated with the given case-insensitive header name
     * from the HTTP response.
     *

* If this method is called, it must be called after connect * has been called. Otherwise the information is not available and * this method will return null. *

* For advanced features, such as enumerating over all response headers, * the user should directly access the responseHeaders * variable. * * @param key * The case-insensitive name of the response header. * * @return The value associated with the given name, or null * if there is no such header in the response. * * @see #connect * @see #responseHeaders */ public String getResponseHeader(String key) { return responseHeaders.get(key); } /** * Convenience method to get the "Content-Length" header from the * HTTP response. *

* If this method is called, it must be called after connect * has been called. Otherwise the information is not available and * this method will return -1. * * @return The content length specified in the response headers, or * -1 if the length was not specified or malformed * (not a number). * * @see #connect * @see #getResponseHeader */ public int getContentLength() { try { return Integer.parseInt(responseHeaders.get("Content-Length")); } catch (Exception e) { return -1; } } /** * Removes all the point-to-point (hop-by-hop) headers from * the given mime headers. * * @param headers * The mime headers to be modified. * * @param response * true to remove the point-to-point response * headers, false to remove the point-to-point * request headers. * * @see RFC 2068 */ public static void removePointToPointHeaders(MimeHeaders headers, boolean response) { headers.remove("Connection"); headers.remove("Proxy-Connection"); headers.remove("Keep-Alive"); headers.remove("Upgrade"); if (response == false) { headers.remove("Proxy-Authorization"); } else { headers.remove("Proxy-Authenticate"); headers.remove("Public"); headers.remove("Transfer-Encoding"); } } /** * Convenience method for adding request headers by looking them * up in a properties object. * @param tokens a white space delimited set of tokens that refer * to headers that will be added to the HTTP request. * @param props Keys of the form [token].name and * [token].value are used to lookup additional * HTTP headers to be added to the request. 
* @return The number of headers added to the request * @see #setRequestHeader */ public int addHeaders(String tokens, Properties props) { int count = 0; StringTokenizer st = new StringTokenizer(tokens); while (st.hasMoreTokens()) { String token = st.nextToken(); String name = props.getProperty(token + ".name"); String value = props.getProperty(token + ".value"); if (name!=null && value!=null) { setRequestHeader(name, value); count++; } } return count; } /** * Get the content as a string. Uses the character * encoding specified in "encoding", if specified, or the * encoding implied by the http headers. * @param encoding The ISO character encoding to use. * @return The content as a string. */ public String getContent(String encoding) throws IOException, UnsupportedEncodingException { HttpInputStream in = getInputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream(); in.copyTo(out); in.close(); String result = out.toString(encoding!=null ? encoding : getEncoding()); out.close(); return result; } /** * Return the content as a string. */ public String getContent() throws IOException, UnsupportedEncodingException { return getContent(null); } /** * Get the ISO character encoding (if any) associated with this * text stream, * or the default http charset if none found. * Response headers must be available. */ public String getEncoding() { return getEncoding(responseHeaders); } public static final String DEFAULT_CHARSET="ISO-8859-1"; static final Regexp encExp = new Regexp("^text/.*;[ \t]*charset=([^ \t;]*)",true); public static String getEncoding(MimeHeaders headers) { String type = headers.get("content-type"); if (type == null) { return DEFAULT_CHARSET; } else { type = type.trim(); } String subs[] = new String[2]; if (encExp.match(type, subs)) { return subs[1]; } else { return DEFAULT_CHARSET; } } /** * Grab http document(s) and save them in the filesystem. * This is a simple batch HTTP url fetcher. Usage: *

     * <pre>
     * java ... sunlabs.brazil.request.HttpRequest [-v(erbose)] [-h(headers)] [-p[http://proxyhost:port]] url...
     * </pre>
     * <dl>
     * <dt>-v</dt>
     * <dd>Verbose.  Print the target URL and destination file on stderr</dd>
     * <dt>-h</dt>
     * <dd>Print all the HTTP headers on stderr</dd>
     * <dt>-phttp://proxyhost:port</dt>
     * <dd>The following url's are to be fetched
     * via a proxy.
     * The options and url's may be given in any order.  Use "-p" by itself
     * to disable the proxy for all following requests.</dd>
     * </dl>
     * <p>
* There are many limitations: only HTTP GET requests are supported, the * output filename is derived autmatically from the URL and can't be * overridden, if a destination file already exists, it is overwritten. */ public static void main(String[] args) throws Exception { String proxyHost = null; int proxyPort = 0; boolean isVerbose = false; if (args.length == 0) { System.err.println("Usage: [-v(erbose) -h(headers) -p] url..."); System.exit(1); } for (int i=0; i 7) { URL url = new URL(arg.substring(1)); proxyHost = url.getHost(); proxyPort = url.getPort(); } else { proxyHost = null; } break; default: System.err.println("Invalid argument, ignored: -" + arg); } continue; } try { HttpRequest target = HttpRequest.getRequest(args[i]); String name = url2file(args[i]); if (isVerbose) { System.err.println("Fetching (" + args[i] + ") to (" + name + ")"); } target.setProxy(proxyHost, proxyPort); HttpInputStream in = target.getInputStream(); FileOutputStream out = new FileOutputStream(name); in.copyTo(out); in.close(); out.close(); } catch (IOException e) { System.err.println("Error fetching " + args[i] + ": " + e.getMessage()); } } } /** * Append a socket pool to the end of the list. * This pool is checked last for a matching URL */ public static void appendPool(HttpSocketPool pool) { socketPools.addElement(pool); } /** * Prepend a socket pool to the end of the list. * This pool is checked first for a matching URL */ public static void prependPool(HttpSocketPool pool) { socketPools.insertElementAt(pool, 0); } /* Invent a url from a file name. */ static String url2file(String url) throws IOException { return url.replace('/', '_'); } /** * This interface represents a cache of idle sockets. Once a request has * been handled, the now-idle socket can be remembered and reused later in * case another HTTP request is made to the same remote host. Currently, the * only instance of this interface is used by the HttpRequest * class. 
* * @author Colin Stevens (colin.stevens@sun.com) * @version %I% */ public interface HttpSocketPool { /** * Return true if this is one of the url's my pool can handle */ public boolean isMine(URL url); /** * Return the default port (this doesn't belong here) */ public int getDefaultPort(); /** * Returns an HttpSocket that can be used to communicate * with the specified port on the named host. *

* It is this method's responsibility to to fill in all the public * member variables of the HttpSocket before returning. *

* For each call to this method, there should eventually be a call to * close when the HttpSocket isn't needed * anymore. * * @param host * The host name. * @param port * The port number. * @param reuse * true to request that this pool attempt to * find and reuse an existing idle connection, * false * to request that this pool establish a new connection to * the named host. * * @return The HttpSocket. * * @throws IOException * if there is a problem connecting the specified port on * the named host. The IOExceptions (and * subclasses) that might be thrown depend upon how the * socket connection is established. See the socket * documentation for further details. Some subclasses that * might be thrown are as follows: * * @throws java.io.UnknownHostException * if the host name cannot be resolved. * * @throws java.io.ConnectionException * if the named host is not listening on "port". * * @throws java.io.InterruptedIOException * if the connection times out or this thread is * interrupted by Thread.interrupt. */ public HttpSocket get(String host, int port, boolean reuse) throws IOException; /** * Releases an HttpSocket to this pool when it is not * in use any more. *

* It is this method's responsibility to release resources used * by the HttpSocket, e.g. closing the underlying socket. *

* After calling this method, the user should not refer to the specified * HttpSocket any more. * * @param hs * The HttpSocket to release. * * @param reuse * true: the specified HttpSocket * should be put back into the idle pool, * false * if it should be released immediately. */ public void close(HttpSocket hs, boolean reuse); } public static class SimpleHttpSocketPool implements Runnable, HttpRequest.HttpSocketPool { public int maxIdle; // size of the socket pool public int maxAge; // max age of idle socket (mseconds) public int reapInterval; // interval (in msec) to run reaper thread // pool of idle connections Vector idle = new Vector(); /** * Start the background thread that removes old connections */ Thread reaper; /** * Create a socket pool. A socket pool is responsible for * providing the underlying "socket" like transport for the * http protocol, and managing the lifetimes of the connections. * * @param maxIdle (ms) Max number of idle connections * @param maxAge (ms) Max idle time for a connection * @param reapInterval (ms) How often to check for expired sockets */ public SimpleHttpSocketPool(int maxIdle, int maxAge, int reapInterval) { this.maxIdle = maxIdle; this.maxAge = maxAge; this.reapInterval = reapInterval; reaper = new Thread(this); reaper.setDaemon(true); reaper.start(); } /** * Use the default pool settings */ public SimpleHttpSocketPool() { this(20, 30000, 10000); } public boolean isMine(URL url) { return url.getProtocol().equals("http"); } public int getDefaultPort() { return 80; } /** * Get a potentially "pooled" target object. * Call this instead of the constructor to use the pool. * @param host the target content server (or web proxy) * @param port target web server port */ public HttpSocket get(String host, int port, boolean reuse) throws IOException, UnknownHostException { host = host.toLowerCase(); if (reuse) { synchronized (idle) { /* * Start at end to reuse the most recent socket, which is * hopefully the most likely to still be alive. 
*/ int i = idle.size(); while (--i >= 0) { HttpSocket hs = (HttpSocket) idle.elementAt(i); // System.err.println("Checking pool item : " + hs); if (hs.host.equals(host) && (hs.port == port)) { idle.removeElementAt(i); // System.err.println(" reusing: " + hs); hs.timesUsed++; return hs; } } } } Socket s = getSocket(host, port); HttpSocket hs = new HttpSocket(host, port, s); // System.err.println(" new: " + hs); return hs; } /** * Subclasses can override this */ protected Socket getSocket(String host, int port) throws IOException { return new Socket(host, port); } public void close(HttpSocket hs, boolean reuse) { if (reuse) { // System.err.println(" recycling: " + hs); synchronized (idle) { if (idle.size() >= maxIdle) { HttpSocket bump = (HttpSocket) idle.firstElement(); idle.removeElementAt(0); bump.close(); } hs.firstTime = false; hs.lastUsed = System.currentTimeMillis(); idle.addElement(hs); } } else { // System.err.println(" closing: " + hs); hs.close(); } } int lastSize = -1; public void run() { while(true) { try { Thread.sleep(reapInterval); } catch (InterruptedException e) { continue; } /* * Expire after age seconds. * Start looking at the front - the "oldest" sockets first. */ long expired = System.currentTimeMillis() - maxAge; // System.out.println("Reaper..."); synchronized (idle) { while (idle.size() > 0) { HttpSocket hs = (HttpSocket) idle.firstElement(); if (hs.lastUsed >= expired) { break; } idle.removeElementAt(0); // System.out.println("Reaping: " + hs); hs.close(); } } } } public String toString() { StringBuffer sb = new StringBuffer("SimpleHttpSocketPool "); synchronized (idle) { if (idle == null) { return "(null)"; } for (int i = 0; i < idle.size(); i++) { HttpSocket hs = (HttpSocket) idle.elementAt(i); sb.append(hs.toString() + ", "); } } return sb.toString(); } } /** * This class is used as the bag of information kept about a open, idle * socket. It is Used by the SimpleSocketPool. * a better place for it is found. 
*/
    public static class HttpSocket {
        String host;
        int port;
        Socket sock;
        boolean firstTime = true;   // cleared when the socket is pooled
        long lastUsed;              // time (ms) the socket was last pooled
        int timesUsed = 1;

        InputStream in;
        OutputStream out;

        private static int count = 0;
        private int serial;         // creation sequence number, for toString

        /**
         * Wrap a connected socket, buffering both of its streams.
         *
         * @param host  The host the socket is connected to.
         * @param port  The port the socket is connected to.
         * @param sock  The connected socket.
         * @throws IOException
         *              if the socket's streams cannot be obtained.
         */
        public HttpSocket(String host, int port, Socket sock)
                throws IOException, UnknownHostException {
            this.sock = sock;
            this.host = host;
            this.port = port;

            in = new BufferedInputStream(sock.getInputStream());
            out = new BufferedOutputStream(sock.getOutputStream());
            serial = count++;
        }

        /**
         * Drop the streams and close the underlying socket.  Errors while
         * closing are ignored; calling this more than once is harmless.
         */
        public void close() {
            in = null;
            out = null;

            if (sock != null) {
                try {
                    sock.close();
                } catch (IOException e) {
                    // Best effort: the socket is being discarded anyway.
                }
            }
            sock = null;
        }

        public String toString() {
            return host + ":" + port + " serial: " + serial + " used: "
                    + timesUsed + " (" + sock + ")";
        }
    }

    /**
     * Timeout occurred waiting for a socket response
     */
    public static class TimeoutException extends IOException {
        public TimeoutException(String msg) {
            super(msg);
        }
    }
}

/**
 * Input stream that hands the request's socket back when the stream is
 * finished with: an explicit <code>close()</code> asks for the socket to be
 * reused, while EOF or a read error asks for it to be discarded.
 */
class RecycleInputStream extends HttpInputStream {
    HttpRequest target;
    boolean closed;

    public RecycleInputStream(HttpRequest target, InputStream in) {
        super(in);
        this.target = target;
    }

    /**
     * Reads from the underlying input stream, which might be a raw
     * input stream, a limit input stream, or an unchunking input stream.
     * If we get EOF or there is an error reading, close the socket.
     */
    public int read() throws IOException {
        if (closed) {
            return -1;
        }
        try {
            int ch = in.read();
            if (ch < 0) {
                close(false);
            }
            return ch;
        } catch (IOException e) {
            close(false);
            throw e;
        }
    }

    /**
     * Bulk variant of {@link #read()}, with the same EOF and error handling.
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        if (closed) {
            return -1;
        }
        try {
            int count = in.read(buf, off, len);
            if (count < 0) {
                close(false);
            }
            return count;
        } catch (IOException e) {
            close(false);
            throw e;
        }
    }

    // Hands the socket back to the request exactly once.
    private void close(boolean reuse) {
        if (closed == false) {
            closed = true;
            target.closeSocket(reuse);
        }
    }

    public void close() {
        close(true);
    }
}

/**
 * Input stream that is always at EOF.
 */
class NullInputStream extends InputStream {
    public int read() {
        return -1;
    }

    // NOTE(review): this takes a char[], so it is an overload and not an
    // override of InputStream.read(byte[], int, int) - confirm the intent.
    public int read(char[] buf, int off, int len) {
        return -1;
    }
}

/**
 * Input stream that reads at most <code>limit</code> bytes from the
 * request's socket.  Once the limit has been consumed the request is
 * marked as being at EOF and its socket is released for reuse.
 */
class LimitInputStream extends HttpInputStream {
    HttpRequest target;
    int limit;      // bytes still to be delivered

    public LimitInputStream(HttpRequest target, int limit) {
        super(target.hs.in);
        this.target = target;
        this.limit = limit;
    }

    public int read() throws IOException {
        if (limit <= 0) {
            return -1;
        }
        int ch = in.read();
        if ((ch >= 0) && (--limit <= 0)) {
            target.eof = true;
            target.closeSocket(true);
        }
        return ch;
    }

    public int read(byte[] buf, int off, int len) throws IOException {
        if (limit <= 0) {
            return -1;
        }
        len = Math.min(len, limit);
        int count = in.read(buf, off, len);
        if (count < 0) {
            // The underlying stream ended before the limit was reached.
            limit = 0;
            return -1;
        }
        limit -= count;
        if (limit <= 0) {
            target.eof = true;
            target.closeSocket(true);
        }
        return count;
    }
}

/**
 * Input stream that removes HTTP/1.1 "chunked" transfer coding from the
 * request's input stream.  When the terminating zero-size chunk is seen the
 * trailers are read into the request, the request is marked as being at
 * EOF and its socket is released for reuse.
 */
class UnchunkingInputStream extends HttpInputStream {
    HttpRequest target;
    boolean eof;        // the terminating chunk has been seen
    int bytesLeft;      // bytes remaining in the current chunk

    public UnchunkingInputStream(HttpRequest target) {
        super(target.in);
        this.target = target;
    }

    public int read() throws IOException {
        if ((bytesLeft <= 0) && (getChunkSize() == false)) {
            return -1;
        }
        bytesLeft--;
        return in.read();
    }

    /**
     * Read chunk data, crossing chunk boundaries while more data is
     * available without blocking.
     *
     * @return  the number of bytes read, <code>0</code> if <code>len</code>
     *          is not positive, or <code>-1</code> if no data could be
     *          read because the end of the chunked body (or of the
     *          underlying stream) was reached.
     */
    public int read(byte[] buf, int off, int len) throws IOException {
        if (len <= 0) {
            return 0;
        }
        int total = 0;
        while (true) {
            if ((bytesLeft <= 0) && (getChunkSize() == false)) {
                break;
            }
            int count = super.read(buf, off, Math.min(bytesLeft, len));
            if (count < 0) {
                // The stream ended inside a chunk.  Stop here so the EOF
                // marker is not mistaken for a byte count.
                break;
            }
            total += count;
            off += count;
            bytesLeft -= count;
            len -= count;

            if ((len <= 0) || (available() == 0)) {
                break;
            }
        }

        return (total == 0) ? -1 : total;
    }

    /**
     * Read the size line of the next chunk into <code>bytesLeft</code>.
     *
     * @return  <code>true</code> if there is a chunk with data to read,
     *          <code>false</code> once the terminating chunk has been seen.
     * @throws  IOException
     *          if the size line is missing or is not a non-negative
     *          hexadecimal number.
     */
    private boolean getChunkSize() throws IOException {
        if (eof) {
            return false;
        }

        /*
         * Although HTTP/1.1 chunking spec says that there is one "\r\n"
         * between chunks, some servers (for example, maps.yahoo.com)
         * send more than one blank line between chunks.  So, read and skip
         * all the blank lines seen between chunks.
         */
        String line;
        do {
            // Sanity check: limit chars when expecting a chunk size.
            line = ((HttpInputStream) in).readLine(HttpRequest.LINE_LIMIT);
        } while ((line != null) && (line.length() == 0));

        if (line == null) {
            throw new IOException("malformed chunk");
        }

        // The size may be followed by ";extension" parameters; ignore them.
        int semi = line.indexOf(';');
        if (semi >= 0) {
            line = line.substring(0, semi);
        }

        int size;
        try {
            size = Integer.parseInt(line.trim(), 16);
        } catch (NumberFormatException e) {
            throw new IOException("malformed chunk");
        }
        if (size < 0) {
            throw new IOException("malformed chunk");
        }
        bytesLeft = size;

        if (bytesLeft == 0) {
            eof = true;
            target.responseTrailers = new MimeHeaders((HttpInputStream) in);
            target.eof = true;
            target.closeSocket(true);
            return false;
        }
        return true;
    }
}

/**
 * Watchdog thread: closes an input stream once a timeout has elapsed,
 * unless it is cancelled first.
 */
class Timer extends Thread {
    int timeout;                        // seconds to wait before closing
    volatile InputStream close;         // The stream to close; null once cancelled
    volatile boolean timedOut = false;  // set when the watchdog fired

    /**
     * @param seconds   How long to wait, in seconds.
     * @param close     The stream to close when the time is up.
     */
    public Timer(int seconds, InputStream close) {
        this.close = close;
        this.timeout = seconds;
        System.out.println("Watchdog: " + seconds);
    }

    public void run() {
        try {
            // Long arithmetic so that large timeouts do not overflow.
            sleep(timeout * 1000L);
        } catch (InterruptedException e) {
            // Woken early; fall through and check for cancellation.
        }

        // Read the field once: cancel() may clear it from another thread.
        InputStream victim = close;
        if (victim != null) {
            timedOut = true;
            try {
                System.out.println("Watchdog: kill");
                victim.close();
            } catch (IOException e) {
                // Best effort: nothing more can be done with the stream.
            }
        }
    }

    /**
     * Disarm the watchdog and wake its thread so it can finish right away
     * instead of sleeping out the rest of the timeout.
     */
    public void cancel() {
        close = null;
        System.out.println("Watchdog: cancel");
        interrupt();
    }

    /**
     * @return  <code>true</code> if the watchdog fired before it was
     *          cancelled.
     */
    public boolean timedOut() {
        return timedOut;
    }
}