/*
* ContentTemplate.java
*
* Brazil project web application toolkit,
* export version: 2.3
* Copyright (c) 1999-2009 Sun Microsystems, Inc.
*
* Sun Public License Notice
*
* The contents of this file are subject to the Sun Public License Version
* 1.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is included as the file "license.terms",
* and also available at http://www.sun.com/
*
* The Original Code is from:
* Brazil project web application toolkit release 2.3.
* The Initial Developer of the Original Code is: suhler.
* Portions created by suhler are Copyright (C) Sun Microsystems, Inc.
* All Rights Reserved.
*
* Contributor(s): cstevens, suhler.
*
* Version: 2.6
* Created by suhler on 99/06/28
* Last modified by suhler on 09/04/22 11:49:25
*
* Version Histories:
*
* 2.6 09/04/22-11:49:25 (suhler)
* allow for multiple link references to be captured
*
* 2.5 07/04/04-14:30:59 (suhler)
* gather up all externally referenced javascript sources
*
* 2.4 06/11/13-10:44:43 (suhler)
* ignore script references to external files
*
* 2.3 06/01/06-13:50:59 (suhler)
* bug fix: inHead wasn't being reinitialized on every page.
* bug fix: The "prepend" attribute was being ignored for some properties
*
* 2.2 02/11/14-14:27:54 (suhler)
* handle empty bodies
*
* 2.1 02/10/01-16:36:44 (suhler)
* version change
*
* 1.21 02/07/24-10:46:25 (suhler)
* doc updates
*
* 1.20 02/01/29-14:29:35 (suhler)
* doc lint
*
* 1.19 01/09/13-09:23:54 (suhler)
* added "all" to extract the entire content
*
* 1.18 01/08/14-16:38:42 (suhler)
* doc lint
*
* 1.17 01/07/16-16:45:33 (suhler)
* add "prepend" option
*
* 1.16 00/12/27-12:22:42 (suhler)
* Fixed handling of content, script, and style.
* - multiple "content ... /content" pairs now work properly
*
* 1.15 00/12/11-20:24:23 (suhler)
* doc typo
*
* 1.14 00/12/11-13:30:00 (suhler)
* add class=props for automatic property extraction
*
* 1.13 00/12/04-08:45:34 (suhler)
* add satyle handling
*
* 1.12 00/10/31-10:19:25 (suhler)
* capture attributes of body tags
*
* 1.11 00/10/05-11:14:33 (suhler)
* extract http-eauiv tags to request headers
*
* 1.10 00/05/31-13:49:27 (suhler)
* name change
*
* 1.9 00/05/22-14:05:13 (suhler)
* doc updates
*
* 1.8 00/02/02-14:50:53 (suhler)
* removed debugging
*
* 1.7 99/12/07-10:55:39 (suhler)
* gather all javascript in the headers into a "script" property
*
* 1.6 99/10/18-10:26:01 (suhler)
* remove diagnostics
*
* 1.5 99/10/11-12:31:15 (suhler)
* copy some mime headers into the request properties
*
* 1.4 99/10/06-12:17:56 (suhler)
* bug fix
*
* 1.3 99/09/29-16:05:31 (cstevens)
* New HtmlRewriter object, that allows arbitrary rewriting of the HTML (by
* templates and others), instead of forcing the templates to return a string
* that contained all of the new HTML content in one big string.
*
* 1.2 99/09/01-12:16:02 (suhler)
* make url a request property
*
* 1.2 99/06/28-11:07:59 (Codemgr)
* SunPro Code Manager data about conflicts, renames, etc...
* Name history : 1 0 handlers/templates/ContentTemplate.java
*
* 1.1 99/06/28-11:07:58 (suhler)
* date and time created 99/06/28 11:07:58 by suhler
*
*/
package sunlabs.brazil.template;
import java.util.Dictionary;
/**
* Template class for extracting content out of remote html pages.
* This class is used by the TemplateHandler, for extracting
* the "content" out of html documents for later integration with
* a look-and-feel template using one or more of:
* {@link SetTemplate},
* {@link BSLTemplate},
* or
* {@link sunlabs.brazil.filter.ReplaceFilter},
*
* The plan is to snag the title and the content, and put them into
* request properties. The resultant processed output will be
* discarded. The following properties are gathered:
*
* - title
- The document title
*
- all
- The entire content
*
- bodyArgs
- The attributes to the body tag, if any
*
- content
- The body, delimited by content.../content>.
* The text inside multiple <content>
* ... </content> pairs
* are concatenated together.
*
- script
- All "<script>"..."</script>"
* tags found in the document head
*
- scriptSrcs
- A white-space delimited list of all "src"
attributes found in "script" tags.
*
- style
- All "<style">..."</style">
* tags found in the document head
*
- meta-[name]
- Every meta tag "name" and "content"
*
- link-[rel]
- Every link tag "rel" and "href". Mulitple tags with the same "rel"
* entriy are contatenated, using a space as the delimiter.
*
- user-agent
- The origin user agent
*
- referer
- The user agent referrer (if any)
*
- last-modified
- The document last modified time (if any) in std format
*
- content-length
- The document content length, as fetched from the origin server
*
* Properties:
*
* - prepend
- Prepend this string to the property names define above,
* that are populated by this template. (defaults to "").
*
*
* @author Stephen Uhler
* @version %V% 2.2
*/
public class ContentTemplate extends Template {
boolean inHead = true;
String prefix; // prefix all properties with this
public boolean
init(RewriteContext hr) {
inHead=true;
String all = hr.lex.rest();
if (all != null) {
prefix = hr.request.props.getProperty(hr.prefix + "prepend", "");
hr.request.props.put(prefix + "all", all);
}
return super.init(hr);
}
/**
* Toss everything up to and including this entity.
*/
public void
tag_title(RewriteContext hr) {
hr.reset();
}
/**
* Gather up the title - no tags allowed between title .... /title.
*/
public void
tag_slash_title(RewriteContext hr) {
hr.request.props.put(prefix + "title", hr.toString().trim());
hr.reset();
}
/**
* Append all "script" code while in the head section.
* If the script has a "src" attribute, we'll put the "src" in
* a variable so the template can deal with it (them?)
* For now, ignore it.
*/
public void
tag_script(RewriteContext hr) {
String src = hr.get("src");
if (src == null) {
do_tag(hr, "script");
} else {
String srcs = hr.request.props.getProperty(prefix + "scriptSrcs");
if (srcs != null) {
hr.request.props.put(prefix + "scriptSrcs", srcs + " " +src);
} else {
hr.request.props.put(prefix + "scriptSrcs", src);
}
}
}
/**
* Append all "style" code while in the head section.
*/
public void
tag_style(RewriteContext hr) {
do_tag(hr, "style");
}
void
do_tag(RewriteContext hr, String tag) {
if (inHead) {
boolean save = hr.accumulate(false);
hr.nextToken();
String current = hr.request.props.getProperty(prefix + tag,"") +
hr.getBody();
hr.request.props.put(prefix + tag, current);
hr.accumulate(save);
hr.reset();
}
}
/**
* Mark end of head section. All "script" content in the "body"
* is left alone.
*/
public void
tag_slash_head(RewriteContext hr) {
inHead = false;
}
/**
* toss everything up to and including here, but turn on
* content accumulation.
*/
public void
tag_content(RewriteContext hr) {
hr.reset();
hr.accumulate(true);
}
/**
* Grab the "body" attributes, and toss all output to this point.
*/
public void
tag_body(RewriteContext hr) {
inHead = false;
String bodyArgs = hr.getArgs();
if (bodyArgs != null) {
hr.request.props.put(prefix + "bodyArgs", bodyArgs);
}
hr.reset();
}
/**
* Save the content gathered so far, and turn off content accumulation.
*/
public void
tag_slash_content(RewriteContext hr) {
String content = hr.request.props.getProperty("content","") +
hr.toString();
hr.request.props.put(prefix + "content", content);
hr.accumulate(false);
}
/**
* If no content tags are present, use the entire "body" instead.
*/
public void
tag_slash_body(RewriteContext hr) {
if (!hr.request.props.containsKey("content")) {
hr.request.props.put(prefix + "content", hr.toString());
hr.accumulate(false);
}
}
/**
* Extract data out of meta tags into the properties.
* For "http-equiv" tags, set the corrosponding http respones header.
*/
public void
tag_meta(RewriteContext hr) {
String name = hr.get("name", false);
String equiv = hr.get("http-equiv", false);
String content = hr.get("content", false);
if ((name != null) && (content != null)) {
hr.request.props.put(prefix + "meta-" + name, content);
} else if ((equiv != null) && (content != null)) {
hr.request.addHeader(equiv, content);
}
}
/**
* Extract data out of link tags into the properties.
* Prefix the "rel" attribute with "link-" to use as the
* property name. [XXX fix me].
*/
public void
tag_link(RewriteContext hr) {
String type = hr.get("rel", false);
String href = hr.get("href", false);
if ((type != null) && (href != null)) {
String old = hr.request.props.getProperty(prefix + "link-" + type);
if (old == null) {
hr.request.props.put(prefix + "link-" + type, href);
} else {
hr.request.props.put(prefix + "link-" + type, old + " " + href);
}
}
}
/**
* Extract useful properties out of the http mime headers.
*/
public boolean
done(RewriteContext hr) {
tag_slash_body(hr);
transfer("user-agent", hr.request.headers, hr.request.props);
transfer("referer", hr.request.headers, hr.request.props);
transfer("last-modified", hr.request.responseHeaders, hr.request.props);
transfer("content-length", hr.request.responseHeaders,hr.request.props);
return true;
}
/**
* Transfer an item to another hash table, if it exists
*/
private boolean
transfer (String key, Dictionary src, Dictionary dst) {
Object obj = src.get(key);
if (obj != null) {
dst.put(prefix + key, obj);
return true;
} else {
return false;
}
}
}